Updates

1 parent e43d659 commit bbc2a04f054fba454703345ecbe00599d4266eef @dagwieers committed Mar 14, 2006
Showing with 860 additions and 32 deletions.
  1. +10 −8 README
  2. +5 −2 TODO
  3. +261 −0 patches/aerrate-changes-dag-20060202.diff
  4. +232 −0 patches/aerrate-changes-dag-20060210.diff
  5. +330 −0 patches/aerrate-changes-dag-20060314.diff
  6. +20 −20 sarahdb.py
  7. +2 −2 sarahinfo.py
18 README
@@ -7,26 +7,27 @@ Tim Rupp. Available from:
svn co http://caphrim.net/svn/fermi/aerrate/trunk aerrate
-And issuing the following script:
+Then apply the latest patch from patches/:
+
+ patch -p0 < patches/aerrate-changes-dag-TIMESTAMP.diff
+
+And issue the following command to scrape the RHN website for the most up-to-date
+advisories:
cd aerrate
- for release in rh{el4,el3,21}{as,es,ws} rh-desktop-{4,3} rh21aw rhel{4,3}-extras rhel3{cluster,devsuite} rhshas; do
- echo "== $release =="
- python ./aerrate.py -r --source=site --type=all --release=$release
- done
+ ./aerrate.py -r --source=site --type=all --release=enterprise
cd -
This will copy all errata as XML files into ./aerrate/advisories/
-
USING SARAH
^^^^^^^^^^^
sarah currently expects the advisories to be available from ./advisories/. So
making a symlink from aerrate/advisories to ./advisories is probably easiest.
ln -sf aerrate/advisories .
-Then to create an sqlite database out of these XML files, run:
+Then create an sqlite database from these XML files by running:
./sarahdb.py
@@ -37,7 +38,8 @@ To create some statistics from this database, use:
The sarahinfo utility currently shows how to query the database. Not all
information is currently available in the XML files. Red Hat will be releasing
-these XML files in the future with much more info.
+these XML files in the future with more information than we can currently
+get out of the RHN website.
I also added sarahsql to allow querying the database on the command line; you
can do queries in bash, like:
7 TODO
@@ -1,7 +1,10 @@
aerrate.py: scrapes rhn advisory information
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-+ Allow to list a number of releases (eg. --release=rhel4as,rhel3as,rhel2.1as)
-+ Make aerrate smart about advisories already scraped (recently)
++ Fix file-element syntax to match new RHN xml output
++ Fix reference-element syntax to match new RHN xml output
++ Add bugzilla references and bugzilla reference synopsis information
++ Add references to RHBA and RHEA as well
++ Fix the problem with RHBA-2004-470.xml (aerrate freezes)
+ Use timestamp and HTTP HEAD requests to check for updates (see the
  sketch after this list)
+ Parallelize the download process (use HTTP pipelining and
  e.g. 4 connections)
261 patches/aerrate-changes-dag-20060202.diff
@@ -0,0 +1,261 @@
+Index: parsers/security.py
+===================================================================
+--- parsers/security.py (revision 16)
++++ parsers/security.py (working copy)
+@@ -24,6 +24,7 @@
+ errata_p.__init__(self)
+ self.odir = "advisories/"
+ self.ofile = "security.xml"
++ self.synopsis = []
+ self.detail = []
+ self.cve_counter = 0
+ self.errata_counter = 0
+@@ -41,7 +42,7 @@
+ if os.environ['http_proxy'] == '':
+ self.proxy = None
+ else:
+- self.proxy = { 'http' : os.environ['http_proxy'] }
++ self.proxy = { 'http' : os.environ['http_proxy'], 'https' : os.environ['http_proxy'] }
+ except:
+ self.proxy = None
+
+@@ -93,6 +94,13 @@
+
+ self.detail = result
+
++ # Get RHSA title (contains severity and synopsis)
++ result = re.search('<h1>(.+?)</h1>', page, re.I | re.M)
++ if result:
++ self.synopsis = result.group(1)
++ else:
++ self.synopsis = 'Unknown: Unknown'
++
+ def update_database(self, link):
+ """
+ Updates the output XML file to reflect the new contents
+@@ -111,15 +119,20 @@
+ # 6 - RPMs Required
+ # 7 - References
+ # 8 - Requirements (not used)
+- self.get_rhsa_detail(link)
++ try:
++ self.get_rhsa_detail(link)
++ except urlgrabber.grabber.URLGrabError:
++ print 'Error grabbing link', link
++ return
+
+ # Hack to get past RPMs that outdate other rpms.
+ # FIXME: Check to see what RPM is outdated and update XML file
+ # as necessary
+- for detail in self.detail:
+- if detail.find("File outdated") > 0:
+- #print "Encountered outdated RPM"
+- return
++# if not __main__.all_advisories:
++# for detail in self.detail:
++# if detail.find("File outdated") > 0:
++# #print "Encountered outdated RPM"
++# return
+
+ self.advisory_url = link
+
+@@ -162,7 +175,7 @@
+ self.parse_rights(w)
+ self.parse_type(w)
+
+- self.parse_synopsis(w, self.detail[1])
++ self.parse_synopsis(w, self.synopsis)
+ self.parse_issue_date(w, self.strip_html(self.detail[0]))
+ self.parse_updated_on(w, self.strip_html(self.detail[0]))
+
+@@ -268,30 +281,16 @@
+ """
+ data = self.tags_to_space(data)
+ data = self.strip_html(data)
+- block = data.split("\n")
++ line = data.strip().split(':')
+
+- # Red hat doesnt separate out the severity information
+- # in their webpages like they do in their email archives.
+- # Therefore I'm looking for the severity string to
+- # determine the severity level.
+- for line in block:
+- if line.find("moderate security") > -1:
+- w.element("severity", "moderate")
+- break
+- elif line.find("important security") > -1:
+- w.element("severity", "important")
+- break
+- elif line.find("critical security") > -1:
+- w.element("severity", "critical")
+- break
+- elif line.find("low security") > -1:
+- w.element("severity", "low")
+- break
++ severity = line[0].strip().lower()
++ synopsis = ':'.join(line[1:]).strip()
++
++ if severity in ('low', 'moderate', 'important', 'critical'):
++ w.element('severity', severity)
+ else:
+- w.element("severity", "unknown")
++ w.element('severity', 'unknown')
+
+- synopsis = block[0].strip()
+-
+ w.element("synopsis", synopsis, lang="en_US")
+
+ def tags_to_space(self, item):
+@@ -464,7 +463,7 @@
+ # in the script to differentiate between releases
+ for i in block:
+ item = i.strip()
+- if item == "":
++ if not item:
+ continue
+ else:
+ if self.at_arch(item[0:-1]):
+@@ -508,9 +507,8 @@
+ self.parse_srpm_arch(w, block, item)
+ elif self.at_arch(item):
+ self.parse_arch(w, block, item)
+- elif item == "":
+- item = block.pop().replace("&#160;", '').strip()
+- continue
++ elif not item:
++ pass
+ else:
+ block.append(item)
+ break
+@@ -561,11 +559,11 @@
+ error = "new_arch"
+ break
+
+- if filename == "":
++ elif not filename:
+ error = "new_arch"
+ break
+
+- if not self.linked:
++ elif not self.linked:
+ junk = block.pop()
+
+ checksum = block.pop().replace("&#160;", '').strip()
+@@ -594,18 +592,21 @@
+ error = "header"
+ break
+
+- if self.at_arch(filename):
++ elif self.at_arch(filename):
+ error = "new_arch"
+ break
+
+- if filename == "":
++ elif not filename:
+ error = "new_arch"
+ break
+
+- if not self.linked:
++ elif not self.linked:
+ junk = block.pop()
+
+- checksum = block.pop().replace("&#160;", '').strip()
++ try:
++ checksum = block.pop().replace("&#160;", '').strip()
++ except IndexError:
++ break
+
+ # Hack to make RH webpages that list IA-32 stuff
+ # equal the data that is actually listed in the
+@@ -618,7 +619,7 @@
+ w.element("sum", checksum, type="md5")
+ w.end("file")
+
+- if error == "header" or error == "new_arch":
++ if error in ('header', 'new_arch'):
+ block.append(filename)
+ return
+
+Index: scrapers/site.py
+===================================================================
+--- scrapers/site.py (revision 16)
++++ scrapers/site.py (working copy)
+@@ -63,7 +63,11 @@
+ self.releases = {}
+ p = re.compile('\<ul\>')
+
+- page = urlgrabber.urlread(self.release_list+"/errata", proxies=self.proxy)
++ try:
++ page = urlgrabber.urlread(self.release_list+"/errata", proxies=self.proxy)
++ except urlgrabber.grabber.URLGrabError:
++ print 'Error grabbing list of all releases from', self.release_list+"/errata"
++ return
+
+ result = p.split(page)
+
+Index: aerrate.py
+===================================================================
+--- aerrate.py (revision 16)
++++ aerrate.py (working copy)
+@@ -30,8 +30,11 @@
+ enhancements
+
+ Argument list
+- --source Source to read from
++ --all Download all advisories (do not skip outdated advisories)
+
++ --source Source to read from (either archive, feed or site)
++ *Defaults to site*
++
+ --type The type of errata list to read from
+
+ --release Parse errata for specified release
+@@ -42,11 +45,13 @@
+ source = 0
+ type = 0
+ release = 'none'
++ all_advisories = 0
+ print_releases = 0
+
+ # Try to parse any command line arguments
+ try:
+ opts, args = getopt.getopt(sys.argv[1:], "hstru", [ "help",
++ "all",
+ "source=",
+ "type=",
+ "printreleases",
+@@ -63,24 +68,27 @@
+ if o in ("-h", "--help"):
+ usage()
+ sys.exit()
+- if o in ("-s", "--source"):
++ elif o in ("-s", "--source"):
+ source = a
+- if o in ("-t", "--type"):
++ elif o in ("-t", "--type"):
+ type = a
+- if o in ("-p", "--printreleases"):
++ elif o in ("-a", "--all"):
++ all_advisories = 1
++ elif o in ("-p", "--printreleases"):
+ print_releases = 1
+- if o in ("-r", "--release"):
++ elif o in ("-r", "--release"):
+ release = a
++ else:
++ print 'Option %s not understood.' %o
++ sys.exit(1)
+
+ # Print releases is one argument in particular
+ # that doesnt require the source argument.
+ # So if that argument has been sent, skip
+ # over this small argument checking block
+ if not print_releases:
+- # The source argument is required always
+ if not source:
+- usage()
+- sys.exit(0)
++ source = "site"
+ # The site argument requires a type
+ # argument and a release arg be sent too
+ if source == "site":