Skip to content

Commit

Permalink
Updates
Browse files Browse the repository at this point in the history
  • Loading branch information
dagwieers committed Mar 14, 2006
1 parent e43d659 commit bbc2a04
Show file tree
Hide file tree
Showing 7 changed files with 860 additions and 32 deletions.
18 changes: 10 additions & 8 deletions README
Original file line number Diff line number Diff line change
Expand Up @@ -7,26 +7,27 @@ Tim Rupp. Available from:

svn co http://caphrim.net/svn/fermi/aerrate/trunk aerrate

And issuing the following script:
Then apply the latest patch from patches/:

patch -p0 < patches/aerrate-changes-dag-TIMESTAMP.diff

And issue the following command to scrape the RHN website for the most up-to-date
advisories:

cd aerrate
for release in rh{el4,el3,21}{as,es,ws} rh-desktop-{4,3} rh21aw rhel{4,3}-extras rhel3{cluster,devsuite} rhshas; do
echo "== $release =="
python ./aerrate.py -r --source=site --type=all --release=$release
done
./aerrate.py -r --source=site --type=all --release=enterprise
cd -

This will copy all errata as XML files into ./aerrata/advisories/


USING SARAH
^^^^^^^^^^^
sarah currently expects the advisories to be available from ./advisories/. So
making a symlink from aerrate/advisories to ./advisories is probably easiest.

ln -sf aerrate/advisories .

Then to create an sqlite database out of these XML files, run:
Then create an sqlite database out of these XML files, by doing:

./sarahdb.py

Expand All @@ -37,7 +38,8 @@ To create some statistics from this database, use:

The sarahinfo utility currently shows how to query the database. Not all
information is currently available in the XML files. Red Hat will be releasing
these XML files in the future with much more info.
these XML files in the future with more information we can get out of the
RHN website.

I also added sarahsql to allow to query the database on the commandline, you
can do queries in bash, like:
Expand Down
7 changes: 5 additions & 2 deletions TODO
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
aerrate.py: scrapes rhn advisory information
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ Allow to list a number of releases (eg. --release=rhel4as,rhel3as,rhel2.1as)
+ Make aerrate smart about advisories already scraped (recently)
+ Fix file-element syntax to match new RHN xml output
+ Fix reference-element syntax to match new RHN xml output
+ Add bugzilla references and bugzilla reference synopsis information
+ Add references to RHBA and RHEA as well
+ Fix the problem with RHBA-2004-470.xml (aerrate freezes)
+ Use timestamp and HTTP HEAD requests to check for updates
+ Parallelize the download process (use HTTP Pipelining and use
eg. 4 connections)
Expand Down
261 changes: 261 additions & 0 deletions patches/aerrate-changes-dag-20060202.diff
Original file line number Diff line number Diff line change
@@ -0,0 +1,261 @@
Index: parsers/security.py
===================================================================
--- parsers/security.py (revision 16)
+++ parsers/security.py (working copy)
@@ -24,6 +24,7 @@
errata_p.__init__(self)
self.odir = "advisories/"
self.ofile = "security.xml"
+ self.synopsis = []
self.detail = []
self.cve_counter = 0
self.errata_counter = 0
@@ -41,7 +42,7 @@
if os.environ['http_proxy'] == '':
self.proxy = None
else:
- self.proxy = { 'http' : os.environ['http_proxy'] }
+ self.proxy = { 'http' : os.environ['http_proxy'], 'https' : os.environ['http_proxy'] }
except:
self.proxy = None

@@ -93,6 +94,13 @@

self.detail = result

+ # Get RHSA title (contains severity and synopsis)
+ result = re.search('<h1>(.+?)</h1>', page, re.I | re.M)
+ if result:
+ self.synopsis = result.group(1)
+ else:
+ self.synopsis = 'Unknown: Unknown'
+
def update_database(self, link):
"""
Updates the output XML file to reflect the new contents
@@ -111,15 +119,20 @@
# 6 - RPMs Required
# 7 - References
# 8 - Requirements (not used)
- self.get_rhsa_detail(link)
+ try:
+ self.get_rhsa_detail(link)
+ except urlgrabber.grabber.URLGrabError:
+ print 'Error grabbing link', link
+ return

# Hack to get past RPMs that outdate other rpms.
# FIXME: Check to see what RPM is outdated and update XML file
# as necessary
- for detail in self.detail:
- if detail.find("File outdated") > 0:
- #print "Encountered outdated RPM"
- return
+# if not __main__.all_advisories:
+# for detail in self.detail:
+# if detail.find("File outdated") > 0:
+# #print "Encountered outdated RPM"
+# return

self.advisory_url = link

@@ -162,7 +175,7 @@
self.parse_rights(w)
self.parse_type(w)

- self.parse_synopsis(w, self.detail[1])
+ self.parse_synopsis(w, self.synopsis)
self.parse_issue_date(w, self.strip_html(self.detail[0]))
self.parse_updated_on(w, self.strip_html(self.detail[0]))

@@ -268,30 +281,16 @@
"""
data = self.tags_to_space(data)
data = self.strip_html(data)
- block = data.split("\n")
+ line = data.strip().split(':')

- # Red hat doesnt separate out the severity information
- # in their webpages like they do in their email archives.
- # Therefore I'm looking for the severity string to
- # determine the severity level.
- for line in block:
- if line.find("moderate security") > -1:
- w.element("severity", "moderate")
- break
- elif line.find("important security") > -1:
- w.element("severity", "important")
- break
- elif line.find("critical security") > -1:
- w.element("severity", "critical")
- break
- elif line.find("low security") > -1:
- w.element("severity", "low")
- break
+ severity = line[0].strip().lower()
+ synopsis = ':'.join(line[1:]).strip()
+
+ if severity in ('low', 'moderate', 'important', 'critical'):
+ w.element('severity', severity)
else:
- w.element("severity", "unknown")
+ w.element('severity', 'unknown')

- synopsis = block[0].strip()
-
w.element("synopsis", synopsis, lang="en_US")

def tags_to_space(self, item):
@@ -464,7 +463,7 @@
# in the script to differentiate between releases
for i in block:
item = i.strip()
- if item == "":
+ if not item:
continue
else:
if self.at_arch(item[0:-1]):
@@ -508,9 +507,8 @@
self.parse_srpm_arch(w, block, item)
elif self.at_arch(item):
self.parse_arch(w, block, item)
- elif item == "":
- item = block.pop().replace("&#160;", '').strip()
- continue
+ elif not item:
+ pass
else:
block.append(item)
break
@@ -561,11 +559,11 @@
error = "new_arch"
break

- if filename == "":
+ elif not filename:
error = "new_arch"
break

- if not self.linked:
+ elif not self.linked:
junk = block.pop()

checksum = block.pop().replace("&#160;", '').strip()
@@ -594,18 +592,21 @@
error = "header"
break

- if self.at_arch(filename):
+ elif self.at_arch(filename):
error = "new_arch"
break

- if filename == "":
+ elif not filename:
error = "new_arch"
break

- if not self.linked:
+ elif not self.linked:
junk = block.pop()

- checksum = block.pop().replace("&#160;", '').strip()
+ try:
+ checksum = block.pop().replace("&#160;", '').strip()
+ except IndexError:
+ break

# Hack to make RH webpages that list IA-32 stuff
# equal the data that is actually listed in the
@@ -618,7 +619,7 @@
w.element("sum", checksum, type="md5")
w.end("file")

- if error == "header" or error == "new_arch":
+ if error in ('header', 'new_arch'):
block.append(filename)
return

Index: scrapers/site.py
===================================================================
--- scrapers/site.py (revision 16)
+++ scrapers/site.py (working copy)
@@ -63,7 +63,11 @@
self.releases = {}
p = re.compile('\<ul\>')

- page = urlgrabber.urlread(self.release_list+"/errata", proxies=self.proxy)
+ try:
+ page = urlgrabber.urlread(self.release_list+"/errata", proxies=self.proxy)
+ except urlgrabber.grabber.URLGrabError:
+ print 'Error grabbing list of all releases from', self.release_list+"/errata"
+ return

result = p.split(page)

Index: aerrate.py
===================================================================
--- aerrate.py (revision 16)
+++ aerrate.py (working copy)
@@ -30,8 +30,11 @@
enhancements

Argument list
- --source Source to read from
+ --all Download al advisories (do not skip outdated advisories)

+ --source Source to read from (either archive, feed or site)
+ *Defaults to site*
+
--type The type of errata list to read from

--release Parse errata for specified release
@@ -42,11 +45,13 @@
source = 0
type = 0
release = 'none'
+ all_advisories = 0
print_releases = 0

# Try to parse any command line arguments
try:
opts, args = getopt.getopt(sys.argv[1:], "hstru", [ "help",
+ "all",
"source=",
"type=",
"printreleases",
@@ -63,24 +68,27 @@
if o in ("-h", "--help"):
usage()
sys.exit()
- if o in ("-s", "--source"):
+ elif o in ("-s", "--source"):
source = a
- if o in ("-t", "--type"):
+ elif o in ("-t", "--type"):
type = a
- if o in ("-p", "--printreleases"):
+ elif o in ("-a", "--all"):
+ all_advisories = 1
+ elif o in ("-p", "--printreleases"):
print_releases = 1
- if o in ("-r", "--release"):
+ elif o in ("-r", "--release"):
release = a
+ else:
+ print 'Option %s not understood.' %o
+ sys.exit(1)

# Print releases is one argument in particular
# that doesnt require the source argument.
# So if that argument has been sent, skip
# over this small argument checking block
if not print_releases:
- # The source argument is required always
if not source:
- usage()
- sys.exit(0)
+ source = "site"
# The site argument requires a type
# argument and a release arg be sent too
if source == "site":
Loading

0 comments on commit bbc2a04

Please sign in to comment.