Skip to content

Commit

Permalink
Look up URLs a different way
Browse files Browse the repository at this point in the history
Rather than matching rows to URLs by their position in the list, it’s
probably better to check that the IDs match up.
  • Loading branch information
andylolz committed Aug 31, 2021
1 parent 6369cd4 commit 2cd2292
Showing 1 changed file with 5 additions and 5 deletions.
10 changes: 5 additions & 5 deletions glide.py
Expand Up @@ -59,9 +59,9 @@ def make_date(value):
print("Opening GLIDEnumber.net")
r = s.post(SEARCH_URL, data=search_post_data)
doc = html.fromstring(r.text)
urls = [
BASE_URL + a.get('href').split('&', 1)[0]
for a in doc.xpath('//table')[6].xpath('tr/td[1]/a')]
url_lookup = dict([
(a.text, BASE_URL + a.get('href').split('&', 1)[0])
for a in doc.xpath('//table')[6].xpath('tr/td[1]/a')])

post_data = [
("continueReport", "Continue"),
Expand All @@ -81,14 +81,14 @@ def make_date(value):
doc = html.fromstring(r.text)
rows = doc.xpath("//table")[2].xpath("tr/td/table[2]/tr")
print("Found {} entries".format(len(rows)))
for row, url in zip(rows, urls):
for row in rows:
# if not row.xpath("tr/td[@class='bfS']"): continue
if (len(row.xpath("td")) != 8):
print("Irregular column width, skipping")
continue
csv.writerow({
"GLIDE_number": get_t(row, 0),
"URL": url,
"URL": url_lookup.get(get_t(row, 0)),
"Event": get_t(row, 1),
"Country": get_t(row, 2),
"Date": make_date(get_t(row, 3)),
Expand Down

0 comments on commit 2cd2292

Please sign in to comment.