/
parsefacilities.py
executable file
·103 lines (82 loc) · 3.15 KB
/
parsefacilities.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#!/usr/bin/python
import csv
import sqlite3
import datetime
import dbhandler
def getFacilities(datasource):
facilities = csv.DictReader(datasource)
for place in facilities:
details = {
'Type': place['SUBCATEGORY'].decode('latin-1'),
'City': place['CITY'].upper().decode('latin-1'),
'Address': place['ADDR'].decode('latin-1'),
'ID': place['FACILITYID'].upper().decode('latin-1'),
'Name': place['BUSINESS_NAME'].decode('latin-1')
}
yield details
def restaurantRecognizer( s ):
return 'Restaurant' in s or 'Food Take Out' in s or 'Baked Goods - Retail' in s or 'Ice Cream / Yogurt Vendor' in s
def cityRecognizer( c ):
return c in ('WATERLOO', 'KITCHENER', 'ST.+JACOBS')
def addToDB(cursor, details, date):
if restaurantRecognizer( details['Type'] ) and cityRecognizer( details['City'] ):
cursor.execute("SELECT * FROM facilities WHERE id=?;", (details['ID'],))
if cursor.fetchone() is None:
print u"{Name}: {Address}, {City}".format(**details)
cursor.execute('''
INSERT INTO facilities (id, name, lastupdate, creation, address, city)
VALUES ( ?, ?, ?, ?, ?, ? );''',
(details['ID'],
details['Name'],
date,
date,
details['Address'],
details['City']))
else:
print "Updating time on {}".format( details['Name'])
cursor.execute( "UPDATE facilities SET lastupdate = ? WHERE id = ?;",
( date, details['ID']) )
def valid_date(s):
try:
return datetime.datetime.strptime(s, "%Y-%m-%d")
except ValueError:
msg = "Not a valid date: '{0}'.".format(s)
raise argparse.ArgumentTypeError(msg)
def main():
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("--update", action="store_true",
help="Update the database with new restaurants from CSV")
parser.add_argument("--datasource", type=argparse.FileType('rt'),
default=open('testdata/Facilities_OpenData.csv', 'rt'),
help="File to obtain data from (default: testdata/Facilities_OpenData.csv")
parser.add_argument("--database", type=dbhandler.dbArgType,
default='restaurants.db',
help="SQLite database file for storing updates")
parser.add_argument("--getrecent", metavar="N", action="store", type=int,
help="Return informaiton on the N restaurants discovered in the last N days")
parser.add_argument("--enqueue", action="store_true",
help="For --getrecent, store the recent additions in the database.")
parser.add_argument("--date", type=valid_date,
help="The date of the update - format YYYY-MM-DD")
args = parser.parse_args()
db = args.database
cursor = db.cursor()
if args.date:
date = datetime.date(args.date.year, args.date.month, args.date.day)
else:
date = datetime.date.today()
if args.update:
for facility in getFacilities(args.datasource):
addToDB( cursor, facility, date )
db.commit()
if args.getrecent:
for result in dbhandler.getRecent( cursor, args.getrecent ):
print "{name}: {address}, {city}".format(
name=result.Name, address=result.Address, city=result.City)
if args.enqueue:
cursor.execute( "INSERT INTO queue (facilities_id) VALUES ( ? );", [(result.ID)] )
db.commit()
db.close()
if __name__ == '__main__':
main()