Scaled to allow for multiple locations, accounts

commit 8aface77e60f7662fa3f13619297b6bc58a02022 (parent 1c80a37)
Authored by Frank Harris
.gitignore (3 lines changed)
@@ -3,4 +3,5 @@
*~
*.pyc
.settings
-settings.py
+settings.py
+app.yaml
app.yaml (2 lines changed)
@@ -1,5 +1,5 @@
application: bkgasfinder
-version: 6
+version: 7
runtime: python27
api_version: 1
threadsafe: false
index.yaml (7 lines changed)
@@ -12,6 +12,13 @@ indexes:
 - kind: Station
   properties:
+  - name: account
+  - name: time
+  - name: date
+    direction: desc
+
+- kind: Station
+  properties:
   - name: time
   - name: date
     direction: desc
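
The composite index added above, (account, time, date desc), exists to serve the per-account dedup lookup that main.py now issues (see the main.py hunk below). App Engine's datastore refuses to run a query that combines equality filters with a sort order unless a matching composite index exists (it raises NeedIndexError). As a sketch, the query shape this index backs is:

    stationObj = Station.all().filter("account =", ACCOUNT) \
                              .filter("time =", c_time) \
                              .order('-date') \
                              .fetch(1)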
main.py (143 lines changed)
@@ -24,7 +24,7 @@
from google.appengine.ext import webapp
from google.appengine.ext.webapp.util import run_wsgi_app
from google.appengine.api import urlfetch
-from google.appengine.api import mail
+from google.appengine.ext.webapp import util
from BeautifulSoup import BeautifulSoup
from model import *
@@ -39,70 +39,85 @@ class MainPage(webapp.RequestHandler):
    def get(self):
        self.response.headers['Content-Type'] = 'text/plain'
-        # Load the data feed in Beautiful Soup.
-        result = urlfetch.fetch(settings.FEED_URL)
-        soup = BeautifulSoup(result.content)
-
-        # Start scraping. This is pretty ugly.
-        name = soup.findAll("td", { "class" : "td2" })
-        brand = soup.findAll("td", { "class" : "td3" })
-        address = soup.findAll("td", { "class" : "td4" })
-        phone = soup.findAll("td", { "class" : "td5" })
-        price = soup.findAll("td", { "class" : "td6" })
-        time = soup.findAll("td", { "class" : "td7" })
-
-        # Loop through the data. This is really inefficient.
-        for i in range(len(name)):
-
-            # Clean up data, again pretty ugly.
-            c_name=str(name[i].find(text=True))
-            c_brand=str(brand[i].find(text=True))
-            c_address=str(address[i]).replace('<br />', ' ').replace('<td class="td4">','').replace('</td>', '').strip()
-            c_phone=str(phone[i].find(text=True)).replace(')', ') ')
-            c_price=str(price[i].find(text=True)).replace('&nbsp;', '')
-            c_time=str(time[i].find(text=True))
-
-            # Determine threshold. Only iterate for updates in the last 6 minutes.
-            # hours is a complete hack because I couldn't figure out the timezone madness.
-            threshold = datetime.now()-relativedelta(hours=5, minutes=6)
-
-            if threshold <= dparser.parse(c_time):
-                # Check to see if the latest update has already been logged.
-                stationObj = Station.all().filter("time =", c_time).order('-date').fetch(1)
-
-                # If not, log it and then tweet it.
-                if not stationObj:
-                    station = Station(name=c_name, brand=c_brand, address=c_address, phone=c_phone, price=c_price, time=c_time)
-                    station.put()
-                    message_body = str(c_time) + ": " + str(c_name) + " (" + str(c_brand) + ") " + str(c_address) + ", " + str(c_phone) + " - " + str(c_price) + " per gallon"
-
-                    # Authenticate this app's credentials via OAuth.
-                    auth = tweepy.OAuthHandler(settings.CONSUMER_KEY, settings.CONSUMER_SECRET)
-
-                    # Set the credentials that we just verified and passed in.
-                    auth.set_access_token(settings.TOKEN_KEY, settings.TOKEN_SECRET)
-
-                    # Authorize with the Twitter API via OAuth.
-                    twitterapi = tweepy.API(auth)
-
-                    # Update the user's twitter timeline with the tweeted text.
-                    # Limit the length to 140 characters
-                    twitterapi.update_status(message_body[:140])
-                    self.response.out.write(message_body[:140])
-                    self.response.out.write(', ')
-
-                else:
-                    # Wah. No new update.
-                    self.response.out.write('update already added, ')
-
-            else:
-                break
-
-application = webapp.WSGIApplication([('/', MainPage)],
-                                     debug=True)
+        accounts = settings.ACCOUNTS
+        for account, account_info in accounts.iteritems():
+            ACCOUNT = account
+            ZIPCODE = account_info['zipcode']
+            RADIUS = account_info['radius']
+            FEED_URL = settings.BASE_URL + '&zip=' + ZIPCODE + '&radius=' + RADIUS
+            CONSUMER_KEY = account_info['consumer_key']
+            CONSUMER_SECRET = account_info['consumer_secret']
+            TOKEN_KEY = account_info['token_key']
+            TOKEN_SECRET = account_info['token_secret']
+
+            self.response.out.write(ACCOUNT)
+            self.response.out.write('-----------')
+
+            # Load the data feed in Beautiful Soup.
+            result = urlfetch.fetch(FEED_URL)
+            soup = BeautifulSoup(result.content)
+
+            # Start scraping. This is pretty ugly.
+            name = soup.findAll("td", { "class" : "td2" })
+            brand = soup.findAll("td", { "class" : "td3" })
+            address = soup.findAll("td", { "class" : "td4" })
+            phone = soup.findAll("td", { "class" : "td5" })
+            price = soup.findAll("td", { "class" : "td6" })
+            time = soup.findAll("td", { "class" : "td7" })
+
+            # Loop through the data. This is really inefficient.
+            for i in range(len(name)):
+
+                # Clean up data, again pretty ugly.
+                c_name=str(name[i].find(text=True))
+                c_brand=str(brand[i].find(text=True))
+                c_address=str(address[i]).replace('<br />', ' ').replace('<td class="td4">','').replace('</td>', '').strip()
+                c_phone=str(phone[i].find(text=True)).replace(')', ') ')
+                c_price=str(price[i].find(text=True)).replace('&nbsp;', '')
+                c_time=str(time[i].find(text=True))
+
+                # Determine threshold. Only iterate for updates in the last 6 minutes.
+                # hours is a complete hack because I couldn't figure out the timezone madness.
+                threshold = datetime.now()-relativedelta(hours=5, minutes=6)
+
+                if threshold <= dparser.parse(c_time):
+                    # Check to see if the latest update has already been logged.
+                    stationObj = Station.all().filter("account =", ACCOUNT).filter("time =", c_time).order('-date').fetch(1)
+
+                    # If not, log it and then tweet it.
+                    if not stationObj:
+                        station = Station(account=ACCOUNT, name=c_name, brand=c_brand, address=c_address, phone=c_phone, price=c_price, time=c_time)
+                        station.put()
+                        message_body = str(c_time) + ": " + str(c_name) + " (" + str(c_brand) + ") " + str(c_address) + ", " + str(c_phone) + " - " + str(c_price) + " per gallon"
+
+                        # Authenticate this app's credentials via OAuth.
+                        auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
+
+                        # Set the credentials that we just verified and passed in.
+                        auth.set_access_token(TOKEN_KEY, TOKEN_SECRET)
+
+                        # Authorize with the Twitter API via OAuth.
+                        twitterapi = tweepy.API(auth)
+
+                        # Update the user's twitter timeline with the tweeted text.
+                        # Limit the length to 140 characters
+                        twitterapi.update_status(message_body[:140])
+                        self.response.out.write(message_body[:140])
+                        self.response.out.write(', ')
+
+                    else:
+                        # Wah. No new update.
+                        self.response.out.write('update already added, ')
+
+                else:
+                    break
def main():
-    run_wsgi_app(application)
+    application = webapp.WSGIApplication(
+        [('/', MainPage)],
+        debug=True)
+    util.run_wsgi_app(application)
+
-if __name__ == "__main__":
+if __name__ == '__main__':
    main()
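
Condensed, the new flow in get() is one scrape-and-tweet pass per configured account. A minimal sketch using only names that appear in the diff above:

    for account, account_info in settings.ACCOUNTS.iteritems():  # Python 2 dict iteration, per the python27 runtime
        feed_url = settings.BASE_URL + '&zip=' + account_info['zipcode'] + '&radius=' + account_info['radius']
        # ... fetch and scrape feed_url, then for each fresh station row:
        auth = tweepy.OAuthHandler(account_info['consumer_key'], account_info['consumer_secret'])
        auth.set_access_token(account_info['token_key'], account_info['token_secret'])
        tweepy.API(auth).update_status(message_body[:140])  # each account tweets with its own credentials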
model.py (1 line changed)
@@ -2,6 +2,7 @@
from google.appengine.ext import db
class Station(db.Model):
+    account = db.StringProperty(default="nycgasfinder")
    name = db.StringProperty()
    brand = db.StringProperty()
    address = db.StringProperty()
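
The default on account is what keeps existing behavior intact: a new Station constructed without an explicit account falls back to "nycgasfinder", the original single account. A small sketch, assuming standard ext.db default behavior (all values below are invented for illustration):

    station = Station(name='Example Station', brand='Example',
                      address='123 Example St, Brooklyn NY', phone='(718) 555-0100',
                      price='$3.99', time='11/5/2012 4:40:33 PM')
    assert station.account == 'nycgasfinder'  # default applied at construction

One caveat: Station entities written before this commit have no account property stored at all, so the new equality filter in main.py will not match them; the six-minute freshness window makes that mostly harmless in practice.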
settings-example.py (22 lines changed)
@@ -1,16 +1,20 @@
# Rename to settings.py
# Data source is wrightexpress.com
-# Take zipcode as input as well a radius in miles
-ZIPCODE = '11201'
-RADIUS = '10'
-BASE_URL = 'http://rrtexternalweb.wrightexpress.com/neo_ww/site_locator/list.action?sortByValue=LAST_TRAN_TIME&sortDirection=DESCENDING&latitude=40.6945036&longitude=-73.9565551&mapType=Hybrid&mapZoom=12&sorting=true&fuelType=Unleaded+Regular&'
-FEED_URL = BASE_URL + '&zip=' + ZIPCODE + '&radius=' + RADIUS
+BASE_URL = 'http://rrtexternalweb.wrightexpress.com/neo_ww/site_locator/list.action?sortByValue=LAST_TRAN_TIME&sortDirection=DESCENDING&mapType=Hybrid&mapZoom=12&sorting=true&fuelType=Unleaded+Regular&'
+# Takes a zipcode as input as well as a radius in miles
# Twitter API / Auth
# these tokens are necessary for user authentication
# (created within the twitter developer API pages)
-CONSUMER_KEY = "xxxxxxxxxxxxxxxx"
-CONSUMER_SECRET = "xxxxxxxxxxxxxxxx"
-TOKEN_KEY = "xxxxxxxxxxxxxxxx"
-TOKEN_SECRET = "xxxxxxxxxxxxxxxx"
+
+ACCOUNTS = {
+    'nycgasfinder': {
+        'zipcode': '11201',
+        'radius': '10',
+        'consumer_key': 'xxxxxxxxxxxxxxxx',
+        'consumer_secret': 'xxxxxxxxxxxxxxxx',
+        'token_key': 'xxxxxxxxxxxxxxxx',
+        'token_secret': 'xxxxxxxxxxxxxxxx',
+    },
+}
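
Scaling to another location is now purely a configuration change: add a second entry to ACCOUNTS keyed by that account's name. A hypothetical example (the handle and all values below are invented placeholders):

    ACCOUNTS = {
        'nycgasfinder': {
            # ... as above ...
        },
        'bkgasfinder': {
            'zipcode': '11215',
            'radius': '5',
            'consumer_key': 'xxxxxxxxxxxxxxxx',
            'consumer_secret': 'xxxxxxxxxxxxxxxx',
            'token_key': 'xxxxxxxxxxxxxxxx',
            'token_secret': 'xxxxxxxxxxxxxxxx',
        },
    }

main.py picks up the new entry on its next run with no code changes.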