Skip to content

Commit

Permalink
Update scraper.py
Browse files Browse the repository at this point in the history
  • Loading branch information
brmo committed May 24, 2019
1 parent 94bd0ec commit 421bb33
Showing 1 changed file with 80 additions and 0 deletions.
80 changes: 80 additions & 0 deletions scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,83 @@
# All that matters is that your final data is written to an SQLite database
# called "data.sqlite" in the current working directory which has at least a table
# called "data".


import urllib2
import scraperwiki
import time
# from time import strftime
from BeautifulSoup import BeautifulSoup

# Reference notes that had been left in the file as bare statements. They are
# kept as comments because they cannot run as written: `unique_keys` and
# `data` are not defined at this point, and CREATE TABLE is SQL, not Python.
#
#   scraperwiki.sqlite.save(unique_keys, data, table_name="swdata", verbose=2)
#   CREATE TABLE `swdata` (`trend` text, `now` real, `cityName` text, `cityTableAveragePrice` text)
#   scraperwiki.sqlite.save(unique_keys=['country'], data=data)


def removeNL(x):
    """Clean a string of new lines and surrounding spaces.

    The string is split on '\\n', every piece is stripped of leading and
    trailing whitespace, empty pieces are discarded, and the remaining
    pieces are joined with a single space.

    Discarding the empty pieces is what keeps a leading/trailing newline
    (or a blank line in the middle) from leaving stray spaces in the
    result.

    Args:
        x: the text to clean.

    Returns:
        The cleaned, single-line text; an empty string if ``x`` holds
        nothing but whitespace.
    """
    pieces = (piece.strip() for piece in x.split('\n'))
    return " ".join(piece for piece in pieces if piece)


# Create/open a file for data storage.
# Nothing in this script writes to the file, so the handle is closed right
# away instead of being left open for the rest of the run.
f = open('gasprices-hamilton.txt', 'w')
f.close()


# Timestamp for this scraping; it is also the unique key of the saved row.
now = time.time()


# Open gasbuddy url and load to Beautiful Soup
url = "http://www.hamiltongasprices.com/"
page = urllib2.urlopen(url)
try:
    soup = BeautifulSoup(page)
finally:
    # Release the HTTP response even if parsing fails.
    page.close()


def _cleanCell(text):
    """Return cell text with newlines collapsed and non-breaking spaces
    turned into plain spaces.

    removeNL is only applied when the text actually contains a newline,
    so single-line text keeps its surrounding whitespace untouched.
    """
    if '\n' in text:
        text = removeNL(text)
    # NOTE(review): the previous code called replace(' ', ' '), which changes
    # nothing as written. Presumably it was meant to normalise non-breaking
    # spaces (either the raw '&nbsp;' entity or the decoded U+00A0
    # character) - confirm against the live page markup.
    return text.replace('&nbsp;', ' ').replace(u'\xa0', ' ')


# All values come from the second table on the page: the city name from the
# header cell of its first row, the price and trend from its second row.
cityTable = soup.findAll('table')[1]
cityTableCells = cityTable.findAll('tr')[1].findAll('td')


# Cityname
cityName = _cleanCell(cityTable.tr.th.string)


# Price trending as per Gas Buddy, these strings are part of the trend image src
trendUp = 'trend_up'
trendDown = 'trend_down'
trendFlat = 'flat'

cityTableTrendImg = cityTableCells[3].findAll('img')
cityTableTrendSRC = cityTableTrendImg[0]['src']

# Default for an image src that matches none of the known markers; without
# it `trend` would be undefined when the row is built below.
trend = 'Unknown'
# Every marker is tested (no early exit), so if several match the last one
# listed wins.
for marker, label in ((trendUp, 'Rising'),
                      (trendDown, 'Falling'),
                      (trendFlat, 'Stable')):
    if marker in cityTableTrendSRC:
        trend = label


# Average price: first text node of the second cell in the row.
cityTableAveragePrice = _cleanCell(cityTableCells[1].find(text=True))


# prepare and save data to sqlite
scrapedata = {
    'now': now,
    'cityName': cityName,
    'cityTableAveragePrice': cityTableAveragePrice,
    'trend': trend,
}

scraperwiki.sqlite.save(unique_keys=['now'], data=scrapedata)






0 comments on commit 421bb33

Please sign in to comment.