-
Notifications
You must be signed in to change notification settings - Fork 0
/
scraper.py
37 lines (30 loc) · 1.03 KB
/
scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import scraperwiki
import requests
import lxml.html
import re
def duckint(i):
    """Return ``i`` converted to ``int``, or ``i`` itself if that fails.

    Args:
        i: Any value; typically the stripped text of a table cell.

    Returns:
        ``int(i)`` when the conversion succeeds, otherwise the original
        object unchanged.
    """
    try:
        return int(i)
    except (TypeError, ValueError):
        # ValueError: text that is not an integer literal ("", "N/A", "1.5").
        # TypeError: objects int() cannot accept at all (None, lists, ...).
        return i
# Scrape the goalie statistics table from Yahoo Sports and store one record
# per player in the ScraperWiki sqlite table "score", keyed on Name.

# Column names of the "score" table and, in parallel, the index of the
# td.yspscores cell each value is read from.  The index paired with 'Wins'
# is only a placeholder: Wins is overwritten below with the text of the
# row's first <span class="yspscores">.
lookup = ['Name', 'GP', 'Shutouts', 'Wins']
num = [0, 2, 25, 0]
# Other cell indexes noted by the original author: 6 = MIN, 9 = L.

lstring = ', '.join(lookup)
# The column list is built from the constant names above, never from
# scraped input, so plain string formatting is acceptable here.
scraperwiki.sqlite.execute('create table if not exists score (%s)' % lstring)

# Alternate query previously used by the author (pos=D):
# http://sports.yahoo.com/nhl/stats/byposition?pos=D
url = 'http://sports.yahoo.com/nhl/stats/byposition?pos=G&sort=102&conference=NHL&year=season_2015'

# A timeout keeps the scraper from hanging forever on a stalled connection,
# and raise_for_status() stops an HTTP error page from being parsed as if
# it were the stats table.
response = requests.get(url, timeout=30)
response.raise_for_status()
html = response.content
root = lxml.html.fromstring(html)
rows = root.xpath('//tr[@class="ysprow1" or @class="ysprow2"]')

builder = []
for row in rows:
    cells = [cell.text_content().strip()
             for cell in row.xpath('td[@class="yspscores"]')]
    wins_spans = row.xpath('descendant-or-self::span[@class="yspscores"]')

    # A matched row without every expected cell (or without the Wins span)
    # cannot be turned into a record; skip it rather than letting an
    # IndexError abort the whole run before anything is saved.
    if len(cells) <= max(num) or not wins_spans:
        continue

    data = {column: duckint(cells[index])
            for column, index in zip(lookup, num)}
    data['Wins'] = duckint(wins_spans[0].text_content().strip())
    builder.append(data)

# Rows are upserted on Name, so re-running the scraper updates existing
# players instead of duplicating them.
scraperwiki.sqlite.save(table_name='score', data=builder, unique_keys=['Name'])