Skip to content

Commit

Permalink
First commit!
Browse files Browse the repository at this point in the history
  • Loading branch information
Desiree Abbott committed Mar 10, 2019
1 parent 2595108 commit ba0a50b
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 7 deletions.
4 changes: 2 additions & 2 deletions requirements.txt
Expand Up @@ -5,5 +5,5 @@
# Custom version of scraperwiki library
-e git+http://github.com/openaustralia/scraperwiki-python.git@morph_defaults#egg=scraperwiki

lxml==3.4.4
cssselect==0.9.1
lxml==4.3.2
cssselect==0.9.1
40 changes: 35 additions & 5 deletions scraper.py
@@ -1,15 +1,45 @@
# This is a template for a Python scraper on morph.io (https://morph.io)
# including some code snippets below that you should find helpful

# import scraperwiki
# import lxml.html
#
import scraperwiki
from lxml import html
import requests


# # Read in a page
# html = scraperwiki.scrape("http://foo.com")
# html = scraperwiki.scrape("https://www.critrolestats.com/dmcrits-wm")
## need everything in the <ol> elements with class c6

##root = fromstring(page.content)
##print [child.tag for child in root.iterdescendants()]


# Published Google Doc that this scraper reads.
KO_DOC_URL = 'https://docs.google.com/document/d/e/2PACX-1vSxiGzyYSMs9Wd5VMJi1kk-vwxmMYRozfPfJ47jYrQjokC1N4-gwG6kMioUGcLj75KPFyTQsG4Zggyi/pub?embedded=true'


def _split_ko_entry(entry):
    """Split one text entry into (character, episode_info, killed_by_what).

    Entries are expected to look like "<character> (<episode info>) <rest>".
    The text before the first "(" is the character, the text between that
    "(" and the next ")" is the episode info, and whatever follows the ")"
    is the remainder.

    Text that does not contain a complete "(...)" pair is returned whole as
    the character with the other two fields empty. Slicing on the -1 that
    str.find reports for a missing delimiter would otherwise chop
    characters off the entry silently.
    """
    head, opening, tail = entry.partition('(')
    if not opening:
        return entry.rstrip(), '', ''
    episode_info, closing, remainder = tail.partition(')')
    if not closing:
        return entry.rstrip(), '', ''
    # Strip the separator whitespace instead of assuming it is exactly one
    # character wide on each side of the parentheses.
    return head.rstrip(), episode_info, remainder.lstrip()


# Bound the wait, and fail loudly on an HTTP error so that an error page is
# never parsed as if it were the document.
page3 = requests.get(KO_DOC_URL, timeout=30)
page3.raise_for_status()
tree3 = html.fromstring(page3.content)

# Text nodes of every <span> below an <ol> that is the second <ol> child of
# its parent element.
kos = tree3.xpath('//ol[2]//span/text()')

# pk is the 1-based position of the entry in kos and is passed as the
# unique key for the saved row.
for pk, k in enumerate(kos, 1):
    char, epinfo, bywhat = _split_ko_entry(k)
    dat = {
        'character': char,
        'episode_info': epinfo,
        'killed_by_what': bywhat,
        'KOs': 1.0,
        'full_string': k,
        'pk': pk,
    }
    scraperwiki.sqlite.save(unique_keys=['pk'], data=dat)





#
# # Find something on the page using css selectors
# root = lxml.html.fromstring(html)
# root.cssselect("div[align='left']")
# root.cssselect("ol[class='c6']")
#
# # Write out to the sqlite database using scraperwiki library
# scraperwiki.sqlite.save(unique_keys=['name'], data={"name": "susan", "occupation": "software developer"})
Expand Down

0 comments on commit ba0a50b

Please sign in to comment.