Skip to content
Permalink
Browse files

Fork of code from ScraperWiki at https://classic.scraperwiki.com/scra…

  • Loading branch information...
aashiqerasool committed Jun 26, 2014
0 parents commit 301bdac1e6e7eff8483c9f5b7692dd7b98eb48e9
Showing with 23 additions and 0 deletions.
  1. +2 −0 .gitignore
  2. +21 −0 scraper.py
@@ -0,0 +1,2 @@
# Ignore output of scraper
data.sqlite
@@ -0,0 +1,21 @@
import scraperwiki
import urllib, urlparse
import lxml.etree, lxml.html
import re

# Warm-up output: one line per i in 0..9 showing "hello" and the square of i.
for i in range(10):
    print("%s %s" % ("hello", i * i))

def get_one_page(offset):
    """Download one listing page from ofsted.gov.uk and dump its table rows.

    ``offset`` is interpolated into the ``(%s)`` slot of the page URL. The
    page is parsed with ``lxml.html`` and every ``tr`` matched by the CSS
    selector is visited: a record holding the text of the row's second
    child element is built, then the row's children and its serialised
    markup are printed.

    The record is not stored or returned yet; the function returns None.
    """
    page_url = 'http://www.ofsted.gov.uk/oxfind/name/(%s)/0/(type)/4096/(length)/10' % (offset,)
    document = lxml.html.parse(page_url).getroot()

    # Add further selectors / record fields here as the scraper grows.
    for row in document.cssselect('div#contentHome table.list tr'):
        # Index 1 is the second child element of the row.
        record = {'school_type': row[1].text}

        # Debug dump: the row's child elements followed by its raw markup.
        print("%s %s" % (list(row), lxml.etree.tostring(row)))

# Script entry point: run the scraper once with offset 0.
get_one_page(offset=0)

0 comments on commit 301bdac

Please sign in to comment.
You can’t perform that action at this time.