Skip to content

Commit

Permalink
Fork of code from ScraperWiki at https://classic.scraperwiki.com/scra…
Browse files Browse the repository at this point in the history
  • Loading branch information
frabcus committed Jan 24, 2014
0 parents commit b50ab27
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -0,0 +1,2 @@
# Ignore output of scraper
data.sqlite
1 change: 1 addition & 0 deletions README.textile
@@ -0,0 +1 @@
This is just grabbing the HTML for now. Parse it later.
17 changes: 17 additions & 0 deletions scraper.py
@@ -0,0 +1,17 @@
import requests
import scraperwiki
import datetime


def get(url, when):
    """Fetch *url* and store its HTML in the scraperwiki sqlite store.

    Saves a row keyed on "when" (so re-running for the same date
    overwrites rather than duplicates) with the page body as "html".

    Parameters
    ----------
    url : str
        Page to download.
    when : datetime.date
        Date the snapshot represents; used as the unique key.

    Raises
    ------
    requests.HTTPError
        If the server returns an error status — previously an error
        page (404/500 body) would have been silently saved as data.
    """
    response = requests.get(url)
    # Don't persist error pages as if they were real scrape results.
    response.raise_for_status()
    scraperwiki.sqlite.save(["when"], { "when": when, "html": response.text })

# One-off backfill: historical snapshots pulled from archive.org captures.
# Kept commented out as a record of how the early rows were produced;
# re-running them would just overwrite the same "when"-keyed rows.
#get("http://web.archive.org/web/20100523063224/http://blog.revolutionanalytics.com/local-r-groups.html", datetime.date(2010, 5, 23))
#get("http://web.archive.org/web/20100806001622/http://blog.revolutionanalytics.com/local-r-groups.html", datetime.date(2010, 8, 6));
#get("http://web.archive.org/web/20110611085543/http://blog.revolutionanalytics.com/local-r-groups.html", datetime.date(2011, 6, 11))
#get("http://web.archive.org/web/20110715180837/http://blog.revolutionanalytics.com/local-r-groups.html", datetime.date(2011, 7, 15))

# Daily scrape: snapshot today's copy of the live page (one row per date,
# since "when" is the save key).
get("http://blog.revolutionanalytics.com/local-r-groups.html", datetime.date.today())

0 comments on commit b50ab27

Please sign in to comment.