Skip to content

Commit

Permalink
Fork of code from ScraperWiki at https://classic.scraperwiki.com/scra…
Browse files Browse the repository at this point in the history
  • Loading branch information
frabcus committed Jan 24, 2014
0 parents commit b50ab27
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -0,0 +1,2 @@
# Ignore output of scraper
data.sqlite
1 change: 1 addition & 0 deletions README.textile
@@ -0,0 +1 @@
This is just grabbing the HTML for now. Parse it later.
17 changes: 17 additions & 0 deletions scraper.py
@@ -0,0 +1,17 @@
import requests
import scraperwiki
import datetime


def get(url, when):
    """Fetch *url* and store its HTML in the scraperwiki sqlite store.

    Saves a row keyed on "when" (so re-running for the same date
    overwrites rather than duplicates) with the page body as "html".

    Parameters
    ----------
    url : str
        Page to download.
    when : datetime.date
        Date the snapshot represents; used as the unique key.

    Raises
    ------
    requests.HTTPError
        If the server returns an error status — previously an error
        page (404/500 body) would have been silently saved as data.
    """
    response = requests.get(url)
    # Don't persist error pages as if they were real scrape results.
    response.raise_for_status()
    scraperwiki.sqlite.save(["when"], { "when": when, "html": response.text })

# One-off backfill: historical snapshots pulled from archive.org captures.
# Kept commented out as a record of how the early rows were produced;
# re-running them would just overwrite the same "when"-keyed rows.
#get("http://web.archive.org/web/20100523063224/http://blog.revolutionanalytics.com/local-r-groups.html", datetime.date(2010, 5, 23))
#get("http://web.archive.org/web/20100806001622/http://blog.revolutionanalytics.com/local-r-groups.html", datetime.date(2010, 8, 6));
#get("http://web.archive.org/web/20110611085543/http://blog.revolutionanalytics.com/local-r-groups.html", datetime.date(2011, 6, 11))
#get("http://web.archive.org/web/20110715180837/http://blog.revolutionanalytics.com/local-r-groups.html", datetime.date(2011, 7, 15))

# Daily scrape: snapshot today's copy of the live page (one row per date,
# since "when" is the save key).
get("http://blog.revolutionanalytics.com/local-r-groups.html", datetime.date.today())

0 comments on commit b50ab27

Please sign in to comment.