Skip to content
Browse files

Initial Creation

  • Loading branch information...
0 parents commit 8bd8fa791505114baf87105a818fdffb56fba175 @coderpunk committed Feb 6, 2010
Showing with 55 additions and 0 deletions.
  1. +6 −0 README
  2. +49 −0 newseum-pages.py
6 README
@@ -0,0 +1,6 @@
+This is a simple Python application to pull the newspaper JPG images from newseum.org
+
+Edit the CITIES list in the script to set the IDs of the papers to pull.
+
+Images are stored in the current directory.
+
49 newseum-pages.py
@@ -0,0 +1,49 @@
+#!/usr/bin/env python
+"""
+ Quick Newseum Frontpage Grabber script
+ Copyright 2009 by Brian C. Lane
+ Imp Software
+ All Rights Reserved
+
+ Modify the CITIES list below to add the city designators (as seen in the
+ URLs at http://www.newseum.org/todaysfrontpages/default.asp)
+"""
+import urllib2
+import re
+import os
+import urlparse
+
+# Paper designators to download; each entry is substituted into NEWSEUM_URL.
+# Add more cities here
+CITIES = [ "AL_AS", "AL_MA", ]
+
+# Page URL template for one paper; %s is replaced with an entry from CITIES.
+NEWSEUM_URL="http://www.newseum.org/todaysfrontpages/hr.asp?fpVname=%s"
+# Root URL of the Newseum site.
+NEWSEUM_IMG="http://www.newseum.org"
+
+def fetchNewseumImage(city):
+ """
+ Fetch the image for a city
+ """
+ print "Parsing the page for %s" % (city)
+ page = urllib2.urlopen(NEWSEUM_URL % city).read()
+
+ # Quick and dirty grep for the image name
+ match = re.search('<img class="tfp_lrg_img" src="(.*)" alt=', page)
+ if match:
+ img_url = NEWSEUM_IMG + os.path.abspath(match.group(1))
+ print "Saving the image for %s" % (city)
+ image = urllib2.urlopen(img_url).read()
+ open(os.path.basename(match.group(1)), "wb").write(image)
+
+
+def main():
+ """
+ Main code goes here
+ """
+ for city in CITIES:
+ fetchNewseumImage(city)
+
+
+# Run main() only when this file is executed as a script, not when imported.
+if __name__ == '__main__':
+ main()
+
+

0 comments on commit 8bd8fa7

Please sign in to comment.
Something went wrong with that request. Please try again.