Introduce a -c / --content switch that identifies the book by its content hash alone

The new switch is mutually exclusive with the -l switch. Thanks to flo for this idea and a basic patch.
hannoL · Apr 8, 2009 · d3c15dc · d3c15dc
1 parent be63c25
commit d3c15dc
Showing 1 changed file with 44 additions and 31 deletions.
diff --git a/springer_download.py b/springer_download.py
@@ -17,55 +17,64 @@ def main(argv):
         error("You have to install pdftk.")
     if not findInPath("iconv"):
         error("You have to install iconv.")
-    
+
     try:
-        opts, args = getopt.getopt(argv, "hl:", ["help", "link="])
+        opts, args = getopt.getopt(argv, "hl:c:", ["help", "link=","content="])
     except getopt.GetoptError:
         error()
-        
+
     link = ""
-    
+
     for opt, arg in opts:
         if opt in ("-h", "--help"):
             usage()
             sys.exit()
+        elif opt in ("-c", "--content"):
+            if link != "":
+                error("-c and -l arguments are mutually exclusive")
+
+            link = "http://springerlink.com/content/" + arg
         elif opt in ("-l", "--link"):
+            if link != "":
+                error("-c and -l arguments are mutually exclusive")
+
             link = arg
+
     if link == "":
         error("You have to define a link.")
     if not re.match("https?://(www\.)?springerlink.(com|de)/content/[a-z0-9]+(/\?[^/]*)?$", link):
         error("Bad link given. See LINK below.")
-    
+
     # remove all arguments from link
     link = re.sub(r"/?\?[^/]*$", "/", link)
-    
+
     baseLink = link
 
     chapters = list()
     hasFrontMatter = False
     hasBackMatter = False
 
     loader = urllib.FancyURLopener()
-    
+
     bookTitle = ""
-    
+
     while True:
         # download page source
         try:
             print "Please wait, link source is being downloaded...\n\t%s" % link
             page = loader.open(link).read()
         except IOError, e:
             error("Bad link given (%s)" % e)
-        
+
         if bookTitle == "":
             match = re.search(r'<h2 class="MPReader_Profiles_SpringerLink_Content_PrimitiveHeadingControlName">([^<]+)</h2>', page)
             if not match or match.group(1).strip() == "":
                 error("Could not evaluate book title - bad link?")
             else:
                 bookTitle = match.group(1).strip()
             print "\nThe book you are trying to download is called '%s'\n" % bookTitle
-            
-        
+
+
         # get chapters
         for match in re.finditer('href="([^"]+.pdf)"', page):
             chapterLink = match.group(1)
@@ -78,52 +87,52 @@ def main(argv):
             if chapterLink[:7] == "http://":
                 continue
             chapters.append(match.group(1))
-        
+
         # get next page
         match = re.search(r'<a href="([^"]+)">Next</a>', page)
         if match:
             link = "http://springerlink.com" + match.group(1).replace("&amp;", "&")
         else:
             break
-    
+
     if hasFrontMatter:
         chapters.insert(0, "front-matter.pdf")
 
     if hasBackMatter:
         chapters.append("back-matter.pdf")
-    
+
     if len(chapters) == 0:
         error("No chapters found - bad link?")
-    
+
     print "found %d chapters" % len(chapters)
-    
+
     # setup
     curDir = os.getcwd()
-    tempDir = tempfile.mkdtemp()    
+    tempDir = tempfile.mkdtemp()
     os.chdir(tempDir)
-    
+
     i = 1
     fileList = list()
-    
+
     for chapterLink in chapters:
         if chapterLink[0] == "/":
             chapterLink = "http://springerlink.com" + chapterLink
         else:
             chapterLink = baseLink + chapterLink
-            
+
         print "downloading chapter %d/%d" % (i, len(chapters))
         localFile, mimeType = geturl(chapterLink, "%d.pdf" % i)
-        
+
         if mimeType.gettype() != "application/pdf":
             os.chdir(curDir)
             shutil.rmtree(tempDir)
             error("downloaded chapter %s has invalid mime type %s - are you allowed to download it?" % (chapterLink, mimeType.gettype()))
-          
+
         fileList.append(localFile)
         i += 1
-    
+
     print "merging chapters"
-    
+
     p1 = subprocess.Popen(["echo", bookTitle], stdout=subprocess.PIPE)
     p2 = subprocess.Popen(["iconv", "-f", "UTF-8", "-t" ,"ASCII//TRANSLIT"], stdin=p1.stdout, stdout=subprocess.PIPE)
     bookTitlePath = p2.communicate()[0]
@@ -132,40 +141,44 @@ def main(argv):
         os.chdir(curDir)
         shutil.rmtree(tempDir)
         error("could not transliterate book title %s" % bookTitle)
-    
+
     bookTitlePath = bookTitlePath.replace("/", "-")
     bookTitlePath = re.sub("\s+", "_", bookTitlePath)
-    
+
     bookTitlePath = curDir + "/%s.pdf" % bookTitlePath
-    
+
     if len(fileList) == 1:
       shutil.move(fileList[0], bookTitlePath)
     else:
       os.system("pdftk %s cat output '%s'" % (" ".join(fileList), bookTitlePath))
-    
+
     # cleanup
     os.chdir(curDir)
     shutil.rmtree(tempDir)
 
     print "book %s was successfully downloaded, it was saved to %s" % (bookTitle, bookTitlePath)
-    
+
     sys.exit()
-        
+
 # give a usage message
 def usage():
     print """Usage:
 %s [OPTIONS]
 
 Options:
   -h, --help              Display this usage message
-  -l LINK, --link=LINK    define the link of the book to start downloading
+  -l LINK, --link=LINK    defines the link of the book you intend to download
+  -c HASH, --content=HASH builds the link from a given HASH (see below)
+
+You have to set exactly one of these options.
 
 LINK:
   The link to your the detail page of the ebook of your choice on SpringerLink.
   It lists book metadata and has a possibly paginated list of the chapters of the book.
   It has the form:
     http://springerlink.com/content/HASH/STUFF
   Where: HASH is a string consisting of lower-case, latin chars and numbers.
+         It alone identifies the book you intent do download.
          STUFF is optional and looks like ?p=...&p_o=... or similar. Will be stripped.
 """ % os.path.basename(sys.argv[0])