Permalink
Browse files

goalhi downloader...

  • Loading branch information...
1 parent df50c23 commit 64be61c8e4de12aac799c7b2e3310481d1de74d1 @donie committed Mar 10, 2012
Showing with 101 additions and 0 deletions.
  1. +1 −0 .gitignore
  2. +75 −0 gh_downloader.py
  3. +25 −0 vimeo.html
View
1 .gitignore
@@ -0,0 +1 @@
+*.swp
View
75 gh_downloader.py
@@ -0,0 +1,75 @@
+#!/usr/bin/env python
+
+from bs4 import BeautifulSoup
+import re
+import os
+import sys
+import getopt
+import urllib2
+
+sfx = ('small.jpg', 'small.gif', 'small.png')
+USAGE = '''
+Usage: gh_downloader.py [arguments]
+
+Arguments:
+ -h display this help and exit
+ -u hc user's id
+ -p hc album's first page, please note, better be with the first page of the album
+
+Examples:
+ gh_downloader.py -p
+ gh_mkfakefile.py -h
+'''
+
+def if_next(page_url):
+ page = urllib2.urlopen(page_url)
+ soup = BeautifulSoup(page)
+ if len(soup.find_all("a", { "class" : "next" })) == 1:
+ return "http://my.hoopchina.com" + soup.find_all("a", { "class" : "next" })[0].get('href')
+ else:
+ print "----------------------------------"
+ print "no more pages"
+ print "----------------------------------"
+
+def page_download(page_url):
+ page = urllib2.urlopen(page_url)
+ soup = BeautifulSoup(page)
+ print len(soup.find_all("a", { "class" : "next" }))
+ for src in soup.find_all('img'):
+ if src.get('src').endswith(sfx):
+ #tgt_url = src.get('src').replace('small', '')
+ print src.get('src').replace('small', 'big')
+
+def main():
+ print "Started"
+ url = "http://my.hoopchina.com/lishg1990/photo/a70625-1.html"
+ try:
+ opts, args = getopt.getopt(sys.argv[1:], "p:hu:", ["page=", "user="])
+ except getopt.GetoptError, err:
+ # print help information and exit:
+ print USAGE
+ sys.exit(2)
+ page_no = 1
+ user = False
+ for o, v in opts:
+ if o == "-p":
+ page_download(v)
+ while not if_next(v) is None:
+ v = if_next (v)
+ page_no = page_no + 1
+ print "----------------------------------"
+ print "Page"+ str(page_no) + ":url is " + url
+ print "----------------------------------"
+ page_download(v)
+ continue
+ elif o == "-u":
+ print "lol @ " + v
+ sys.exit()
+ elif o in ("-h", "--help"):
+ print USAGE
+ sys.exit()
+ else:
+ assert False, "unhandled option"
+
+# Run main() only when this file is executed as a script, not when it is
+# imported as a module.
+if __name__ == '__main__':
+ main()
View
25 vimeo.html
@@ -0,0 +1,25 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<!-- Test page that embeds movie.mov through the QuickTime <object> plugin,
+     with a nested <embed> carrying the same settings. -->
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+    <meta http-equiv="content-type" content="text/html; charset=utf-8" />
+
+    <title>test html</title>
+
+</head>
+<body>
+    <object width="640" height="480" classid="clsid:02BF25D5-8C17-4B23-BC80-D3488ABDDC6B"
+        codebase="http://www.apple.com/qtactivex/qtplugin.cab">
+        <param name="src" value="movie.mov" />
+        <param name="controller" value="true" />
+        <param name="autoplay" value="false" />
+        <!-- NOTE(review): <embed> is not part of XHTML 1.0 Strict; presumably
+             kept for browsers that do not honour the <object> classid. -->
+        <embed src="movie.mov"
+            width="640" height="480"
+            controller="true" autoplay="false"
+            scale="tofit" cache="true"
+            pluginspage="http://www.apple.com/quicktime/download/"
+        />
+    </object>
+
+</body>
+</html>

0 comments on commit 64be61c

Please sign in to comment.