Skip to content

Commit

Permalink
goalhi downloader...
Browse files Browse the repository at this point in the history
  • Loading branch information
donie committed Mar 10, 2012
1 parent df50c23 commit 64be61c
Show file tree
Hide file tree
Showing 3 changed files with 101 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -0,0 +1 @@
*.swp
75 changes: 75 additions & 0 deletions gh_downloader.py
@@ -0,0 +1,75 @@
#!/usr/bin/env python

from bs4 import BeautifulSoup
import re
import os
import sys
import getopt
import urllib2

# File-name endings that mark a thumbnail image; kept as a tuple so it can be
# passed straight to str.endswith().
sfx = ('small.jpg', 'small.gif', 'small.png')

# Help text printed for -h and whenever the command line cannot be parsed.
USAGE = '''
Usage: gh_downloader.py [arguments]
Arguments:
-h display this help and exit
-u hc user's id
-p hc album's first page, please note, better be with the first page of the album
Examples:
gh_downloader.py -p <album-first-page-url>
gh_downloader.py -h
'''

def if_next(page_url):
    """Return the URL of the page that follows page_url, or None.

    The page is fetched and parsed, then searched for <a class="next">
    anchors. When exactly one such anchor with an href is found, the href is
    appended to "http://my.hoopchina.com" and returned. In every other case
    a "no more pages" banner is printed and None is returned.
    """
    page = urllib2.urlopen(page_url)
    try:
        # BeautifulSoup consumes the response while constructing the tree,
        # so the connection can be released right afterwards.
        soup = BeautifulSoup(page)
    finally:
        page.close()

    # Query once and reuse the result instead of searching the tree twice.
    next_links = soup.find_all("a", {"class": "next"})
    # NOTE(review): a page carrying two "next" anchors (e.g. pagination bars
    # at top and bottom) is treated as the last page -- confirm intended.
    if len(next_links) == 1:
        href = next_links[0].get('href')
        # An anchor without href would otherwise fail on string concatenation.
        if href:
            return "http://my.hoopchina.com" + href

    print("----------------------------------")
    print("no more pages")
    print("----------------------------------")
    return None

def page_download(page_url):
    """Print rewritten image URLs for every thumbnail found on page_url.

    The page is fetched and parsed. The number of <a class="next"> anchors is
    printed first, then, for each <img> whose src ends with one of the
    suffixes in ``sfx``, the src is printed with 'small' replaced by 'big'.
    Nothing is written to disk and nothing is returned.
    """
    page = urllib2.urlopen(page_url)
    try:
        # The response is fully read while the tree is built; close it after.
        soup = BeautifulSoup(page)
    finally:
        page.close()

    print(len(soup.find_all("a", {"class": "next"})))
    for img in soup.find_all('img'):
        src = img.get('src')
        # <img> tags without a src attribute yield None; skip them rather
        # than crash on None.endswith().
        if src and src.endswith(sfx):
            print(src.replace('small', 'big'))

def main():
    """Parse the command line and dispatch on the options given.

    -p/--page URL  process URL with page_download(), then keep following the
                   link returned by if_next() until it returns None.
    -u/--user ID   print a placeholder message and exit.
    -h/--help      print USAGE and exit.

    An unparsable command line prints USAGE and exits with status 2.
    """
    print("Started")
    try:
        opts, args = getopt.getopt(
            sys.argv[1:], "p:hu:", ["page=", "user=", "help"])
    except getopt.GetoptError:
        # print help information and exit:
        print(USAGE)
        sys.exit(2)

    page_no = 1
    for o, v in opts:
        if o in ("-p", "--page"):
            page_download(v)
            # Resolve the next link once per page: every if_next() call is a
            # full HTTP fetch, so calling it twice doubles the traffic.
            next_url = if_next(v)
            while next_url is not None:
                page_no = page_no + 1
                print("----------------------------------")
                # Report the page actually being processed.
                print("Page" + str(page_no) + ":url is " + next_url)
                print("----------------------------------")
                page_download(next_url)
                next_url = if_next(next_url)
        elif o in ("-u", "--user"):
            print("lol @ " + v)
            sys.exit()
        elif o in ("-h", "--help"):
            print(USAGE)
            sys.exit()
        else:
            # Raised explicitly so the guard survives `python -O`.
            raise AssertionError("unhandled option")


if __name__ == '__main__':
    main()
25 changes: 25 additions & 0 deletions vimeo.html
@@ -0,0 +1,25 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8" />

<title>test html</title>

</head>
<body>
<object width="640" height="480" classid="clsid:02BF25D5-8C17-4B23-BC80-D3488ABDDC6B"
codebase="http://www.apple.com/qtactivex/qtplugin.cab">
<param name="src" value="movie.mov" />
<param name="controller" value="true" />
<param name="autoplay" value="false" />
<embed src="movie.mov"
width="640" height="480"
controller="true" autoplay="false"
scale="tofit" cache="true"
pluginspage="http://www.apple.com/quicktime/download/"
/>
</object>

</body>
</html>

0 comments on commit 64be61c

Please sign in to comment.