-
Notifications
You must be signed in to change notification settings - Fork 1
/
unsplash.py
83 lines (64 loc) · 1.83 KB
/
unsplash.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
from pyquery import PyQuery
from StringIO import StringIO
from os import path
from urlparse import urljoin
from Queue import Queue
from threading import Thread
import requests, sys
# Grid page URL template; '%d' is replaced with the page number (run() starts
# at page 1). Also used as the base when resolving photo links with urljoin().
URL = 'https://unsplash.com/grid?page=%d'
# Number of Downloader worker threads started by run().
THREAD_COUNT = 3
# Default value of run()'s ``directory`` parameter (where images are saved).
DIRECTORY = 'img/'
class Downloader(Thread):
    """Worker thread that downloads image URLs taken from a shared queue.

    Every URL pulled from the queue is saved as ``<directory>/<name>.jpg``,
    where ``<name>`` is the second-to-last ``/``-separated segment of the URL.
    Files that already exist are skipped.
    """

    # Seconds to wait for the server before giving up on a single request;
    # without a timeout a stalled connection would block the worker forever.
    REQUEST_TIMEOUT = 30

    def __init__(self, queue, directory):
        """Remember the work queue and the output directory.

        Args:
            queue: Queue-like object providing ``get()`` and ``task_done()``.
            directory: Directory the downloaded files are written into.
        """
        Thread.__init__(self)
        self.queue = queue
        self.directory = directory
        # Set to True from outside to make run() stop once the current item
        # has been handled.
        self.kill_received = False

    def run(self):
        """Process queued URLs until ``kill_received`` is set.

        The flag is only checked between items; ``queue.get()`` blocks while
        the queue is empty. A failed download is reported on stderr and the
        item is still marked as done, so one bad URL can neither kill the
        worker nor leave ``queue.join()`` waiting forever.
        """
        while not self.kill_received:
            url = self.queue.get()
            try:
                self.download(url)
            except Exception as exc:
                # Best effort: report the failure and keep serving the queue.
                sys.stderr.write('Failed to download %s: %s\n' % (url, exc))
            finally:
                self.queue.task_done()

    def download(self, url):
        """Fetch ``url`` and store it as a ``.jpg`` file in ``self.directory``.

        Does nothing when the target file already exists.

        Args:
            url: Image URL; its second-to-last path segment names the file.

        Raises:
            IndexError: If ``url`` contains no ``/``.
            requests.RequestException: On network errors, timeouts or a
                non-success HTTP status.
        """
        basename = url.split('/')[-2]
        filename = path.join(self.directory, '%s.jpg' % basename)
        if path.exists(filename):
            return
        sys.stdout.write('Downloading %s.jpg\n' % basename)
        sys.stdout.flush()
        response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        # Do not save an HTTP error page under an image file name.
        response.raise_for_status()
        with open(filename, 'wb') as image_file:
            image_file.write(response.content)
def run(directory=DIRECTORY, *args):
    """Download every photo linked from the paginated Unsplash grid.

    Starts ``THREAD_COUNT`` daemon ``Downloader`` threads, then walks the grid
    pages beginning at page 1 and enqueues the link of every ``.photo``
    element. Scraping stops at the first page that yields no links, after
    which the function waits for the queue to drain.

    Args:
        directory: Directory the images are saved into; created if missing.
        *args: Not supported; present so that surplus command-line arguments
            are rejected with a clear error.

    Raises:
        Exception: If any extra positional argument is given.
    """
    # Function-scope import: the module itself only imports ``path`` from os.
    from os import makedirs

    # ``args`` is a tuple, so it cannot be popped; the first positional
    # argument is already bound to ``directory`` and anything else is surplus.
    if args:
        raise Exception('Unknown arguments')

    # The workers open files inside ``directory``, so it has to exist.
    if directory and not path.isdir(directory):
        makedirs(directory)

    queue = Queue()
    threads = []
    for _ in range(THREAD_COUNT):
        worker = Downloader(queue, directory)
        # Daemon threads do not keep the process alive once run() returns.
        worker.daemon = True
        worker.start()
        threads.append(worker)

    page = 1
    while True:
        doc = PyQuery(URL % page)
        found = 0
        for div in doc('.photo'):
            href = PyQuery(div).find('a').attr('href')
            if not href:
                # No link on this element; joining None onto URL would
                # enqueue the grid page itself.
                continue
            queue.put(urljoin(URL, href))
            found += 1
        if not found:
            break
        page += 1

    try:
        queue.join()
    except KeyboardInterrupt:
        # Ask the workers to stop after their current item.
        for worker in threads:
            worker.kill_received = True
# Script entry point: hand the command-line arguments (without the program
# name) to run() as positional arguments.
if __name__ == '__main__':
    cli_arguments = sys.argv[1:]
    run(*cli_arguments)