-
Notifications
You must be signed in to change notification settings - Fork 0
/
crawl.py
executable file
·36 lines (31 loc) · 1.16 KB
/
crawl.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#!/usr/bin/env python3
import json
import os
from urllib import parse
from urllib import request
def search(lang, page_num):
    """Download GitHub repository-search results for one language.

    Requests pages 1..page_num (up to 100 results per page) of repositories
    written in *lang* that have more than 10 stars, and writes each raw JSON
    response body to ``<lang>-<page>.json`` in the current working directory.

    If the ``GITHUB_TOKEN`` environment variable is set, it is sent as the
    ``Authorization`` token; otherwise the requests are unauthenticated.

    Args:
        lang: GitHub language name, e.g. ``'shell'`` or ``'c++'``.
        page_num: Number of result pages to fetch; values below 1 fetch
            nothing.

    Raises:
        urllib.error.URLError: If a request fails (``HTTPError`` for an
            error status returned by the server).
        OSError: If an output file cannot be written.
    """
    endpoint = 'https://api.github.com/search/repositories'
    # Both qualifiers must live inside the single `q` parameter, separated by
    # a space; appending `&stars:>10` would create an unrelated URL parameter
    # and the star filter would never reach the search query.
    search_terms = 'language:' + lang + ' stars:>10'
    # Credentials are read from the environment so that no secret is kept in
    # source control.
    token = os.environ.get('GITHUB_TOKEN')

    for page in range(1, page_num + 1):
        # urlencode escapes characters such as '+', ':' and '>' so that
        # languages like 'c++' reach the API intact.
        params = parse.urlencode(
            {'q': search_terms, 'per_page': 100, 'page': page}
        )
        req = request.Request(endpoint + '?' + params)
        if token:
            req.add_header('Authorization', 'token ' + token)
        # The context manager closes the HTTP response even if reading fails.
        with request.urlopen(req) as response:
            body = response.read().decode('utf8')
        # TODO use username--reponame as name
        filename = lang + '-' + str(page) + '.json'
        # Write with the same encoding the body was decoded with, rather than
        # whatever the locale default happens to be.
        with open(filename, 'w', encoding='utf8') as f:
            f.write(body)
        print('writen to ' + filename)
if __name__ == '__main__':
    # Script entry point: fetch the first 10 result pages for each language.
    # TODO multi-thread
    # Other candidate languages: c, java, javascript, python, ruby, php,
    # c++, csharp, objective-c.
    languages = ('shell',)
    for language in languages:
        search(language, 10)