-
Notifications
You must be signed in to change notification settings - Fork 2
/
fetch_real_streakids.py
58 lines (45 loc) · 1.85 KB
/
fetch_real_streakids.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
from bs4 import BeautifulSoup
import requests
import re
import datetime
import json
import os
# Shepherd CGI page queried once per day; the day goes in the `date` query
# parameter as YYYYMMDD.
SHEPHERD_URL = 'http://yupana.caltech.edu/cgi-bin/ptf/ssm/zsrs/shepherd.cgi'
# Alternative endpoint that was commented out in the script:
# 'http://private.caltech.edu:8088/zstreak/shepherd.cgi'

# Captures the text running from "strkid" up to "_scimref".
# NOTE(review): ".*" is greedy, so if two such names ever share one line of
# the page they come back as a single match spanning both -- confirm against
# the real page layout before relying on the ids.
STREAK_ID_PATTERN = re.compile(r'(strkid.*)_scimref')

# Seconds to wait on the server. Without a timeout requests blocks
# indefinitely when the server stalls, which would hang the whole run.
REQUEST_TIMEOUT = 60


def date_strings(date_start, date_end):
    """Return every day from *date_start* to *date_end* as 'YYYYMMDD' strings.

    The number of days is ``(date_end - date_start).days + 1``, so the range
    is inclusive of both ends in whole days, and it is empty when *date_end*
    lies a day or more before *date_start*.
    """
    n_days = (date_end - date_start).days + 1
    return [
        (date_start + datetime.timedelta(days=offset)).strftime('%Y%m%d')
        for offset in range(n_days)
    ]


def fetch_streak_ids(session, date):
    """Fetch the shepherd page for *date* and extract the streak ids from it.

    Args:
        session: a ``requests.Session`` carrying the credentials.
        date: the day to query, formatted as 'YYYYMMDD'.

    Returns:
        The list of regex captures found in the page (possibly empty), or
        ``None`` when the server answered with a status other than 200.

    Raises:
        requests.RequestException: on connection errors and timeouts.
    """
    result = session.get(SHEPHERD_URL, params={'date': date}, timeout=REQUEST_TIMEOUT)
    if result.status_code != 200:
        # Report the skipped day instead of dropping it silently.
        print(f'{date}: skipped, HTTP status {result.status_code}')
        return None
    # The pattern is applied to BeautifulSoup's re-serialized markup, not to
    # the raw response bytes.
    soup = BeautifulSoup(result.content, 'html.parser')
    return STREAK_ID_PATTERN.findall(str(soup))


def main(date_start=datetime.datetime(2018, 12, 1), date_end=None):
    """Collect streak ids day by day and dump them to a JSON file.

    Reads the credentials from ``./secrets.json`` (keys ``yupana.user`` and
    ``yupana.pwd``), queries the shepherd page for every day in the range,
    writes ``reals_<start>_<end>.json`` mapping each day that had matches to
    its list of ids, and finally prints all ids as one flat list.

    Args:
        date_start: first day to query (naive datetime).
        date_end: last day to query (naive datetime); defaults to the current
            UTC time.
    """
    # Alternative ranges that were commented out in the script:
    #   date_start: 2018-01-31, 2018-05-31, 2018-11-01, 2018-12-15
    #   date_end:   2018-04-30
    if date_end is None:
        # Same naive UTC value as the deprecated datetime.utcnow().
        date_end = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)

    # load secrets
    with open('./secrets.json', encoding='utf-8') as sjson:
        secrets = json.load(sjson)

    reals = {}
    # The context manager closes the session's pooled connections on exit.
    with requests.Session() as session:
        session.auth = (secrets['yupana']['user'], secrets['yupana']['pwd'])

        for date in date_strings(date_start, date_end):
            try:
                cutouts = fetch_streak_ids(session, date)
            except Exception as e:
                # Deliberately broad: one bad day must not abort the run and
                # lose the days already collected. Say which day failed.
                print(f'{date}: {e}')
                continue
            if cutouts is None:
                continue
            print(date)
            print(cutouts)
            if cutouts:
                reals[date] = cutouts

    json_filename = f'reals_{date_start.strftime("%Y%m%d")}_{date_end.strftime("%Y%m%d")}.json'
    with open(json_filename, 'w', encoding='utf-8') as outfile:
        json.dump(reals, outfile, sort_keys=True, indent=2)

    # Flatten the per-day lists, keeping the order in which days were added.
    real_ids = [streak_id for date in reals for streak_id in reals[date]]
    print('\n', real_ids)


if __name__ == '__main__':
    main()