-
Notifications
You must be signed in to change notification settings - Fork 1
/
test.py
60 lines (49 loc) · 2.26 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
from cloudreports import database, client
import requests
import json
import datetime
# Data destination initialization (BigQuery).
# TODO(developer): Change project, dataset and key_path to the path to the Google BigQuery service
# account key file.
# See https://cloud.google.com/iam/docs/creating-managing-service-account-keys
project = 'my_project'
dataset = 'my_dataset'
key_path = 'credentials-bigquery.json'
db = database.BigQuery(project, dataset, credentials_file_path=key_path)
# Data destination initialization (ClickHouse) — alternative backend; enable this
# block (and comment out the BigQuery one above) to load into ClickHouse instead.
# TODO(developer): Change host, db_name, user, password and key_path to the path to the certificate.
# See https://cloud.yandex.ru/docs/managed-clickhouse/operations/connect
# host = 'myhost'
# db_name = 'mydb'
# user = 'myuser'
# password = '....'
# key_path = 'YandexInternalRootCA.crt'
# db = database.ClickHouse(host, db_name, user, password, key_path)
# NOTE(review): this rebinding shadows the imported `cloudreports.client` module;
# consider renaming the instance (e.g. `report_client`) in a future cleanup.
client = client.Client(db)
# Load example data from https://api.nasa.gov
# NeoWs feed: near-Earth objects for a date range, grouped by close-approach date.
# `params=` lets requests build and encode the query string; `timeout=` prevents
# an indefinite hang if the API is unreachable.
r = requests.get(
    'https://api.nasa.gov/neo/rest/v1/feed',
    params={
        'start_date': '2015-09-08',
        'end_date': '2015-09-09',
        'api_key': 'DEMO_KEY',
    },
    timeout=30,
)
# Fail fast on HTTP errors (e.g. rate limiting on DEMO_KEY) instead of trying
# to parse an error page as JSON.
r.raise_for_status()
r = r.json()
for key, value in r['near_earth_objects'].items():
    # The keys of near_earth_objects are dates in 'YYYY-MM-DD' form.
    date = datetime.datetime.strptime(key, '%Y-%m-%d')
    for row in value:
        # send data to BigQuery
        # load_json_data method arguments:
        #   entity_href  - entity unique identifier, e.g. document number (technical field)
        #   entity_id    - entity unique identifier, e.g. document number
        #   entity_type  - e.g. document type
        #   entity_data  - data from API in json format
        #   event_moment - date the entity was modified or created
        client.load_json_data(entity_href=row['id'], entity_id=row['id'], entity_type='NeoWs',
                              entity_data=row, event_moment=date)
# DONKI data: coronal mass ejection (CME) events for a date range.
# Same hardening as the NeoWs request: encoded params, timeout, status check.
r = requests.get(
    'https://api.nasa.gov/DONKI/CME',
    params={
        'startDate': '2017-01-01',
        'endDate': '2017-01-31',
        'api_key': 'DEMO_KEY',
    },
    timeout=30,
)
r.raise_for_status()
r = r.json()
for row in r:
    # startTime is formatted like '2017-01-04T16:15Z' (minute precision, Zulu).
    date = datetime.datetime.strptime(row['startTime'], '%Y-%m-%dT%H:%MZ')
    # send data to BigQuery (positional args: entity_href, entity_id,
    # entity_type, entity_data, event_moment — same contract as the NeoWs load).
    client.load_json_data(row['activityID'], row['activityID'], 'DONKI', row, date)
# Finalize the load; must be called before completion (presumably flushes any
# buffered rows — TODO confirm against cloudreports docs).
client.finish_load_json_data()
# Adding views for new entity types ('NeoWs', 'DONKI') in the destination database.
db.update_tables()