-
Notifications
You must be signed in to change notification settings - Fork 0
/
extract.py
111 lines (84 loc) · 3.06 KB
/
extract.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
"""extract simply gets course ids in the initial call and then gets course details for each course.
It then parses each course's data with a Transform object so it can be loaded."""
from os import system, environ
import requests
import json
from datetime import datetime
from multiprocessing import Pool
import pandas as pd
from sqlalchemy import types, create_engine
import time
import logging
from transform import *
import concoursefig as cfg
# --- Module-level configuration, all read from the local config module (cfg) ---

# API key and the auth header that course_ids() and course_info() send.
api_key = cfg.cred['key']
HEADER = {'X-AUTH-KEY': api_key}

# Timeframe labels; not referenced by the functions visible in this file.
params = ['past', 'present', 'future']

# Debug-level log file, placed under the configured 'desktop' path prefix.
logfile = cfg.cred['desktop'] + 'zconcourse.log'
logging.basicConfig(filename=logfile, level=logging.DEBUG)

# Clear the console before the run starts ('cls' is the Windows shell command).
system('cls')

# Base url that the endpoint query strings are appended to.
domain = cfg.cred['url']
def course_ids(url):
    """Return the course ids listed by the course_ids endpoint.

    Args:
        url: Base API url; the course_ids query string is appended to it.

    Returns:
        A list holding the 'id' value of every entry in the JSON response.
    """
    logging.debug('getting course ids')
    endpoint = url + 'course_ids&year=2021&timeframe=current_future'
    listing = requests.get(endpoint, headers=HEADER).json()
    # Keep just the 'id' field of each entry, not the ext_id.
    return [entry['id'] for entry in listing]
def course_info(course_id):
    """Fetch the details of a single course.

    Args:
        course_id: Id placed in the course_info query string.

    Returns:
        The decoded JSON body of the course_info response.
    """
    endpoint = domain + 'course_info&course_id={}'.format(course_id)
    reply = requests.get(endpoint, headers=HEADER)
    return reply.json()
def oracle_connect():
    """Create a SQLAlchemy engine for the Oracle database named in the config.

    The connection url has the form ``oracle://user:password@database`` and is
    built from the cfg.cred entries 'proxyuser', 'password' and 'WHD'.

    Returns:
        The engine object returned by ``create_engine``.
    """
    # Database credentials
    dbuser = cfg.cred['proxyuser']
    dbpass = cfg.cred['password']
    dbase = cfg.cred['WHD']
    connstr = "oracle://{}:{}@{}".format(dbuser, dbpass, dbase)
    # This file imports create_engine by name (from sqlalchemy import ...), so
    # call it directly: the bare ``sqlalchemy`` module name is not bound by
    # this file's own imports and would raise NameError.
    return create_engine(connstr)
def oracle_run(SQL, conn):
    """Execute a SQL statement on a fresh cursor and return that cursor.

    Args:
        SQL: Statement text handed to the cursor's execute().
        conn: Connection object that provides a cursor() method.

    Returns:
        The cursor the statement was executed on, so callers can fetch from it.

    NOTE(review): oracle_connect() returns a SQLAlchemy engine, which does not
    appear to offer cursor() directly - confirm which connection type callers
    are expected to pass here.
    """
    cur = conn.cursor()
    cur.execute(SQL)
    return cur
def main():
    """Fetch every course, run it through the transformer and print the tables.

    Steps:
      1. Get the list of course ids from the API.
      2. Fetch the details of each course in parallel with a process pool.
      3. Feed each response to ``Transform.master``.
      4. Build one DataFrame per collection held by the transformer and print it.
      5. Log the total runtime in minutes.
    """
    start_time = time.time()
    # logging formats lazily with ``msg % args``; values must therefore be
    # referenced by placeholders in the message, otherwise the record fails to
    # format and a "Logging error" traceback is emitted instead of the message.
    logging.debug('start time: %s', start_time)

    idlist = course_ids(domain)
    total_items = len(idlist)
    logging.debug('processing %s items', total_items)

    transformer = Transform()  # creating a transformer object
    with Pool() as p:
        # map() blocks until every course has been fetched.
        res = p.map(course_info, idlist)
    for item in res:
        transformer.master(item)

    # (printed label, transformer collection) pairs, in output order.
    # All frames are built before anything is printed.
    tables = [
        ('syllabus', pd.DataFrame(data=transformer.syl)),
        ('permissions', pd.DataFrame(data=transformer.perm)),
        ('description', pd.DataFrame(data=transformer.desc)),
        ('contact info', pd.DataFrame(data=transformer.info)),
        ('rationale', pd.DataFrame(data=transformer.rati)),
        ('outcomes', pd.DataFrame(data=transformer.outc)),
        ('resources', pd.DataFrame(data=transformer.reso)),
        ('assignment', pd.DataFrame(data=transformer.assi)),
        ('grading', pd.DataFrame(data=transformer.grad)),
        ('policy', pd.DataFrame(data=transformer.poli)),
        ('schedule', pd.DataFrame(data=transformer.sche)),
    ]
    for label, frame in tables:
        print(label + '\n', frame, '\n')

    total_time = round((time.time() - start_time) / 60, 2)
    # Spaces added around the values so the message reads
    # "for 12 items was: 3.4 minutes" rather than "for12items was: 3.4minutes".
    logging.debug(
        'TOTAL RUNTIME for %s items was: %s minutes', total_items, total_time
    )
# Script entry point: run the extraction only when this file is executed
# directly. The guard also keeps main() from being re-run when the module is
# imported again, e.g. by multiprocessing worker processes under the spawn
# start method.
if __name__ == "__main__":
    main()