In [1]:
import requests
import json
import pandas as pd

# Selects which entry point the notebook talks to; the other supported value is 'bastion'.
RUN_FROM = 'uni_wifi' #'bastion'

# (base URL, extra request headers) for every supported RUN_FROM value.
_ENDPOINTS = {
    'bastion': ('http://fission:31001/', None),
    'uni_wifi': ('http://172.26.135.52:9090/', {'HOST': 'fission'}),
}

# Fail immediately on a typo instead of leaving URL/HEADERS undefined and
# hitting a NameError in some later cell.
if RUN_FROM not in _ENDPOINTS:
    raise ValueError(f'Unknown RUN_FROM {RUN_FROM!r}; expected one of {sorted(_ENDPOINTS)}')

URL, HEADERS = _ENDPOINTS[RUN_FROM]

In [24]:
# Exact response bodies the checks below compare against (raw text, not parsed JSON).
# Plain-text body returned when a request path is not matched.
PAGE_NOT_FOUND_STR = '404 page not found\n'
# JSON bodies: json.dumps with default separators reproduces the compared text exactly.
BAD_PARAMS_STR = json.dumps({"Status": 400, "Message": "Invalid Parameters"})
ERROR_STR = json.dumps({"Status": 500, "Message": "Internal Server Error"})
EMPTY_STR = json.dumps({"Status": 200, "Data": []})

# Station API

In [35]:
# Full address of the stations endpoint; echoed so the active base URL is visible.
url_stations = f'{URL}stations'
url_stations

'http://172.26.135.52:9090/stations'

In [36]:
# Download the station list and index it by the 'Station ID' field.
stations_response = requests.get(url_stations, headers=HEADERS)
resp = stations_response.json()
df_stations = pd.DataFrame.from_records(resp['Data'], index='Station ID')

# Spot check: station 23034 is expected to be Adelaide Airport.
df_stations.loc[23034,'Station Name']=='ADELAIDE AIRPORT'

True

# Weather API

In [44]:
# Malformed weather requests, each paired with the exact response body expected back.
# NOTE(review): these calls do not send headers=HEADERS, unlike the stations request — confirm intentional.
weather_error_cases = (
    ('weather/23034/2014', PAGE_NOT_FOUND_STR),       # only one year segment in the path
    ('weather/STATION_ID/2014/2015', EMPTY_STR),      # Invalid station_id
    ('weather/23034/START_YEAR/2015', ERROR_STR),     # Invalid start_year
    ('weather/23034/2014/END_YEAR', ERROR_STR),       # Invalid end_year
)
for path, expected_body in weather_error_cases:
    resp = requests.get(URL+path).text
    print((resp==expected_body))

# Valid parameters: load the records indexed by 'Date' and spot-check one value
# (the UV reading is compared as the string '28.70').
resp = requests.get(URL+'weather/94250/2014/2015').json()
df_weather = pd.DataFrame.from_records(resp['Data'], index='Date')
df_weather.loc['03/01/2014','UV']=='28.70'



True
True
True
True


True

# Crash API

In [68]:
# Malformed crash requests, each paired with the exact response body expected back.
# NOTE(review): these calls do not send headers=HEADERS, unlike the stations request — confirm intentional.
crash_error_cases = (
    ('crashes/23034/5000', PAGE_NOT_FOUND_STR),        # path has no radius_km segment
    ('crashes/STATION_ID/5000/3000', EMPTY_STR),       # Invalid station_id
    ('crashes/23034/SIZE/3000', ERROR_STR),            # Invalid size
    ('crashes/23034/10500/3000', BAD_PARAMS_STR),      # Size too big
    ('crashes/23034/5000/RADIUS_KM', ERROR_STR),       # Invalid radius_km
    ('crashes/23034/5000/-3000', BAD_PARAMS_STR),      # Negative radius_km
)
for path, expected_body in crash_error_cases:
    resp = requests.get(URL+path).text
    print((resp==expected_body))

# Valid parameters: index the hits by '_id' and spot-check one known record's crash date.
resp = requests.get(URL+'crashes/23034/5000/800').json()
df_crash = pd.DataFrame.from_records(resp['Data'], index='_id')
df_crash.loc['5XZkcI8B_XhVKXBOfiMP','_source']['crash_date']=='2014-06-19T00:00:00.000+0000'

True
True
True
True
True
True


True

# Crime API

In [98]:
# Malformed crime requests, each paired with the exact response body expected back.
# NOTE(review): these calls do not send headers=HEADERS, unlike the stations request — confirm intentional.
crime_error_cases = (
    ('crime/23034/5000', PAGE_NOT_FOUND_STR),        # path has no radius_km segment
    ('crime/STATION_ID/5000/3000', EMPTY_STR),       # Invalid station_id
    ('crime/23034/SIZE/3000', ERROR_STR),            # Invalid size
    ('crime/23034/10500/3000', BAD_PARAMS_STR),      # Size too big
    ('crime/23034/5000/RADIUS_KM', ERROR_STR),       # Invalid radius_km
    ('crime/23034/5000/-3000', BAD_PARAMS_STR),      # Negative radius_km
)
for path, expected_body in crime_error_cases:
    resp = requests.get(URL+path).text
    print((resp==expected_body))

# Valid parameters: index the hits by '_id' and spot-check one known record's reported date.
resp = requests.get(URL+'crime/95003/5000/500').json()
df_crime = pd.DataFrame.from_records(resp['Data'], index='_id')
df_crime.loc['mYMPVo8BeqktFCObjzke']['_source']['reported_date']=='2020-03-20T00:00:00'

True
True
True
True
True
True


True

# Stream API

In [90]:
def get_stream_to_pd(api: str, station_id: str, size: int, radius_km: int, verb=False) -> pd.DataFrame:
    """Fetch every page of an API query and return the rows as one DataFrame.

    The first page is requested from ``URL + '{api}/{station_id}/{size}/{radius_km}'``.
    Each following page is requested from ``URL + 'stream/' + token``, using the
    ``Token`` carried by the previous response. The ``_source`` dict of every
    entry in ``Data`` becomes one row of the result.

    Paging stops at the first response that has a ``Status`` other than 200, an
    empty or missing ``Data`` list, or no ``Token``. Bodies without ``Token`` or
    ``Data`` (as in this notebook's EMPTY_STR, BAD_PARAMS_STR and ERROR_STR)
    therefore end paging instead of raising ``KeyError``.

    Args:
        api: Endpoint name placed first in the request path (e.g. ``'crime'``).
        station_id: Station identifier placed in the request path.
        size: Value of the size path segment.
        radius_km: Value of the radius path segment.
        verb: When true, print the number of rows fetched by each call.

    Returns:
        A DataFrame built from all collected ``_source`` records; empty when
        nothing was fetched.

    Warns:
        UserWarning: if paging stopped because a response had a non-200 status;
            the rows collected before that response are still returned.
    """
    import warnings  # local import: only needed on the error path

    def _fetch_page(path):
        """GET URL+path and return (status, token, hits, message), defaulting missing keys."""
        resp_dict = json.loads(requests.get(URL+path, headers=HEADERS).text)
        return (
            resp_dict.get('Status'),
            resp_dict.get('Token'),
            resp_dict.get('Data') or [],   # missing or null Data counts as "no rows"
            resp_dict.get('Message'),
        )

    status, token, new_data, message = _fetch_page(f'{api}/{station_id}/{size}/{radius_km}')
    data = [hit['_source'] for hit in new_data]
    if verb : print(f'Called {api} api, fetched {len(new_data)} lines')

    count = 0
    # Keep following the token until a page comes back empty, failed, or without a token.
    while status == 200 and new_data and token:
        count += 1
        status, token, new_data, message = _fetch_page('stream/'+token)
        if verb : print(f'Called stream {count} times, fetched {len(new_data)} new lines')
        data += [hit['_source'] for hit in new_data]

    if status != 200:
        # Surface the failure instead of silently returning partial data.
        warnings.warn(f'{api} paging stopped on status {status}: {message}', stacklevel=2)

    if verb: print(f'Fetched a total of {len(data)}lines')
    return pd.DataFrame.from_records(data)

In [100]:
# Request the crime data with size=5 so the helper has to follow the stream
# token for the remaining rows; 7 rows are expected in total.
df_crime_full = get_stream_to_pd('crime', '95003', 5, 500, verb=True)
len(df_crime_full)==7

Called crime api, fetched 5 lines
Called stream 1 times, fetched 2 new lines
Called stream 2 times, fetched 0 new lines
Fetched a total of 7lines


True