# Imports

In [1]:
import requests
import json
import pandas as pd

from sklearn.linear_model import LinearRegression

from data_to_jupyter import time_range_query, query_to_pandas, \
                            fetch_weather_crime,call_fission_function

# Location the notebook is run from; picks the base URL and headers that the
# request helpers below read through the module-level URL / HEADERS names.
RUN_FROM = 'uni_wifi' #'bastion'

# (base URL, request headers) for every supported value of RUN_FROM.
_ENDPOINTS = {
    'bastion': ('http://fission:31001/', None),
    'uni_wifi': ('http://172.26.135.52:9090/', {'HOST': 'fission'}),
}

if RUN_FROM not in _ENDPOINTS:
    # Fail right here instead of with a NameError on URL/HEADERS in a later cell.
    raise ValueError(f'Unknown RUN_FROM {RUN_FROM!r}; expected one of {sorted(_ENDPOINTS)}')

URL, HEADERS = _ENDPOINTS[RUN_FROM]

# Functions

In [2]:
def weather_to_pd(station_id: str, start_year: int, end_year: int, verb=False) -> pd.DataFrame:
    """Fetch weather records from the weather route and return them as a DataFrame.

    Sends a GET request to ``weather/<station_id>/<start_year>/<end_year>``
    under the module-level ``URL`` (using ``HEADERS``), parses the reply body
    as JSON and builds the frame from the list stored under its ``'Data'`` key.

    Args:
        station_id: Station identifier inserted into the request path.
        start_year: Start-year segment of the request path.
        end_year: End-year segment of the request path.
        verb: When truthy, print how many records were received.

    Returns:
        A DataFrame with one row per record of the reply's ``'Data'`` list.

    Raises:
        KeyError: If the parsed reply has no ``'Data'`` key.
    """
    endpoint = URL + f'weather/{station_id}/{start_year}/{end_year}'
    reply = requests.get(endpoint, headers=HEADERS)
    records = json.loads(reply.text)['Data']

    if verb:
        print(f'Called weather api, fetched {len(records)} lines')

    return pd.DataFrame.from_records(records)

In [3]:
def _fetch_stream_page(path: str):
    """Request one page under ``URL`` and return ``(status, token, records)``.

    ``records`` is the list of ``'_source'`` values taken from the reply's
    ``'Data'`` list. A reply that lacks ``'Status'``, ``'Token'`` or ``'Data'``
    yields ``None`` / ``None`` / ``[]`` for the missing part instead of raising.
    """
    page = json.loads(requests.get(URL + path, headers=HEADERS).text)
    hits = page.get('Data') or []
    return page.get('Status'), page.get('Token'), [hit['_source'] for hit in hits]


def get_stream_to_pd(api: str, station_id: str, size: int, radius_km: int, verb=False) -> pd.DataFrame:
    """Fetch all pages of a paginated route and return the records as a DataFrame.

    The first page comes from ``<api>/<station_id>/<size>/<radius_km>``; every
    reply carries a token that is passed to ``stream/<token>`` to obtain the
    next page. Paging stops when a page is empty, when a reply's status is not
    200, or when a reply carries no token.

    If paging ends on a non-200 status, a warning is issued and the records
    collected so far are returned, rather than losing them to an exception.

    Args:
        api: Name of the route that serves the first page (e.g. ``'crime'``).
        station_id: Station identifier inserted into the request path.
        size: Size segment of the request path.
        radius_km: Radius segment of the request path.
        verb: When truthy, print progress after each request.

    Returns:
        A DataFrame with one row per ``'_source'`` record across all pages.
    """
    import warnings  # local import so this cell does not depend on the import cell

    status, token, page_records = _fetch_stream_page(f'{api}/{station_id}/{size}/{radius_km}')
    if verb:
        print(f'Called {api} api, fetched {len(page_records)} lines')
    data = list(page_records)

    count = 0
    # An empty page marks the end of the stream; a missing token means there
    # is nothing left to ask for.
    while status == 200 and page_records and token:
        count += 1
        status, token, page_records = _fetch_stream_page('stream/' + token)
        if verb:
            print(f'Called stream {count} times, fetched {len(page_records)} new lines')
        data.extend(page_records)

    if status != 200:
        warnings.warn(
            f'{api} request ended with status {status!r}; '
            f'returning the {len(data)} records fetched so far'
        )

    if verb:
        print(f'Fetched a total of {len(data)}lines')
    return pd.DataFrame.from_records(data)

# Tests

In [4]:
# Fetch the raw weather records for station 95003 (request path years 2014 to 2019).
df_weather_full = weather_to_pd(
    station_id='95003',
    start_year=2014,
    end_year=2019,
    verb=True,
)

Called weather api, fetched 1822 lines


In [16]:
# Columns that are converted to numeric dtypes with pd.to_numeric.
weather_num_cols = ['UV', 'Min Temp', 'Max Temp', 'WindSpeed', 'Min Humid', 'Max Humid', 'Rain', 'Pan-Rain', 'Evapo-Rain']

# Build the cleaned frame from the raw one without touching df_weather_full:
# numeric conversion, 'Date' -> 'date', and removal of two unused columns.
numeric_columns = {name: pd.to_numeric(df_weather_full[name]) for name in weather_num_cols}
df_weather = (
    df_weather_full
    .assign(**numeric_columns)
    .rename(columns={'Date': 'date'})
    .drop(columns=['created_at', 'source'])
)

# Parse day/month/year strings and keep only the calendar date
# (datetime.date objects, hence an object-dtype column).
df_weather['date'] = pd.to_datetime(df_weather['date'], format='%d/%m/%Y').dt.date

df_weather.dtypes

UV              float64
Max Humid         int64
Min Temp        float64
WindSpeed       float64
Min Humid         int64
Station Name     object
date             object
Rain            float64
Pan-Rain        float64
Max Temp        float64
Evapo-Rain      float64
dtype: object

In [6]:
# Fetch the raw crime records for station 95003 through the paginated 'crime' route.
df_crime_full = get_stream_to_pd(
    api='crime',
    station_id='95003',
    size=8000,
    radius_km=800,
    verb=True,
)

Called crime api, fetched 8000 lines
Called stream 1 times, fetched 8000 new lines
Called stream 2 times, fetched 2622 new lines
Called stream 3 times, fetched 0 new lines
Fetched a total of 18622lines


In [7]:
# Preview the first four raw crime records to check the fetched columns.
df_crime_full.head(4)

Unnamed: 0,reported_date,suburb,postcode,description_1,description_2,description_3,offence_count
0,2013-02-19T00:00:00,MOUNT GAMBIER,5290,OFFENCES AGAINST THE PERSON,OTHER OFFENCES AGAINST THE PERSON,Dangerous or negligent acts,1.0
1,2013-02-21T00:00:00,GLENCOE,5291,OFFENCES AGAINST PROPERTY,PROPERTY DAMAGE AND ENVIRONMENTAL,Other property damage and environmental,1.0
2,2013-02-21T00:00:00,GLENCOE,5291,OFFENCES AGAINST PROPERTY,THEFT AND RELATED OFFENCES,Theft/Illegal Use of MV,1.0
3,2013-02-21T00:00:00,TARPEENA,5277,OFFENCES AGAINST PROPERTY,SERIOUS CRIMINAL TRESPASS,SCT - Residence,1.0


In [17]:
# Total offence_count per (reported date, postcode, top-level description).
# NOTE(review): grouping is done on the raw 'reported_date' string before it is
# reduced to a calendar date; this assumes all timestamps of one day are
# identical (the previewed rows are all T00:00:00) - confirm.
crime_keys = ['reported_date', 'postcode', 'description_1']
df_crime = (
    df_crime_full
    .groupby(crime_keys)['offence_count']
    .sum()
    .reset_index()
    .rename(columns={'reported_date': 'date'})
    .assign(date=lambda frame: pd.to_datetime(frame['date']).dt.date)
)

df_crime.dtypes

date              object
postcode          object
description_1     object
offence_count    float64
dtype: object

In [19]:
# Display the aggregated crime table (one row per date / postcode / description_1).
df_crime

Unnamed: 0,date,postcode,description_1,offence_count
0,2012-07-01,5278,OFFENCES AGAINST PROPERTY,1.0
1,2012-07-01,5290,OFFENCES AGAINST PROPERTY,3.0
2,2012-07-02,5280,OFFENCES AGAINST PROPERTY,1.0
3,2012-07-02,5290,OFFENCES AGAINST PROPERTY,4.0
4,2012-07-02,5290,OFFENCES AGAINST THE PERSON,1.0
...,...,...,...,...
10607,2023-06-29,5291,OFFENCES AGAINST PROPERTY,1.0
10608,2023-06-29,5291,OFFENCES AGAINST THE PERSON,1.0
10609,2023-06-30,5290,OFFENCES AGAINST PROPERTY,2.0
10610,2023-06-30,5290,OFFENCES AGAINST THE PERSON,2.0


In [20]:
# Display the cleaned weather table built from df_weather_full.
df_weather

Unnamed: 0,UV,Max Humid,Min Temp,WindSpeed,Min Humid,Station Name,date,Rain,Pan-Rain,Max Temp,Evapo-Rain
0,17.85,92,12.7,3.40,48,BUSHY PARK (BUSHY PARK ESTATES),2015-11-06,0.0,7.2,24.3,4.0
1,24.20,89,10.1,3.62,32,BUSHY PARK (BUSHY PARK ESTATES),2015-11-07,0.0,3.8,20.0,4.6
2,23.30,96,2.1,2.23,20,BUSHY PARK (BUSHY PARK ESTATES),2015-11-08,0.0,5.6,26.1,4.9
3,18.44,80,8.4,4.49,18,BUSHY PARK (BUSHY PARK ESTATES),2015-11-09,0.0,3.2,31.7,7.0
4,23.83,78,11.1,3.81,24,BUSHY PARK (BUSHY PARK ESTATES),2015-11-10,0.0,6.0,19.3,4.9
...,...,...,...,...,...,...,...,...,...,...,...
1817,20.42,95,11.4,4.45,49,BUSHY PARK (BUSHY PARK ESTATES),2015-11-01,0.0,-1.0,29.0,5.0
1818,14.41,84,10.8,4.85,42,BUSHY PARK (BUSHY PARK ESTATES),2015-11-02,0.0,9.0,15.5,3.3
1819,14.88,90,2.6,2.69,38,BUSHY PARK (BUSHY PARK ESTATES),2015-11-03,0.0,3.4,14.8,2.8
1820,27.96,93,1.5,3.23,24,BUSHY PARK (BUSHY PARK ESTATES),2015-11-04,0.2,1.6,22.6,5.1
