# Imports

In [1]:
import requests
import json
import pickle
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from IPython.display import display, HTML
import ipywidgets as widgets
from ipyleaflet import Map, Marker, Heatmap
from sklearn.preprocessing import MinMaxScaler
import time
from functools import partial
import random
import os

from scipy.stats import pearsonr
from sklearn.linear_model import LinearRegression

# Where the notebook is executed from; selects how the Fission router is reached.
RUN_FROM = 'uni_wifi' #'bastion'

# Base URL and extra request headers for each supported location.
_ENDPOINTS = {
    'bastion': ('http://fission:31001/', None),
    'uni_wifi': ('http://172.26.135.52:9090/', {'HOST': 'fission'}),
}

# Fail here, with a clear message, instead of hitting a NameError on URL/HEADERS
# inside whichever function happens to be called first.
if RUN_FROM not in _ENDPOINTS:
    raise ValueError(f"Unknown RUN_FROM {RUN_FROM!r}; expected one of {sorted(_ENDPOINTS)}")
URL, HEADERS = _ENDPOINTS[RUN_FROM]

WEATHER_NUM_COL = ['UV', 'Min Temp', 'Max Temp', 'WindSpeed', 'Min Humid', 'Max Humid', 'Rain', 'Pan-Rain', 'Evapo-Rain']


# Endpoint used by get_full_data. Derived from the RUN_FROM selection so that every
# request in the notebook targets the same router (identical values for 'uni_wifi').
FISSION_URL = URL
FISSION_HEADERS = HEADERS

# Shared scaler; clean_data refits it on the severity column each time it runs.
scaler = MinMaxScaler()

# Functions

In [73]:
def get_full_data(params):
    """Fetch and decode a JSON document from the Fission endpoint.

    The request goes to ``FISSION_URL`` joined with ``params`` and is sent with
    ``FISSION_HEADERS``. Connection-level failures are retried up to three
    times with a fixed pause between attempts.

    Args:
        params: Route (and optional query string) appended to ``FISSION_URL``.

    Returns:
        The decoded JSON body, or ``None`` when the server answers with a
        non-200 status, the body is not valid JSON, or every attempt fails.
    """
    max_retries = 3
    retry_delay = 5  # seconds
    timeout = 60  # seconds

    # Join with exactly one slash whether or not FISSION_URL ends with one.
    url = f"{FISSION_URL.rstrip('/')}/{str(params).lstrip('/')}"

    for attempt in range(1, max_retries + 1):
        try:
            res = requests.get(url, headers=FISSION_HEADERS, timeout=timeout)
        except requests.exceptions.RequestException as e:
            print(f"Connection error: {e}")
            # Only wait when another attempt will actually follow.
            if attempt < max_retries:
                print(f"Retrying in {retry_delay} seconds...")
                time.sleep(retry_delay)
            continue

        if res.status_code != 200:
            print(res.text)
            return None

        try:
            return json.loads(res.text)
        except ValueError as e:
            # json.JSONDecodeError is a ValueError, not a RequestException, so it
            # would otherwise escape the retry handling above.
            print(f"Invalid JSON in response: {e}")
            return None

    print("Max retries exceeded. Unable to retrieve data.")
    return None


def clean_data(crashes_df):
    """Return a cleaned copy of a normalised crash-search result frame.

    The input is left untouched. The copy has the search metadata columns
    removed, the ``_source.*`` columns renamed, ``crash_date`` formatted as a
    ``DD/MM/YYYY`` string, rows with the placeholder severity ``-1`` dropped,
    ``location`` expanded into ``longitude``/``latitude`` columns, and a
    ``severity_normalized`` column produced by the module-level ``scaler``.

    Args:
        crashes_df: DataFrame containing ``_index``, ``_id``, ``_score``,
            ``_source.light_condition``, ``_source.crash_date``,
            ``_source.severity``, ``_source.location`` and ``Station ID``.

    Returns:
        The cleaned DataFrame. When no rows survive the severity filter, an
        empty frame with the same output columns is returned.
    """
    cleaned = crashes_df.copy(deep=True)
    # Drop search-engine metadata that is not needed for the analysis.
    cleaned = cleaned.drop(columns=['_index', '_id', '_score'])
    # Strip the "_source." prefix from the payload columns.
    cleaned = cleaned.rename(columns={"_source.light_condition": "light_condition", "_source.crash_date": "crash_date", "_source.severity": "severity","_source.location": "location"})
    # Store crash_date as a DD/MM/YYYY string.
    cleaned['crash_date'] = pd.to_datetime(cleaned['crash_date']).dt.strftime('%d/%m/%Y')
    # Drop rows with the missing/magic severity value (-1).
    cleaned = cleaned[cleaned['severity'] != -1]

    if cleaned.empty:
        # Nothing left to expand or scale: fitting the scaler on zero rows would
        # raise, so hand back an empty frame that still has the output columns.
        empty_float = pd.Series(dtype='float64')
        return cleaned.assign(**{
            'longitude': empty_float,
            'latitude': empty_float,
            'severity': cleaned['severity'].astype(int),
            'Station ID': cleaned['Station ID'].astype(str),
            'severity_normalized': empty_float,
        })

    # Expand the location array into two columns: element 0 becomes longitude
    # and element 1 becomes latitude.
    coords = cleaned['location'].apply(pd.Series)
    cleaned = pd.concat([cleaned, coords], axis=1)
    cleaned = cleaned.rename(columns={0: "longitude", 1: "latitude"})
    cleaned['severity'] = cleaned['severity'].astype(int)
    cleaned['Station ID'] = cleaned['Station ID'].astype(str)
    # Refit the shared scaler on this batch's severities.
    cleaned['severity_normalized'] = scaler.fit_transform(cleaned[['severity']])
    return cleaned


def plot_trend(crashes_df_copy2):
    """Draw a line chart of the number of crashes in each calendar month.

    The frame passed in is modified: ``crash_date`` is parsed from
    ``DD/MM/YYYY`` into datetimes and ``year`` and ``month`` columns are added.
    Counts are grouped by month only, so the same month of different years is
    pooled together.

    Args:
        crashes_df_copy2: DataFrame with a ``crash_date`` column in
            ``DD/MM/YYYY`` format.
    """
    # Parse once and reuse the parsed series for the derived columns.
    parsed_dates = pd.to_datetime(crashes_df_copy2['crash_date'], format='%d/%m/%Y')
    crashes_df_copy2['crash_date'] = parsed_dates
    crashes_df_copy2['year'] = parsed_dates.dt.year
    crashes_df_copy2['month'] = parsed_dates.dt.month

    # One row per month with the number of crashes recorded in it.
    counts_by_month = (
        crashes_df_copy2
        .groupby(['month'])
        .size()
        .reset_index(name='crash_count')
    )

    # Plot the trend line.
    plt.figure(figsize=(14, 7))
    sns.lineplot(x='month', y='crash_count', data=counts_by_month, marker='o')
    plt.title('Monthly Trend of Crash Counts per Month')
    plt.xlabel('Month')
    plt.ylabel('Number of Crashes')
    plt.grid(True)
    plt.show()

def update_plot(plot_output, df):
    """Redraw the crashes-per-month line chart inside an Output widget.

    ``df`` is modified: ``crash_date`` is parsed from ``DD/MM/YYYY`` into
    datetimes and ``year`` and ``month`` columns are added. Counts are grouped
    by month only.

    Args:
        plot_output: ``ipywidgets.Output`` that receives the chart; its
            previous content is cleared first.
        df: DataFrame with a ``crash_date`` column in ``DD/MM/YYYY`` format.
    """
    with plot_output:
        # Convert the crash_date to datetime format.
        df['crash_date'] = pd.to_datetime(df['crash_date'], format='%d/%m/%Y')

        # Extract year and month from the crash_date.
        df['year'] = df['crash_date'].dt.year
        df['month'] = df['crash_date'].dt.month

        # Count the number of crashes in each month.
        monthly_crashes = df.groupby(['month']).size().reset_index(name='crash_count')

        plot_output.clear_output()  # Clear the previous plot
        # Use a dedicated figure rather than plt.clf(): clf() acts on the
        # "current" figure and creates an empty one when none is open.
        fig, ax = plt.subplots(figsize=(10, 5))
        sns.lineplot(data=monthly_crashes, x='month', y='crash_count', marker='o', ax=ax)
        ax.set_title('Crash Counts per Month')
        ax.set_xlabel('Month')
        ax.set_ylabel('Number of Crashes')
        plt.show()
        # Release the figure so repeated marker moves do not accumulate figures.
        plt.close(fig)


def get_closest_station(lat, lon):
    """Look up the weather station for a coordinate via the ``stations`` route.

    Args:
        lat: Latitude of the point of interest.
        lon: Longitude of the point of interest.

    Returns:
        ``(station_id, station_location)`` taken from the response's ``Data``
        entry, or ``(None, None)`` when the request fails, the body is not
        JSON, or the response carries no usable data.
    """
    try:
        # The route takes longitude first, then latitude.
        resp = requests.get(URL + f'stations/{lon}/{lat}', headers=HEADERS, timeout=60).json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # Covers connection problems, timeouts and non-JSON bodies.
        print(f"Server error - unable to make a request for station data: {e}")
        return None, None

    if not isinstance(resp, dict) or "Data" not in resp:
        print("Server error - unable to make a request for station data")
    elif resp["Data"] == [] or resp.get("Status") != 200:
        # .get(): a response without "Status" is treated as "no data" instead of raising.
        print("Error fetching data - no data available")
    else:
        return resp['Data']['Station ID'], resp['Data']['location']
    return None, None
def clean_weather_data(resp):
    """Extract the ``Data`` payload from a weather response.

    Args:
        resp: Decoded response, or ``None`` if the request failed.

    Returns:
        ``resp["Data"]`` when the response has both ``Data`` and ``Status``
        keys, ``Data`` is not an empty list and ``Status`` is 200; otherwise
        ``None`` (after printing the reason).
    """
    well_formed = resp is not None and "Data" in resp and "Status" in resp
    if not well_formed:
        print("Server error - unable to make a request for weather data")
        return None

    if resp["Data"] == [] or resp["Status"] != 200:
        print("Error fetching data - no data available")
        return None

    return resp["Data"]
        

def get_current_weather(lat, lon):
    """Return the current weather reported by the station found for a coordinate.

    Args:
        lat: Latitude of the point of interest.
        lon: Longitude of the point of interest.

    Returns:
        The weather payload as returned by ``clean_weather_data``, or ``None``
        when no station could be resolved or the weather request failed.
    """
    station_id, _station_location = get_closest_station(lat, lon)
    if station_id is None:
        # Without a station there is nothing to query; avoid sending "id=None".
        return None
    weather_resp = get_full_data(f'current-weather?id={station_id}')
    return clean_weather_data(weather_resp)

def get_prediction(weather_data):
    """Request a crash-severity prediction for the given weather reading.

    Builds the predictor vector expected by the
    ``crash_weather_LogisticRegression`` model route, prints the prediction and
    returns it.

    Args:
        weather_data: Mapping with the keys ``'Wind Speed (km/h)'``, ``'Temp'``,
            ``'Apparent Temp'`` and ``'Rain'``.

    Returns:
        The ``prediction`` value from the model response, or ``None`` if the
        input is missing or the request/decoding fails.
    """
    if not weather_data:
        print('Error Getting Prediction: no weather data available')
        return None

    try:
        # Order in which the predictors are serialised into the query.
        predictors_col = ['UV', 'WindSpeed', 'MaxTemp', 'MinTemp', 'Rain', 'EvapoRain']

        # The higher of the two temperature readings is sent as MaxTemp and the
        # lower as MinTemp; the original values are forwarded unchanged.
        temp = weather_data['Temp']
        apparent = weather_data['Apparent Temp']
        if float(temp) > float(apparent):
            max_temp, min_temp = temp, apparent
        else:
            max_temp, min_temp = apparent, temp

        predictors = {
            # UV and EvapoRain are random placeholder values, not read from weather_data.
            'UV': round(random.uniform(0, 2), 1),
            # Truncate to whole km/h; float() first so "12.5"-style strings are accepted.
            'WindSpeed': float(int(float(weather_data['Wind Speed (km/h)']))),
            'MaxTemp': max_temp,
            'MinTemp': min_temp,
            'Rain': weather_data['Rain'],
            'EvapoRain': round(random.uniform(1.6, 2.5), 1),
        }

        query = ','.join(str(predictors[field]) for field in predictors_col)
        # Build the URL with plain string joining; os.path.join uses the OS path separator.
        endpoint = f"{URL.rstrip('/')}/models/crash_weather_LogisticRegression"
        res = requests.get(endpoint, params={'predictors': query}, headers=HEADERS, timeout=60)
        prediction = json.loads(res.text)['prediction']
    except Exception as e:
        # Keep the notebook running, but report why the prediction failed.
        print(f'Error Getting Prediction: {e}')
        return None

    print("Severity Prediction: " + str(prediction))
    return prediction

# Observer callback: refresh the heatmap and the monthly plot when the marker moves
def update_location(change, plot_output):
    """Reload crash data around the marker and refresh the heatmap and plot.

    Reads the search radius from the notebook-level ``slider``, writes the
    heatmap points to the notebook-level ``heatmap`` and stores the cleaned
    frame in the notebook-level ``crashes_df``. Status messages are printed
    into ``plot_output``.

    Args:
        change: Change dict passed by ``observe``; ``change['new']`` is the
            marker's new location as ``(latitude, longitude)``.
        plot_output: ``ipywidgets.Output`` that shows messages and the plot.
    """
    # Rebind the notebook-level frame instead of creating a function-local name.
    global crashes_df

    lat, lon = change['new'][0], change['new'][1]
    radius = slider.value
    size = 1000  # value passed as the size segment of the crashes route

    # Start from a clean output area so stale plots/messages do not linger.
    plot_output.clear_output()

    dfs = []
    with plot_output:
        station_id, _station_location = get_closest_station(lat, lon)
        if station_id is None:
            return

        resp = get_full_data(f"crashes/{station_id}/{size}/{radius}")
        if resp is None or "Data" not in resp or "Token" not in resp or "Status" not in resp:
            print("Server error - unable to make a request for crash data")
        elif resp["Data"] == [] or resp["Token"] == "END" or resp["Status"] != 200:
            print("Error fetching data - no data available")
        else:
            # Follow the continuation token until the server answers "END".
            # NOTE(review): the page that carries Token == "END" is not appended;
            # this assumes it holds no rows - confirm against the service.
            while resp and "Token" in resp and resp["Token"] != "END":
                page = pd.json_normalize(resp["Data"])
                page['Station ID'] = station_id
                dfs.append(page)
                resp = get_full_data(f"stream/{resp['Token']}")

        if not dfs:
            print("No data available")
            return

        cleaned = clean_data(pd.concat(dfs, ignore_index=True))
        crashes_df = cleaned
        # Heatmap points are [latitude, longitude, intensity].
        heatmap.locations = cleaned[['latitude', 'longitude', 'severity_normalized']].values.tolist()

    # Called outside the `with` block so the Output context is not nested.
    update_plot(plot_output, cleaned)

### Where do you live? How many crashes have happened in your neighborhood?

In [3]:
# Initialize the map; `center` is (latitude, longitude)
center = (-41.55381099217959, 147.1123612984353)
m = Map(center=center, zoom=10)

# Create a draggable marker; dragging it triggers the observer registered below
marker = Marker(location=center, draggable=True)
m.add_layer(marker)

# Create a slider for the radius; its value is read by update_location on each marker move
slider = widgets.IntSlider(description='Radius in km', min=0, max=50, value=20)

# Heatmap layer starts empty; update_location fills `heatmap.locations`
heatmap = Heatmap(
    locations=[],
    radius=10,   # Radius of each point of the heatmap
    blur=10,     # Amount of blur
    max_zoom=1,  # Maximum zoom level
)
# Add the heatmap layer to the map
m.add_layer(heatmap)
# Output widget that is passed to update_location for the monthly crash plot
plot_output = widgets.Output()
# Notebook-level placeholder for the crash data of the current location
crashes_df = pd.DataFrame()
# Bind plot_output so the observer can be called with just the change dict
on_marker_moved_with_params = partial(update_location, plot_output=plot_output)

# Link the marker location change to the update_location function
marker.observe(on_marker_moved_with_params, names='location')
# Display the slider, the map and the plot area stacked vertically
display(widgets.VBox([slider, m, plot_output]))


VBox(children=(IntSlider(value=20, description='Radius in km', max=50), Map(center=[-41.55381099217959, 147.11…

# Get Prediction
### If you get in a car crash, how severe will it be?

In [74]:
# Fetch the current weather for the marker position (marker.location is (lat, lon)).
current_weather = get_current_weather(marker.location[0], marker.location[1])
# Ask the model for a severity prediction; get_prediction prints the result.
prediction = get_prediction(current_weather)

Severity Prediction: 0
