# Request Data

In [None]:
# Build the shared `data` object that every later cell reads from
# (keys used below include 'population_lookup', 'accident_counts'
# and 'accident_details').
from requests_function import fetch_and_process_data
data = fetch_and_process_data()

## Test

In [None]:
# Quick sanity check of what the loader returned: list the top-level keys.
print(data.keys())

# Optional peek at the population lookup (left disabled):
# if 'population_lookup' in data:
#     for key, value in list(data['population_lookup'].items())[:5]:
#         print(key, value)

# Show the first five accident records, or say so when the key is absent.
if 'accident_details' not in data:
    print("'accident_details' key not found in the data.")
else:
    for record in data['accident_details'][:5]:
        print(record)


# LGA areas and the number/severity of accidents
## 1. LGA & number of accidents & population
### Full map

In [None]:
from map_functions import display_all_map

# Bind the name up front so the display expression at the end of the cell
# never hits an unbound variable when no data was loaded.
all_map = None

if data:
    all_map = display_all_map(data)
    # Uncomment to export the map to a standalone file:
    # all_map.save('map.html')
else:
    print("Data acquisition failed, please check the network connection and URL.")

# Last expression of the cell: the notebook renders the map (nothing is
# rendered when it is still None).
all_map

### Map search

In [None]:
from map_functions import display_city_map

# Bind the name up front so the display expression at the end of the cell
# is always defined, even when no data was loaded.
city_map = None

if data:
    # Strip surrounding whitespace so an accidental space or newline in the
    # typed name does not cause a spurious "not found".
    city_name = input("Please enter the city name (LGA_NAME):").strip()
    city_map = display_city_map(data, city_name)
    if city_map:
        # Uncomment to export the map to a standalone file:
        # city_map.save('city_map.html')
        print(f"Map for {city_name} is displayed.")
    else:
        print("The specified city name was not found, please make sure to enter it correctly.")
else:
    print("Data acquisition failed, please check the network connection and URL.")

# Last expression of the cell: the notebook renders the map when there is one.
city_map

## 2. LGA & number of accidents map
### Accident dots map

In [None]:
from map_functions import display_map_with_accidents_severity_dot

# Reset the name first: `all_map` is reused across cells, so without this a
# failed data load would silently re-display the map built by an earlier cell.
all_map = None

if data:
    all_map = display_map_with_accidents_severity_dot(data)
    # Uncomment to export the map to a standalone file:
    # all_map.save('map.html')
else:
    print("Data acquisition failed, please check the network connection and URL.")

# Last expression of the cell: the notebook renders the map when there is one.
all_map

### Accident clustering map

In [None]:
from map_functions import display_map_with_clustering_accidents

# Reset the name first: `all_map` is reused across cells, so without this a
# failed data load would silently re-display the map built by an earlier cell.
all_map = None

if data:
    all_map = display_map_with_clustering_accidents(data)
    # Uncomment to export the map to a standalone file:
    # all_map.save('map.html')
else:
    print("Data acquisition failed, please check the network connection and URL.")

# Last expression of the cell: the notebook renders the map when there is one.
all_map

# Population and the number of car accidents
### Bar chart for Population and the number of car accidents

In [None]:
import pandas as pd
import plotly.express as px

# Re-order both lookups by ascending value (population / accident count).
population_lookup = dict(
    sorted(data['population_lookup'].items(), key=lambda pair: pair[1])
)
accident_counts = dict(
    sorted(data['accident_counts'].items(), key=lambda pair: pair[1])
)

# One two-column frame per lookup, then join them on the city name.
df_population = pd.DataFrame([*population_lookup.items()], columns=['City', 'Population'])
df_accidents = pd.DataFrame([*accident_counts.items()], columns=['City', 'AccidentCounts'])
df = df_population.merge(df_accidents, on='City')

# Horizontal bars: bar length is the population, bar colour the accident count.
fig = px.bar(
    df,
    y='City',
    x='Population',
    orientation='h',
    hover_data=['AccidentCounts'],
    color='AccidentCounts',
    labels={'Population': 'Population of City'},
)

# Explicit tick positions/text so every city gets its own label on the y axis.
city_axis = dict(
    tickmode='array',
    tickvals=df.index,
    ticktext=df['City'],
    tickfont=dict(size=10),
)

layout_options = dict(
    title='Population and Accident Counts of Cities',
    xaxis_title="Population",
    yaxis_title="City",
    autosize=False,
    width=1000,
    height=1200,
    coloraxis_colorbar=dict(title="Accident Counts"),
    yaxis=city_axis,
)
fig.update_layout(**layout_options)

fig.show()

### Scatter plot for Population and the number of car accidents

In [None]:
# Relies on `pd` and `px` imported by the bar-chart cell.

# Re-order both lookups by ascending value (population / accident count).
population_lookup  = dict(sorted(data['population_lookup'].items(), key=lambda item: item[1]))
accident_counts  = dict(sorted(data['accident_counts'].items(), key=lambda item: item[1]))

df_population = pd.DataFrame(list(population_lookup.items()), columns=['City', 'Population'])
df_accidents = pd.DataFrame(list(accident_counts.items()), columns=['City', 'AccidentCounts'])

# Inner join: only cities present in both lookups are plotted.
df = pd.merge(df_population, df_accidents, on='City')

# Colouring by city puts every city in its own trace with a single point, so
# a per-trace OLS trendline has nothing meaningful to fit. Asking for one
# overall trendline fits the regression across all cities instead.
fig = px.scatter(df, x='Population', y='AccidentCounts', color='City',
                 trendline='ols',
                 trendline_scope='overall',
                 labels={'Population': 'Population of City', 'AccidentCounts': 'Number of Accidents'},
                 title='Relationship between Population and Number of Accidents')

fig.update_layout(
    autosize=False,
    width=1000,
    height=600,
    xaxis_title="Population",
    yaxis_title="Number of Accidents"
)

fig.show()

### OLS model for linear regression for scatter plot

In [None]:
import pandas as pd
import plotly.graph_objects as go
import statsmodels.api as sm

# Ordinary least squares of accident count on population, with an intercept
# column added to the predictor. Uses the merged `df` built by the scatter cell.
X = sm.add_constant(df['Population'])
model = sm.OLS(df['AccidentCounts'], X)
results = model.fit()

# Keep the fitted values next to the observations they belong to.
df['fit'] = results.fittedvalues

# Observations as markers, model predictions as a line, on the same axes.
observed_trace = go.Scatter(
    x=df['Population'], y=df['AccidentCounts'], mode='markers', name='Data'
)
fitted_trace = go.Scatter(
    x=df['Population'], y=df['fit'], mode='lines', name='OLS Fit'
)

fig = go.Figure()
for trace in (observed_trace, fitted_trace):
    fig.add_trace(trace)

layout_options = dict(
    title='Relationship between Population and Number of Accidents with OLS',
    xaxis_title='Population',
    yaxis_title='Number of Accidents',
    autosize=False,
    width=1100,
    height=900,
)
fig.update_layout(**layout_options)

fig.show()

# Rainfall & speed on the number of accidents

In [None]:
from requests_function import fetch_weather_data

# Query window handed to the weather helper, written as compact date strings
# (presumably YYYYMMDD: the keys of the returned data are later parsed with
# the '%Y%m%d' format).
# NOTE(review): the window starts on 2 Jan 2023 rather than 1 Jan — confirm
# this is intended. The next cell rebinds start_date/end_date to datetime
# objects covering a wider range.
start_date = '20230102'
end_date = '20230930'
# Consumed below through .items() as (date, rainfall) pairs.
rainfall_data = fetch_weather_data(start_date, end_date)

# Uncomment to inspect the raw result:
#rainfall_data

In [None]:
import pandas as pd
from datetime import datetime

accidents = data['accident_details']

# Keep only the records whose LOCATION is exactly 'MELBOURNE'.
melbourne_accidents = [row for row in accidents if row['LOCATION'] == 'MELBOURNE']

# Drop the placeholder speed-zone codes (presumably "other/unknown" markers —
# confirm against the data dictionary).
excluded_speed_zones = ('777', '888', '999')
filtered_accidents = [
    row for row in melbourne_accidents
    if row['SPEED_ZONE'] not in excluded_speed_zones
]

# Inclusive accident-date window.
start_date = datetime(2022, 1, 1)
end_date = datetime(2023, 9, 30)

date_filtered_accidents = list(filter(
    lambda row: start_date <= datetime.strptime(row['ACCIDENT_DATE'], '%Y-%m-%d') <= end_date,
    filtered_accidents,
))

# Uncomment to inspect the filtered records:
#date_filtered_accidents

# Tabular versions of both sources, each with a real datetime column to join on.
df_rainfall = pd.DataFrame(list(rainfall_data.items()), columns=['Date', 'Rainfall'])
df_rainfall = df_rainfall.assign(
    Date=pd.to_datetime(df_rainfall['Date'], format='%Y%m%d')
)

df_accidents = pd.DataFrame(date_filtered_accidents)
df_accidents = df_accidents.assign(
    ACCIDENT_DATE=pd.to_datetime(df_accidents['ACCIDENT_DATE'], format='%Y-%m-%d')
)

# Inner join: accidents on days without a rainfall reading are dropped.
df_merged = df_accidents.merge(
    df_rainfall, left_on='ACCIDENT_DATE', right_on='Date', how='inner'
)

# Make the plotted columns numeric; anything unparseable becomes NaN.
for column in ('SPEED_ZONE', 'Rainfall'):
    df_merged[column] = pd.to_numeric(df_merged[column], errors='coerce')

# Uncomment for a quick look at the result:
#print(df_merged.head())

In [None]:
import plotly.express as px

# One marker per merged accident record: rainfall on x, speed zone on y
# (also used as the colour). The x axis is clipped to 0-25.
axis_labels = {'Rainfall': 'Rainfall (mm)', 'SPEED_ZONE': 'Speed Zone (km/h)'}

fig = px.scatter(
    df_merged,
    x='Rainfall',
    y='SPEED_ZONE',
    color='SPEED_ZONE',
    title='Relationship Between Rainfall, Accidents and Speed Zone',
    labels=axis_labels,
    range_x=[0, 25],
)

# Fixed canvas size, with a y tick every 10 units.
speed_axis = {'tickmode': 'linear', 'dtick': 10}
fig.update_layout(autosize=False, width=1200, height=800, yaxis=speed_axis)

fig.show()

### Rainfall and car accidents

In [None]:
import pandas as pd
import plotly.express as px

# Number of merged accident records for each distinct rainfall value.
accidents_per_rainfall = (
    df_merged
    .groupby('Rainfall')
    .size()
    .reset_index(name='Accident_Count')
)

fig = px.bar(
    accidents_per_rainfall,
    x='Rainfall',
    y='Accident_Count',
    title='Accidents Count by Rainfall',
    labels={'Rainfall': 'Rainfall (mm)', 'Accident_Count': 'Number of Accidents'},
    text='Accident_Count',
)

# Print the count above each bar using a 2-significant-digit SI format.
fig.update_traces(texttemplate='%{text:.2s}', textposition='outside')

# Hide labels that would need a font below 8 and show only the 0-15 range on x.
rainfall_axis = dict(range=[0, 15], title="Rainfall (mm)")
fig.update_layout(
    uniformtext_minsize=8,
    uniformtext_mode='hide',
    xaxis=rainfall_axis,
    yaxis_title="Number of Accidents",
    autosize=False,
    width=1600,
    height=800,
)

fig.show()

### Speed and car accidents

In [None]:
import plotly.express as px

# Number of merged accident records for each speed-zone value.
accidents_per_speed_zone = (
    df_merged
    .groupby('SPEED_ZONE')
    .size()
    .reset_index(name='Accident_Count')
)

fig = px.bar(
    accidents_per_speed_zone,
    x='SPEED_ZONE',
    y='Accident_Count',
    title='Accidents Count by Speed Zone',
    labels={'SPEED_ZONE': 'Speed Zone (km/h)', 'Accident_Count': 'Number of Accidents'},
    text='Accident_Count',
)

# Print the count above each bar using a 2-significant-digit SI format.
fig.update_traces(texttemplate='%{text:.2s}', textposition='outside')

layout_options = dict(
    xaxis_title="Speed Zone (km/h)",
    yaxis_title="Number of Accidents",
    autosize=False,
    width=1000,
    height=800,
)
fig.update_layout(**layout_options)

fig.show()

# Severity of the car accident
## 1. Full Severity Statistics for 2023

In [None]:
import pandas as pd
import plotly.graph_objects as go

# Severity code -> human-readable description, used in the printed summary.
severity_labels = {
    1: "Fatal accident",
    2: "Serious injury accident",
    3: "Other injury accident",
    4: "Non injury accident"
}

# Severity code -> slice colour. Keyed by code (not by position) so that a
# severity level missing from the data cannot shift the remaining colours
# onto the wrong slices.
severity_colors = {1: 'red', 2: 'orange', 3: 'yellow', 4: 'green'}


def _severity_code(raw):
    """Return the severity as an int, or None when it is not a whole number."""
    try:
        return int(raw)
    except (TypeError, ValueError):
        return None


severities = [accident['SEVERITY'] for accident in data['accident_details']]

# Occurrences per severity value, ordered by the value itself.
severity_counts = pd.Series(severities).value_counts().sort_index()

labels = severity_counts.index
values = severity_counts.values

# One colour per slice, aligned with `labels`; unexpected codes fall back to gray.
colors = [severity_colors.get(_severity_code(code), 'gray') for code in labels]

fig = go.Figure(data=[go.Pie(labels=labels, values=values, hole=0.3, marker_colors=colors)])
fig.update_layout(title_text='Severity of 2022-2023 Accidents in Victoria, Australia')
fig.show()

# Text summary of the same counts; an unexpected code is reported instead of
# aborting the loop with a KeyError/ValueError.
# print(severity_counts)
for severity, count in severity_counts.items():
    description = severity_labels.get(_severity_code(severity), 'Unknown severity')
    print(f"Severity {severity} - {description}: {count} occurrences")

## 2. Relation of Speed Zone, Severity, Surface Condition

In [None]:
import plotly.express as px
import pandas as pd

accident_details = pd.DataFrame(data['accident_details'])

# Coerce the coded columns to numbers; entries that cannot be parsed become NaN.
accident_details['SPEED_ZONE'] = pd.to_numeric(accident_details['SPEED_ZONE'], errors='coerce')
accident_details['SEVERITY'] = pd.to_numeric(accident_details['SEVERITY'], errors='coerce')
accident_details['SURFACE_COND'] = pd.to_numeric(accident_details['SURFACE_COND'], errors='coerce')

# Numeric stand-in for "any speed zone outside 30-110". Parallel-coordinates
# axes only accept numbers, so the catch-all bucket is stored as a number that
# lies outside the valid range and is shown as 'Other' through the axis tick text.
OTHER_SPEED_ZONE = 120


def map_speed_zone(zone, other='Other'):
    """Return `zone` when it lies in 30..110 (inclusive), otherwise `other`.

    NaN fails both comparisons, so missing speed zones also map to `other`.
    The default of 'Other' keeps the original string result for any caller
    that does not pass a replacement value.
    """
    if 30 <= zone <= 110:
        return zone
    return other


# Keep the column fully numeric by using the numeric sentinel as the bucket.
accident_details['SPEED_ZONE'] = accident_details['SPEED_ZONE'].apply(
    map_speed_zone, other=OTHER_SPEED_ZONE
)

# Number of accidents per (bucketed) speed zone ...
accident_counts = accident_details['SPEED_ZONE'].value_counts().reset_index()
accident_counts.columns = ['SPEED_ZONE', 'COUNT']

# ... attached to every row so it can drive the line colour.
accident_details = accident_details.merge(accident_counts, on='SPEED_ZONE', how='left')

# Tick text for the coded axes.
severity_mapping = {1: 'Fatal accident', 2: 'Serious injury accident', 3: 'Other injury accident', 4: 'Non injury accident'}
surface_cond_mapping = {1: 'Dry', 2: 'Wet', 3: 'Muddy', 4: 'Snowy', 5: 'Icy', 9: 'Unknown'}

# One tick per speed zone actually present; the sentinel is labelled 'Other'.
speed_zone_tickvals = sorted(float(zone) for zone in accident_details['SPEED_ZONE'].unique())
speed_zone_ticktext = [
    'Other' if zone == OTHER_SPEED_ZONE else f'{zone:g}'
    for zone in speed_zone_tickvals
]

fig = px.parallel_coordinates(
    accident_details,
    dimensions=["SPEED_ZONE", "SEVERITY", "SURFACE_COND"],
    color="COUNT",
    labels={"SPEED_ZONE": "Speed Zone", "SEVERITY": "Severity", "SURFACE_COND": "Surface Condition"},
    color_continuous_scale=px.colors.sequential.Viridis  # continuous (sequential) colour scale
)

# Replace the generated axes with explicitly labelled ones.
fig.update_traces(dimensions=[
    dict(label='Speed Zone', values=accident_details['SPEED_ZONE'], ticktext=speed_zone_ticktext, tickvals=speed_zone_tickvals),
    dict(label='Severity', values=accident_details['SEVERITY'], ticktext=list(severity_mapping.values()), tickvals=list(severity_mapping.keys())),
    dict(label='Surface Condition', values=accident_details['SURFACE_COND'], ticktext=list(surface_cond_mapping.values()), tickvals=list(surface_cond_mapping.keys())),
])

fig.show()


# speed_zone_counts = accident_details['SPEED_ZONE'].value_counts()

# speed_zone_counts_df = speed_zone_counts.reset_index()
# speed_zone_counts_df.columns = ['Speed Zone', 'Number of Accidents']

# print(speed_zone_counts_df)

# Road conditions and the number of car accidents

In [None]:
import pandas as pd
import plotly.express as px

accident_details = pd.DataFrame(data['accident_details'])

# Accident tally per surface-condition description.
accident_counts_by_condition = accident_details.groupby('SURFACE_COND_DESC').size()

# Same tally as a two-column table with presentation-friendly headers.
accident_counts_df = (
    accident_counts_by_condition
    .reset_index(name='Number of Accidents')
    .rename(columns={'SURFACE_COND_DESC': 'Surface Condition'})
)

print(accident_counts_df)

# One bar per surface condition, each in its own colour.
fig = px.bar(
    accident_counts_df,
    x='Surface Condition',
    y='Number of Accidents',
    title='Number of Car Accidents by Road Conditions',
    color='Surface Condition',
    labels={'Surface Condition': 'Road Conditions', 'Number of Accidents': 'Number of Accidents'},
)

fig.show()