# Road Death Analysis
## A road death or fatality is a person who dies within 30 days of a crash as a result of injuries received in that crash.

Australian Road Deaths Database
https://www.bitre.gov.au/statistics/safety/fatal_road_crash_database

Australia population data
https://www.abs.gov.au/ausstats/abs@.nsf/0/D56C4A3E41586764CA2581A70015893E?Opendocument
https://www.abs.gov.au/ausstats/abs@.nsf/0/632CDC28637CF57ECA256F1F0080EBCC?Opendocument
https://www.abs.gov.au/AUSSTATS/abs@.nsf/DetailsPage/3101.0Dec%202017?OpenDocument
https://www.abs.gov.au/AUSSTATS/abs@.nsf/DetailsPage/3101.0Dec%202018?OpenDocument 
https://www.abs.gov.au/AUSSTATS/abs@.nsf/DetailsPage/3101.0Dec%202019?OpenDocument

https://www.abs.gov.au/AUSSTATS/abs@.nsf/DetailsPage/3218.02009-10?OpenDocument

Australia Breathe Testing Data
https://data.gov.au/data/dataset/australian-random-breath-testing/resource/6c5cbea3-79dc-40b9-9775-49521a57eacb

In [17]:
import folium
import pandas as pd
import json
import numpy as np

In [42]:
# given the cvs filename read the csv data
def read_file(filename):
    return pd.read_csv(filename, sep=',', error_bad_lines=False, index_col=False, dtype='unicode')

data = read_file('ardd_fatalities.csv')
data.head()

Unnamed: 0,Crash ID,State,Month,Year,Dayweek,Time,Crash Type,Bus Involvement,Heavy Rigid Truck Involvement,Articulated Truck Involvement,...,Age,National Remoteness Areas,SA4 Name 2016,National LGA Name 2017,National Road Type,Christmas Period,Easter Period,Age Group,Day of week,Time of day
0,20205023,WA,5,2020,Wednesday,16:10,Multiple,No,No,No,...,88,,,,,No,No,75_or_older,Weekday,Day
1,20201099,NSW,5,2020,Saturday,23:25,Multiple,No,No,No,...,25,Major Cities of Australia,Sydney - Outer South West,Campbelltown,National or State Highway,No,No,17_to_25,Weekend,Night
2,20207006,NT,5,2020,Sunday,12:00,Single,No,No,No,...,47,,,,,No,No,40_to_64,Weekend,Day
3,20205049,WA,5,2020,Friday,19:45,Multiple,No,No,No,...,0,,,,,No,No,0_to_16,Weekend,Night
4,20205049,WA,5,2020,Friday,19:45,Multiple,No,No,No,...,21,,,,,No,No,17_to_25,Weekend,Night


In [43]:
# read the apbs_popultion data
data_pop = read_file('apbs_population.csv')
data_pop.head()

Unnamed: 0,State,1989,1990,1991,1992,1993,1994,1995,1996,1997,...,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,NSW,5776283,5834021,5898731,5957822,5995055,6044819,6105560,6176461,6246267,...,7144292,7218529,7304244,7404032,7508353,7616168,7732858,7915069,8046070,8128984
1,Vic,4320164,4378592,4420373,4450217,4462766,4472989,4497660,4534984,4569297,...,5461101,5537817,5651091,5772669,5894917,6022322,6173172,6385849,6526413,6651074
2,Qld,2827637,2899283,2960951,3023198,3096185,3166566,3237380,3303192,3355417,...,4404744,4476778,4568687,4652824,4719653,4777692,4845152,4965033,5052827,5129996
3,SA,1419029,1432056,1446299,1455442,1458632,1463089,1465340,1469079,1475658,...,1627322,1639614,1656725,1671488,1686945,1700668,1712843,1728053,1742744,1759184
4,WA,1578434,1613049,1636067,1658544,1678722,1704649,1736066,1768206,1798341,...,2290845,2353409,2425507,2486944,2517608,2540672,2555978,2584768,2606338,2639080


# Animated Line Chart

In [44]:
# read the file
data = read_file('ardd_fatalities.csv')

data_plot = {}
# check_2019 = 0
for year,states in zip(data["Year"],data["State"]):
    if str(year) != "2020":
        if year not in data_plot:
            data_plot[year] = {}
        elif states not in data_plot[year]:
            data_plot[year][states] = 1
        else:
            data_plot[year][states] += 1

cvs_data = {}
for year in data_plot: 
    if not bool(cvs_data):
        cvs_data["year"] = []
        cvs_data["state"] = []
        cvs_data["number"] = []
    for states,values in data_plot[year].items():
        cvs_data["year"].append(year)
        cvs_data["state"].append(states)
        cvs_data["number"].append(values)
cvs_data = pd.DataFrame(data=cvs_data)
cvs_data.head()

Unnamed: 0,year,state,number
0,2019,SA,114
1,2019,WA,163
2,2019,NSW,357
3,2019,Qld,219
4,2019,Tas,29


In [45]:
import plotly.express as px
import pandas as pd
import plotly.graph_objects as go # https://plotly.com/python/legend/
# import collections

# ======================animated line chart======================
# create a list with all states name
states_total = {}
for states in data["State"]:
    if states not in states_total:
        states_total[states] = 1
    else:
        states_total[states] += 1
states_list = sorted(states_total.keys(), key=lambda k: states_total[k])


# from the cvsdata reanage
def get_cvsdata(name_list, cvs_data):
    traceCVS_list = []
    for states in name_list:
        traceCVS_list.append(cvs_data[cvs_data['state'].isin([states])])
    return  traceCVS_list
traceCVS_list = get_cvsdata(states_list, cvs_data)


def get_traceScatter(traceCVS_list, y_name, x_name, name_list, color_list):
    traceScatter_list=[]
    index_color = 0;
    for trace,state in zip(traceCVS_list, name_list):
        traceScatter_list.append(go.Scatter(
            x=trace[x_name][0:],
            y=trace[y_name][0:],
            mode='lines',
            name=state,  
            line=dict(width=1.5, color=color_list[index_color]), #change the color 'color = color in the color list'
            stackgroup='one'
        
        ))
        index_color += 1
    return  traceScatter_list


# from the cvsdata -> frames data
year_range = 30
frames = [dict(data = [
    dict(type='scatter', x=trace['year'][k:],y=trace['number'][k:])
    for trace in traceCVS_list],
    traces=[i for i in range(len(states_list))],
)for k in range(year_range, -1, -1)]


# set the plot layout
layout = go.Layout(
                width=1000,
                height=800,
                title="<b>Road Deaths by State from 1989 to 2019 in Australia<b>",
                xaxis_title="<b>Year</b>",
                yaxis_title="<b>Deaths<b>",
                plot_bgcolor= 'white',
                showlegend=True,
                hovermode='x unified',
                updatemenus=[
                    dict(
                        type='buttons', 
                        showactive=True,
                        y=1.2,
                        x=1.2,
                        xanchor='right',
                        yanchor='top',
                        pad=dict(t=0, r=10),
                        buttons=[dict(label='Play',
                        method='animate',
                        args=[
                            None, 
                            dict(frame=dict(duration=200, 
                                redraw=False),
                                transition=dict(duration=0),
                                fromcurrent=True,
                                mode='immediate')]
                        )]
                    )
                ]
            )

# create a color list which must have 8 different colors

color_list = ['#ffffd9','#edf8b1','#c7e9b4','#7fcdbb','#41b6c4','#1d91c0','#225ea8','#253494']
traceScatter_list = get_traceScatter(traceCVS_list, 'number', 'year', states_list, color_list)

fig = go.Figure(data=traceScatter_list, frames=frames, layout=layout)
fig.show()

# Interactive Map

In [46]:
states_name_list = ['NSW', 'Vic', 'Qld','SA','WA','Tas','NT','ACT']
id_list = [i for i in range(8)]

def c_fatality_rate(road_death, pop):
    return int(road_death)/int(pop.replace(',','')) * 100000


def statestotal_eachyear(year):
    data_death=read_file('ardd_fatalities.csv')
    states_total={}
    for states, years in zip(data_death["State"],data_death["Year"]):
        
        if str(year) != "2020":
            if year==None:
                if states not in states_total:
                    states_total[states] = 1
                else:
                    states_total[states] += 1
            elif years==year:
                if states not in states_total:
                    states_total[states] = 1
                else:
                    states_total[states] += 1
                
    return states_total


def get_state_id(states_name_list, state_name):
    ids = 0
    for i in states_name_list: 
        if i == state_name:
            return ids
        ids+=1

        
def get_year_state_pop(year, state_id, data_pop):
    return data_pop[str(year)][state_id]



def calculate_stack(roaddeath_filename, pop_filename, states_name_list, id_list):
    data_pop = read_file(pop_filename)
    year_start=1989
    year_end=2020
    states_fatality_rate_each_year=statestotal_eachyear(str(year_start))
    while year_start != year_end: 
        temp=statestotal_eachyear(str(year_start))
        ids=0
        for key,value in temp.items():  
            pop = get_year_state_pop(year_start,get_state_id(states_name_list, key), data_pop)
            if str(year_start)=='1989':
                states_fatality_rate_each_year[key] = c_fatality_rate(value,pop)
            else:
                states_fatality_rate_each_year[key] +=  c_fatality_rate(value,pop)
            ids += 1
        year_start += 1
    return states_fatality_rate_each_year


def calculate_ave(stack_dic, year_total):
    for key,value in stack_dic.items(): 
        stack_dic[key] = round(value/year_total,3)
    return stack_dic



def covert_dic_csv(states_name_list, dic_data, string_Col):
    re_dic = {"state_name":[],"state_id":[], string_Col:[]}
    for key,value in dic_data.items():
        re_dic['state_name'].append(key)
        re_dic["state_id"].append(get_state_id(states_name_list, key))
        re_dic[string_Col].append(value)
    yearly_states_average_data = pd.DataFrame(data=re_dic)
    return yearly_states_average_data

In [23]:
states_total = statestotal_eachyear(None)
states_total = calculate_ave(states_total, 31)
yearly_states_average_data = covert_dic_csv(states_name_list, states_total, "Average")
yearly_states_average_data.head()

Unnamed: 0,state_name,state_id,Average
0,WA,4,195.387
1,NSW,0,514.161
2,NT,6,51.387
3,Vic,1,363.935
4,Qld,2,325.871


In [47]:
print('add a new key name is "Average death per year", vlaue is "AMOUNT" into the shape json file' + '\n') 
with open('australian-states.json') as f:
      data = json.load(f)
for state_pro,number in zip(data['features'],yearly_states_average_data['Average']):
    state_pro['properties']['Average death per year'] = state_pro['properties']['STATE_NAME'] + ' : ' + str(number)
    # print(state_pro['properties'])
    
print('new dict will be used show the labels of each shapes' + '\n')

Australia_Location = [-25.274398, 133.775136]
m = folium.Map(Australia_Location, zoom_start=4)
folium.TileLayer('cartodbpositron').add_to(m)
choropleth = folium.Choropleth(
    geo_data=data,    # this data is for shpae of states 
    name='choropleth',
    data=yearly_states_average_data, # this data is for the toatal numbers for each states
    columns=['state_id', 'Average'], 
    key_on='feature.id',  # this is the key that indefine states
    fill_color='YlGnBu', # set the color of the map YlGnBu
    fill_opacity=0.8,
    line_opacity=0.8,
    legend_name='yearly road death number', 
    
#     bins=np.arange(0,18000,2000)  # optional but not necessary; customize the color range
).add_to(m)


# show the labels
choropleth.geojson.add_child(
    folium.features.GeoJsonTooltip(['Average death per year'])
)

m

add a new key name is "Average death per year", vlaue is "AMOUNT" into the shape json file

new dict will be used show the labels of each shapes



In [48]:
stacked_dict = calculate_stack('ardd_fatalities.csv', 'apbs_population.csv', states_name_list, id_list)
states_average = calculate_ave(stacked_dict,31)
yearly_states_averageRATE_data = covert_dic_csv(states_name_list, states_average, 'rate')
yearly_states_averageRATE_data.head()

Unnamed: 0,state_name,state_id,rate
0,SA,3,9.237
1,Vic,1,7.343
2,ACT,7,4.597
3,Qld,2,8.797
4,WA,4,9.739


In [50]:
print('add a new key name is "Average fatality rate per 100,000 population per year", vlaue is "AMOUNT" into the shape json file' + '\n') 
with open('australian-states.json') as f:
      data = json.load(f)
for state_pro,number in zip(data['features'],yearly_states_averageRATE_data['rate']):
    state_pro['properties']['Average fatality rate per 100,000 population per year'] = state_pro['properties']['STATE_NAME'] + ' : ' + str(number)
    # print(state_pro['properties'])
    
print('new dict will be used show the labels of each shapes' + '\n')

Australia_Location = [-25.274398, 133.775136]
m = folium.Map(Australia_Location, zoom_start=4)
folium.TileLayer('cartodbpositron').add_to(m)
choropleth = folium.Choropleth(
    geo_data=data,    # this data is for shpae of states 
    name='choropleth',
    data=yearly_states_averageRATE_data, # this data is for the toatal numbers for each states
    columns=['state_id', 'rate'], 
    key_on='feature.id',  # this is the key that indefine states
    fill_color='YlGnBu', # set the color of the map YlGnBu
    fill_opacity=0.8,
    line_opacity=0.8,
    legend_name='yearly fatality rate per 100,000 population', 
    
#     bins=np.arange(0,18000,2000)  # optional but not necessary; customize the color range
).add_to(m)


# show the labels
choropleth.geojson.add_child(
    folium.features.GeoJsonTooltip(['Average fatality rate per 100,000 population per year'])
)

m

add a new key name is "Average fatality rate per 100,000 population per year", vlaue is "AMOUNT" into the shape json file

new dict will be used show the labels of each shapes



# Multiple Axes and plots

In [11]:
# Count the total_death for each states
data_death = read_file('ardd_fatalities.csv')
years_total = {}
for year in data_death["Year"]:
    if str(year) != "2020":
        if year not in years_total:
            years_total[year] = 1
        else:
            years_total[year] += 1
# print(years_total)

data_pop = read_file('apbs_population.csv')
years_rate = {}
index_for_total = 8
TEN_THS = 100000
for key,value in years_total.items():
#     print("{value}...{k}".format(value=value, k=data_pop[key][index_for_total]))
    years_rate[key] = round(int(value) / int(data_pop[key][index_for_total]) * TEN_THS,3)
# print(years_rate)

re_dic = {"year":[],"number":[], "rate":[]}
for key,value in years_total.items():
    re_dic['year'].append(key)
    re_dic["number"].append(value)
    re_dic["rate"].append(years_rate[str(key)])
    
yearly_states_total_data = pd.DataFrame(data=re_dic)
yearly_states_total_data.head()

Unnamed: 0,year,number,rate
0,2019,1194,4.678
1,2018,1134,4.504
2,2017,1222,4.933
3,2016,1292,5.341
4,2015,1204,5.055


In [12]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px

fig_rate = make_subplots(specs=[[{"secondary_y": True}]])


fig_rate.add_trace(
    go.Bar(
        x=yearly_states_total_data.year,
        y=yearly_states_total_data.number, 
        name="Road Deaths",   # change
        opacity=1,
        marker={'color': yearly_states_total_data.number, 'colorscale': 'YlGnBu'},
    ),
    secondary_y=False,
)

fig_rate.add_trace(
    go.Scatter(
        x=yearly_states_total_data.year,
        y=yearly_states_total_data.rate, 
        name="Rate/pop", #change
        line=dict(width=1.5*2, color='darkred',dash='dash'), #change
    ),
    secondary_y=True,
)

# Add figure title
fig_rate.update_layout(
    title_text="<b>Road Deaths and fatality rate per 100,000 population from 1989 to 2019 in Australia<b>",  # change
    plot_bgcolor='white',  # change
)

# # Set x-axis title
fig_rate.update_xaxes(title_text="<b>Yea<b>r")   # change

# # Set y-axes titles
fig_rate.update_yaxes(title_text="<b>Total Number of Road Deaths</b>", secondary_y=False)  # change
fig_rate.update_yaxes(title_text="<b>Fatality Rate/100,000 population", secondary_y=True)  # change

fig_rate.show()

# fig.show()



## 1992: Road Safety 2000 Strategy
## 2008: Economic Crisis

# Chart

In [249]:
# Count the total_death for each states
data_user = read_file('ardd_fatalities.csv')
users_total = {}
for user in data_user["Road User"]:
    if str(year) != "2020":
        if user not in users_total:
            users_total[user] = 1
        else:
            users_total[user] += 1
print(users_total)

re_dic = {"user":[],"number":[]}
for key,value in users_total.items():
    re_dic['user'].append(key)
    re_dic["number"].append(value)
    
users_total_data = pd.DataFrame(data=re_dic)
users_total_data.head()

{'Driver': 23103, 'Motorcycle rider': 6362, 'Passenger': 12022, 'Motorcycle pillion passenger': 354, 'Pedal cyclist': 1379, 'Pedestrian': 8067, 'Other/-9': 81}


Unnamed: 0,user,number
0,Driver,23103
1,Motorcycle rider,6362
2,Passenger,12022
3,Motorcycle pillion passenger,354
4,Pedal cyclist,1379


In [251]:
import plotly.express as px

fig_user = px.pie(users_total_data, 
             title='<b>Road Deaths by user type from 1989-2019 in Australia<b>',
             values='number', 
             names='user',
             opacity=0.7,
             color_discrete_sequence=px.colors.sequential.YlGnBu_r
            )

fig_user.update_traces(textposition='inside', textinfo='percent+label'
                 )
fig_user.show()

In [252]:
# read the file
data = read_file('ardd_fatalities.csv')

data_plot = {}
# check_2019 = 0
for year,states in zip(data["Year"],data["State"]):
    if str(year) != "2020":
        if year not in data_plot:
            data_plot[year] = {}
        elif states not in data_plot[year]:
            data_plot[year][states] = 1
        else:
            data_plot[year][states] += 1

cvs_data = {}
for year in data_plot: 
    if not bool(cvs_data):
        cvs_data["year"] = []
        cvs_data["state"] = []
        cvs_data["number"] = []
    for states,values in data_plot[year].items():
        cvs_data["year"].append(year)
        cvs_data["state"].append(states)
        cvs_data["number"].append(values)
cvs_data = pd.DataFrame(data=cvs_data)
cvs_data.head()

Unnamed: 0,year,state,number
0,2019,SA,114
1,2019,WA,163
2,2019,NSW,357
3,2019,Qld,219
4,2019,Tas,29


In [253]:
# Count the total_death for each states
data_age = read_file('ardd_fatalities.csv')
ages_total = {}

for age,gender,year in zip(data_age["Age"],data_age['Gender'],data_age['Year']):
    if str(year) != '2020' and str(gender) != '-9'and str(gender) != 'Unspecified': 
        if age not in ages_total:
            ages_total[age] = {}             
        elif gender not in ages_total[age]: 
            ages_total[age][gender] = 1
        else: 
             ages_total[age][gender] += 1 


cvs_ages_total = {}
for age in ages_total: 
    if not bool(cvs_ages_total):
        cvs_ages_total["age"] = []
        cvs_ages_total["gender"] = []
        cvs_ages_total["number"] = []
    for gender,values in ages_total[age].items():
        cvs_ages_total["age"].append(age)
        cvs_ages_total["gender"].append(gender)
        cvs_ages_total["number"].append(values)
        
# print(cvs_ages_total)
cvs_ages_total = pd.DataFrame(data=cvs_ages_total)
cvs_ages_total.head()

Unnamed: 0,age,gender,number
0,5,Male,111
1,5,Female,75
2,18,Male,1435
3,18,Female,469
4,31,Male,710


In [246]:
import plotly.express as px

# '#ffffd9','#edf8b1','#c7e9b4','#7fcdbb','#41b6c4','#1d91c0','#225ea8','#253494',,,,,'#8FBC8F','#DB7093'
fig_age = px.histogram(cvs_ages_total, 
                   x="age", 
                   y="number", 
                   color='gender',  
                   marginal="box", # or violin, rug
                   hover_data=cvs_ages_total.columns,
                   color_discrete_sequence=['#7fcdbb','#c7e9b4'],
                   opacity=0.8
                  )

fig_age.update_layout(
    title_text="<b>Road Deaths by age group and gender from 1989 to 2019 in Australia<b>",  # change
    plot_bgcolor='white',  # change
    xaxis_title_text="<b>Age<b>",
    yaxis_title_text="<b>Total Number of Road Deaths<b>",
    legend_title_text="",
    legend_orientation="h"
)
fig_age.show()