In [None]:
# Import Dependencies
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import json
import requests
import gmaps
import os
from pprint import pprint
from scipy.stats import linregress
from scipy import stats
import seaborn as sb
from sklearn import preprocessing
from mlxtend.preprocessing import standardize

In [None]:
# Read the raw tables into pandas
df_raw_type = pd.read_csv("data_raw/summary_table_road_traffic_deathsp.csv")
df_raw_seatbelt = pd.read_csv("data_raw/seat_belt_laws.csv")
df_raw_restraint = pd.read_csv("data_raw/child_restraints_by_country.csv")
df_raw_phones = pd.read_csv("data_raw/mobile_phone_laws.csv")

# Chosen countries
countries = ["Argentina",
             "Australia",
             "Belgium",
             "Canada",
             "Finland",
             "Germany",
             "Greece",
             "Israel",
             "Spain",
             "Sweden",
             "United Kingdom",
             "United States of America"
            ]


def _select_countries(raw, names, column="Country/Area"):
    """Return the rows of `raw` whose `column` contains one of `names`.

    Rows are grouped in the order of `names` and the index is renumbered.
    `DataFrame.append` (used previously, one call per country) was removed in
    pandas 2.0, so the matches are collected first and concatenated once.

    Parameters
    ----------
    raw : pandas.DataFrame
        Table holding one text column with the country name.
    names : iterable of str
        Country names, matched as literal substrings.
    column : str
        Name of the country column in `raw`.

    Returns
    -------
    pandas.DataFrame
        Matching rows; an empty frame when `names` is empty.
    """
    matches = [
        # regex=False: names are plain text, not patterns; na=False: a missing
        # country name simply does not match instead of breaking the mask.
        raw[raw[column].str.contains(name, regex=False, na=False)]
        for name in names
    ]
    if not matches:
        return pd.DataFrame()
    return pd.concat(matches, ignore_index=True)


# Keep only the chosen countries in each table
df_type = _select_countries(df_raw_type, countries)
df_seatbelt = _select_countries(df_raw_seatbelt, countries)
df_restraint = _select_countries(df_raw_restraint, countries)
df_phones = _select_countries(df_raw_phones, countries)

# Left-join the three law tables onto the deaths table, one at a time, by country name
df1 = df_type.merge(df_seatbelt, how='left', on='Country/Area')
df2 = df1.merge(df_restraint, how='left', on='Country/Area')
df = df2.merge(df_phones, how='left', on='Country/Area')

# Give every column a short, distinct name (raw header -> analysis name).
# Some raw headers carry stray spaces / line breaks; they are matched verbatim.
df = df.rename(columns={
    "Country/Area": "country",
    "Population numbers for 2016a": "population",
    "Income levelc": "income",
    "Reported number of road traffic deaths": "number of deaths",
    "Estimated road traffic death rate per 100 000 populationd ": "estimated road death",
    "National seat\nbelt law": "seat belt law",
    "National child restraint law": "child restraint law",
    "Seat belt applies to Drivers": "seat belt drivers",
    "Seat belt applies to Front seat passengers": "seat belt front seat",
    "Seat belt applies to Rear-seat passengers": "seat belt rear seat",
    "Seat-belt wearing rates (%) Drivers only ": "rates seat belt drivers",
    " Seat-belt wearing rates (%) Front-seat occupants": "rates seat belt front seat",
    "Seat-belt wearing rates (%) Rear-seat occupants": "rates seat belt rear seat",
    "Seat-belt wearing rates (%) All occupants": "rates seat belt all",
    "\nChild restraints required ": "child restraints",
    "Children seated \nin front seat": "child seatedcin front",
    "Standard referred to and/or specified": "standard specified",
    "Percentage of children using child restraints": "percentage child restraints",
    "National mobile phone law": "mobile phone law",
    "Mobile phone law Applies to hand-held phone use": "hand_held phone",
    "Mobile phone law Applies to hands-free phone use": "hand_free phone",
    "Any data on use of mobile phone while driving": "any_data phone",
})

# Keep only the columns used in the analysis, in presentation order
df = df[[
    "country",
    "population",
    "income",
    "number of deaths",
    "death estimate",
    "estimated road death",
    "seat belt law",
    "seat belt drivers",
    "seat belt front seat",
    "seat belt rear seat",
    "rates seat belt drivers",
    "rates seat belt front seat",
    "rates seat belt rear seat",
    "rates seat belt all",
    "child restraint law",
    "child restraints",
    "standard specified",
    "child seatedcin front",
    "percentage child restraints",
    "mobile phone law",
    "hand_held phone",
    "hand_free phone",
    "any_data phone",
]]
display(df)

In [None]:
# All countries: deaths table left-joined with the child-restraint table on country name
df_new = df_raw_type.merge(df_raw_restraint, how='left', on='Country/Area')

In [None]:
# Percentage of rows answering "Yes" in "National child restraint law".
# The numerator is a per-column non-null count of the "Yes" rows, the denominator the
# non-null count of the law column, so the result is a Series with one value per column.
(
    df_raw_restraint[df_raw_restraint["National child restraint law"].eq("Yes")]
    .count()
    .div(df_raw_restraint["National child restraint law"].count())
    .mul(100)
)

In [None]:
# Percentage of rows answering "No" in "National child restraint law".
# The numerator is a per-column non-null count of the "No" rows, the denominator the
# non-null count of the law column, so the result is a Series with one value per column.
(
    df_raw_restraint[df_raw_restraint["National child restraint law"].eq("No")]
    .count()
    .div(df_raw_restraint["National child restraint law"].count())
    .mul(100)
)

In [None]:
# Number of rows for each (income level, child-restraint-law answer) pair
child_by_income = (
    df_new
    .groupby(by=["Income levelc", "National child restraint law"])["National child restraint law"]
    .count()
)
display(child_by_income)

In [None]:
# All countries: deaths table left-joined with the seat-belt table on country name
df_new2 = df_raw_type.merge(df_raw_seatbelt, how='left', on='Country/Area')

In [None]:
# Percentage of rows answering "Yes" in the national seat-belt law column.
# The numerator is a per-column non-null count of the "Yes" rows, the denominator the
# non-null count of the law column, so the result is a Series with one value per column.
(
    df_raw_seatbelt[df_raw_seatbelt["National seat\nbelt law"].eq("Yes")]
    .count()
    .div(df_raw_seatbelt["National seat\nbelt law"].count())
    .mul(100)
)

In [None]:
# Percentage of rows answering "No" in the national seat-belt law column.
# The numerator is a per-column non-null count of the "No" rows, the denominator the
# non-null count of the law column, so the result is a Series with one value per column.
(
    df_raw_seatbelt[df_raw_seatbelt["National seat\nbelt law"].eq("No")]
    .count()
    .div(df_raw_seatbelt["National seat\nbelt law"].count())
    .mul(100)
)

In [None]:
# Number of rows for each (income level, seat-belt-law answer) pair
seatbelt_by_income = (
    df_new2
    .groupby(by=["Income levelc", "National seat\nbelt law"])["National seat\nbelt law"]
    .count()
)
display(seatbelt_by_income)

In [None]:
# All countries: deaths table left-joined with the mobile-phone table on country name
df_new3 = df_raw_type.merge(df_raw_phones, how='left', on='Country/Area')

In [None]:
# Percentage of rows answering "Yes" in "National mobile phone law".
# The numerator is a per-column non-null count of the "Yes" rows, the denominator the
# non-null count of the law column, so the result is a Series with one value per column.
(
    df_raw_phones[df_raw_phones["National mobile phone law"].eq("Yes")]
    .count()
    .div(df_raw_phones["National mobile phone law"].count())
    .mul(100)
)

In [None]:
# Percentage of rows answering "No" in "National mobile phone law".
# The numerator is a per-column non-null count of the "No" rows, the denominator the
# non-null count of the law column, so the result is a Series with one value per column.
(
    df_raw_phones[df_raw_phones["National mobile phone law"].eq("No")]
    .count()
    .div(df_raw_phones["National mobile phone law"].count())
    .mul(100)
)

In [None]:
# Number of rows for each (income level, mobile-phone-law answer) pair
mobile_by_income = (
    df_new3
    .groupby(by=["Income levelc", "National mobile phone law"])["National mobile phone law"]
    .count()
)
display(mobile_by_income)

In [None]:
# Cast the percentage / wearing-rate columns to a numeric dtype, one column at a time
for rate_column in (
    "percentage child restraints",
    "rates seat belt drivers",
    "rates seat belt front seat",
    "rates seat belt rear seat",
    "rates seat belt all",
):
    df[rate_column] = pd.to_numeric(df[rate_column])

In [None]:
# Print a summary of `df` via DataFrame.info(), to check the dtypes and missing
# values after the numeric conversion
df.info()

In [None]:
# Show descriptive statistics for `df` via DataFrame.describe()
df.describe()

In [None]:
# Columns needed for the child-restraint analysis, taken from the merged table
child_restraints = df.loc[:, ["population","number of deaths","estimated road death","percentage child restraints"]]

# Apply mlxtend's `standardize` to those columns. The returned value is only shown as
# the cell output; `child_restraints` itself is not reassigned.
standardize(
    child_restraints,
    columns=['population', 'percentage child restraints','estimated road death','number of deaths'],
)

In [None]:
# Pie chart: total population per income level
colors = ["burlywood", "olive", "orangered"]
income = df_raw_type.groupby("Income levelc")["Population numbers for 2016a"].sum()

plt.figure(figsize=(6,6))
# The first wedge is pulled out slightly (explode) to highlight it
count_chart = income.plot.pie(
    colors=colors,
    explode=(0.1, 0, 0),
    autopct="%1.1f%%",
    shadow=True,
    startangle=140,
)
count_chart.set_title("Calculate Population by Income")
count_chart.axis("equal")

# Saved to the working directory
plt.savefig("piechart_populationbyincome.png")
plt.show()

In [None]:
# Donut chart: total population per income level, with boxed captions on leader lines
fig, ax = plt.subplots(figsize=(8, 8), subplot_kw={"aspect": "equal"})

# Hand-typed captions, one per wedge.
# NOTE(review): presumably in the same order as the grouped sums below and with
# percentages copied from the pie chart — re-check if the data changes.
recipe = ["High(15.7%)", "Low(8.4%)", "Middle(75.9%)"]

data = df_raw_type.groupby("Income levelc")["Population numbers for 2016a"].sum()
colors = ["burlywood", "olive", "orangered"]
wedges, texts = ax.pie(data, wedgeprops={"width": 0.5}, startangle=140, colors=colors)

# Shared annotation style: white box, plain connector line
bbox_props = {"boxstyle": "square,pad=0.3", "fc": "w", "ec": "k", "lw": 0.72}
kw = {"arrowprops": {"arrowstyle": "-"}, "bbox": bbox_props, "zorder": 0, "va": "center"}

# Thin the ring and separate the wedges with a white edge
plt.setp(wedges, width=0.3, edgecolor='white')

# Captions on the right half are left-aligned, on the left half right-aligned
side_to_alignment = {-1: "right", 1: "left"}

for idx, wedge in enumerate(wedges):
    # Angle at the middle of the wedge, and the matching point on the unit circle
    ang = (wedge.theta2 - wedge.theta1)/2. + wedge.theta1
    theta = np.deg2rad(ang)
    x, y = np.cos(theta), np.sin(theta)
    kw["arrowprops"]["connectionstyle"] = "angle,angleA=0,angleB={}".format(ang)
    ax.annotate(recipe[idx], xy=(x, y), xytext=(1.35*np.sign(x), 1.4*y),
                horizontalalignment=side_to_alignment[int(np.sign(x))], **kw)

ax.set_title("Calculate Population by Income", fontsize=12)
plt.savefig("output_data/piechart_populationbyincome.png")
plt.show()

In [None]:
# Pie chart: sum of the "death estimate" column per income level
colors = ["burlywood", "olive", "orangered"]
income = df_raw_type.groupby("Income levelc")["death estimate"].sum()

plt.figure(figsize=(6,6))
# The first wedge is pulled out slightly (explode) to highlight it
count_chart = income.plot.pie(
    colors=colors,
    explode=(0.1, 0, 0),
    autopct="%1.1f%%",
    shadow=True,
    startangle=140,
)
count_chart.set_title("Calculate Number of Death by Income")
count_chart.axis("equal")

# Saved to the working directory
plt.savefig("piechart_deathsbyincome.png")
plt.show()

In [None]:
# Donut chart: sum of the "death estimate" column per income level, with boxed captions
fig, ax = plt.subplots(figsize=(8, 8), subplot_kw={"aspect": "equal"})

# Hand-typed captions, one per wedge.
# NOTE(review): presumably in the same order as the grouped sums below and with
# percentages copied from the pie chart — re-check if the data changes.
recipe = ["High(7.2%)", "Low(12.9%)", "Middle(79.9%)"]

data = df_raw_type.groupby("Income levelc")["death estimate"].sum()
colors = ["burlywood", "olive", "orangered"]
wedges, texts = ax.pie(data, wedgeprops={"width": 0.5}, startangle=140, colors=colors)

# Shared annotation style: white box, plain connector line
bbox_props = {"boxstyle": "square,pad=0.3", "fc": "w", "ec": "k", "lw": 0.72}
kw = {"arrowprops": {"arrowstyle": "-"}, "bbox": bbox_props, "zorder": 0, "va": "center"}

# Thin the ring and separate the wedges with a white edge
plt.setp(wedges, width=0.3, edgecolor='white')

# Captions on the right half are left-aligned, on the left half right-aligned
side_to_alignment = {-1: "right", 1: "left"}

for idx, wedge in enumerate(wedges):
    # Angle at the middle of the wedge, and the matching point on the unit circle
    ang = (wedge.theta2 - wedge.theta1)/2. + wedge.theta1
    theta = np.deg2rad(ang)
    x, y = np.cos(theta), np.sin(theta)
    kw["arrowprops"]["connectionstyle"] = "angle,angleA=0,angleB={}".format(ang)
    ax.annotate(recipe[idx], xy=(x, y), xytext=(1.35*np.sign(x), 1.4*y),
                horizontalalignment=side_to_alignment[int(np.sign(x))], **kw)

ax.set_title("Calculate Number of Death by Income", fontsize=12)
plt.savefig("output_data/piechart_deathsbyincome.png")
plt.show()

In [None]:
# Grouped bar chart: child restraint law ("Yes" / "No") counts per income level
# NOTE(review): the counts are typed in by hand, presumably from `child_by_income` — verify.
labels = ['High', 'Middle', 'Low']
yes = [44, 35, 5]
no = [5, 63, 23]

# One tick per income level; the two bars of a pair sit left and right of the tick
x = np.arange(len(labels))
width = 0.35

fig, ax = plt.subplots()
rects1 = ax.bar(x - width/2, height=yes, width=width, label='Yes')
rects2 = ax.bar(x + width/2, height=no, width=width, label='No')

# Axis label, title, tick labels and legend
ax.set_ylabel('number of usage child restraints')
ax.set_title('Child restraints by income and number of usage this law')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend()


def autolabel(rects):
    """Write each bar's height just above its top edge, on the notebook-level `ax`."""
    for bar in rects:
        value = bar.get_height()
        centre = bar.get_x() + bar.get_width() / 2
        ax.annotate(f'{value}',
                    xy=(centre, value),
                    xytext=(0, 3),  # lift the text 3 points above the bar
                    textcoords="offset points",
                    ha='center', va='bottom')


for bars in (rects1, rects2):
    autolabel(bars)

fig.tight_layout()
plt.savefig("output_data/child_restraints_income.png")
plt.show()

In [None]:
# Scatter plot of child-restraint usage against the estimated road death rate,
# with a least-squares regression line on top
plt.figure(figsize=(8,8))

# One colour value per plotted row. The count follows the data instead of a hard-coded
# 12, and a fixed seed keeps the figure identical between runs.
n = len(child_restraints)
colors = np.random.default_rng(42).random(n)

x_values=child_restraints["estimated road death"]
y_values=child_restraints["percentage child restraints"]

# Marker area grows with population (scaled down so the markers fit the figure)
scale = child_restraints["population"]/100000
plt.scatter(x_values, y_values, c=colors,s=scale,alpha=0.5, edgecolors='none')

# Linear regression of y on x, drawn as a red line with its equation annotated
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(7,50),fontsize=15,color="red")

# Give our chart some labels and a title
plt.title("Influence of Child Restraints Laws on Road's Deaths", fontsize=16)
plt.xlabel("Estimated road traffic death rate per 100,000 populationd")
plt.ylabel("Child Restraints")

print(f"The r-squared is: {rvalue**2}")

# Draw the grid before saving so the saved image matches what is shown
plt.grid()
plt.savefig("output_data/child_restraints_laws.png")
plt.show()

In [None]:
# Grouped bar chart: seat-belt law ("Yes" / "No") counts per income level
# NOTE(review): the counts are typed in by hand, presumably from `seatbelt_by_income` — verify.
labels = ['High', 'Middle', 'Low']
yes = [47, 93, 21]
no = [2, 5, 7]

# One tick per income level; the two bars of a pair sit left and right of the tick
x = np.arange(len(labels))
width = 0.35

fig, ax = plt.subplots()
rects1 = ax.bar(x - width/2, height=yes, width=width, label='Yes')
rects2 = ax.bar(x + width/2, height=no, width=width, label='No')

# Axis label, title, tick labels and legend
ax.set_ylabel('number of usage seat-belt')
ax.set_title('Seat-belt by income and number of usage this law')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend()


def autolabel(rects):
    """Write each bar's height just above its top edge, on the notebook-level `ax`."""
    for bar in rects:
        value = bar.get_height()
        centre = bar.get_x() + bar.get_width() / 2
        ax.annotate(f'{value}',
                    xy=(centre, value),
                    xytext=(0, 3),  # lift the text 3 points above the bar
                    textcoords="offset points",
                    ha='center', va='bottom')


for bars in (rects1, rects2):
    autolabel(bars)

fig.tight_layout()
plt.savefig("output_data/seatbelt_income.png")
plt.show()

In [None]:
# Grouped bar chart per country: four seat-belt wearing rates plus the road death rate

# Bar width and figure size
barWidth = 0.17
plt.figure(figsize=(20,10))

# Bar heights: one series per bar within a country group
bars1 = df["rates seat belt drivers"]
bars2 = df["rates seat belt front seat"]
bars3 = df["rates seat belt rear seat"]
bars4 = df["rates seat belt all"]
bars5 = child_restraints["estimated road death"]

# X positions: each series is shifted one bar width to the right of the previous one
r1 = np.arange(len(bars1))
r2 = r1 + barWidth
r3 = r1 + 2 * barWidth
r4 = r1 + 3 * barWidth
r5 = r1 + 4 * barWidth

# Make the plot
plt.bar(r1, bars1, color='olive', width=barWidth, edgecolor='white', label='rates seat belt drivers')
plt.bar(r2, bars2, color='goldenrod', width=barWidth, edgecolor='white', label='rates seat belt front seat')
plt.bar(r3, bars3, color='rosybrown', width=barWidth, edgecolor='white', label='rates seat belt rear seat')
plt.bar(r4, bars4, color='gold', width=barWidth, edgecolor='white', label='rates seat belt all')
plt.bar(r5, bars5, color='orangered', width=barWidth, edgecolor='white', label='rates of deaths')

# Give our chart some labels and a title
plt.title('Wearing Seat-belt', fontsize='16', fontweight='bold')
plt.xlabel('Countries', fontsize='14', fontweight='bold')
plt.ylabel('Rate of wearing seat-belt',fontsize='14', fontweight='bold')

# Put each country label under the middle of its group: with five bars centred at
# r, r+w, ..., r+4w the middle bar sits at r + 2w (r + w was off-centre).
plt.xticks(r1 + 2 * barWidth, df["country"], rotation=90)

# Grid and legend go on before saving so the saved image includes them
plt.grid()
plt.legend(loc="best")
plt.savefig("output_data/wearing_seat_belt.png")
plt.show()


In [None]:
# Grouped bar chart: mobile phone law ("Yes" / "No") counts per income level
# NOTE(review): the counts are typed in by hand, presumably from `mobile_by_income` — verify.
labels = ['High', 'Middle', 'Low']
yes = [47, 84, 19]
no = [2, 14, 9]

# One tick per income level; the two bars of a pair sit left and right of the tick
x = np.arange(len(labels))
width = 0.35

fig, ax = plt.subplots()
rects1 = ax.bar(x - width/2, height=yes, width=width, label='Yes')
rects2 = ax.bar(x + width/2, height=no, width=width, label='No')

# Axis label, title, tick labels and legend
ax.set_ylabel('number of usage mobile phone laws')
ax.set_title('Mobile phone laws by income and number of usage this law')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend()


def autolabel(rects):
    """Write each bar's height just above its top edge, on the notebook-level `ax`."""
    for bar in rects:
        value = bar.get_height()
        centre = bar.get_x() + bar.get_width() / 2
        ax.annotate(f'{value}',
                    xy=(centre, value),
                    xytext=(0, 3),  # lift the text 3 points above the bar
                    textcoords="offset points",
                    ha='center', va='bottom')


for bars in (rects1, rects2):
    autolabel(bars)

fig.tight_layout()
plt.savefig("output_data/mobile_income.png")
plt.show()

In [None]:
# Number of rows for each (income level, hand-held-phone answer) pair
detail_mobile_by_income1 = (
    df_new3
    .groupby(by=["Income levelc", "Mobile phone law Applies to hand-held phone use"])["Mobile phone law Applies to hand-held phone use"]
    .count()
)
display(detail_mobile_by_income1)

In [None]:
# Number of rows for each (income level, hands-free-phone answer) pair
detail_mobile_by_income2 = (
    df_new3
    .groupby(by=["Income levelc", "Mobile phone law Applies to hands-free phone use"])["Mobile phone law Applies to hands-free phone use"]
    .count()
)
display(detail_mobile_by_income2)

In [None]:
# Number of rows for each (income level, "any data on phone use while driving" answer) pair
detail_mobile_by_income3 = (
    df_new3
    .groupby(by=["Income levelc", "Any data on use of mobile phone while driving"])["Any data on use of mobile phone while driving"]
    .count()
)
display(detail_mobile_by_income3)

In [None]:
# Grouped barplot
def mk_groups(data):
    """Flatten a nested mapping into one list of (label, value) pairs per nesting level.

    Parameters
    ----------
    data : mapping or leaf value
        Nested dict-like object (anything with ``.items()``); leaves are the values
        that have no ``.items()``.

    Returns
    -------
    list of list of tuple, or None
        ``None`` when `data` is a leaf. Otherwise a list of levels, outermost first:
        for a key whose value is itself a mapping the pair is
        ``(key, number of entries in the deepest level below it)``; for a key whose
        value is a leaf the pair is ``(key, value)``.
    """
    try:
        newdata = data.items()
    except AttributeError:
        # No .items(): `data` is a leaf. Only this error is treated as "leaf" so
        # that unrelated failures (or KeyboardInterrupt) are not silently swallowed.
        return None

    thisgroup = []
    groups = []
    for key, value in newdata:
        newgroups = mk_groups(value)
        if newgroups is None:
            thisgroup.append((key, value))
        else:
            # Width of this key = number of entries in the deepest level of its subtree
            thisgroup.append((key, len(newgroups[-1])))
            if groups:
                # Extend every deeper level with the matching level of this subtree
                groups = [g + n for n, g in zip(newgroups, groups)]
            else:
                groups = newgroups
    return [thisgroup] + groups

def add_line(ax, xpos, ypos):
    """Draw a short grey vertical line on `ax` at x = `xpos`, from `ypos` + 0.1 to `ypos`.

    Coordinates are in axes units (``ax.transAxes``); clipping is turned off on the
    line, so it is still drawn when the coordinates fall outside the axes box.
    """
    xs = [xpos, xpos]
    ys = [ypos + .1, ypos]
    separator = plt.Line2D(xs, ys, transform=ax.transAxes, color='grey')
    separator.set_clip_on(False)
    ax.add_line(separator)

def label_group_bar(ax, data, title='How to use mobile phone in different countries',
                    colors=("orangered", "olive")):
    """Draw a bar chart of a nested mapping with one row of group labels per level.

    The innermost (key, value) pairs become the bars and their tick labels. Each
    outer nesting level is written as a row of text below the axis, with short
    separator lines marking the group boundaries.

    Parameters
    ----------
    ax : matplotlib Axes
        Axes to draw on.
    data : mapping
        Nested dict-like object, as accepted by `mk_groups`.
    title : str, optional
        Axes title; defaults to the title previously hard-coded here.
    colors : sequence of str, optional
        Bar colours passed to ``ax.bar``; defaults to the previous fixed pair.

    Raises
    ------
    ValueError
        If `data` is not a mapping or contains no bars to draw.
    """
    groups = mk_groups(data)
    if not groups or not groups[-1]:
        raise ValueError("data must be a non-empty nested mapping of labels to bar heights")

    # Deepest level: the bars themselves
    xy = groups.pop()
    x, y = zip(*xy)
    ly = len(y)
    xticks = range(1, ly + 1)
    ax.set_title(title)
    ax.bar(xticks, y, align='center', color=list(colors))
    ax.set_xticks(xticks)

    ax.set_xticklabels(x)
    ax.set_xlim(.5, ly + .5)
    ax.yaxis.grid(True)

    # Width of one bar slot in axes units; a separator goes on every slot boundary
    scale = 1. / ly
    for pos in range(ly + 1):
        add_line(ax, pos * scale, -.1)

    # Remaining levels, innermost first, each one row lower than the previous
    ypos = -.2
    while groups:
        group = groups.pop()
        pos = 0
        for label, rpos in group:
            # Centre the label over the `rpos` slots it spans
            lxpos = (pos + .5 * rpos) * scale
            ax.text(lxpos, ypos, label, ha='center', transform=ax.transAxes)
            add_line(ax, pos * scale, ypos)
            pos += rpos
        add_line(ax, pos * scale, ypos)
        ypos -= .1

if __name__ == '__main__':
    # Yes/No counts per income level for each mobile-phone question.
    # NOTE(review): the values are typed in by hand, presumably from the
    # `detail_mobile_by_income*` tables — verify against their output.
    # The 'hand-held phone' Middle and Low entries were swapped: every other tally in
    # this notebook has 98 middle-income and 28 low-income countries, so 81 + 17
    # belongs to Middle and 19 + 9 to Low.
    data = {'hand-held phone':
               {'High':
                   {'Yes': 45,
                    'No': 4},
                'Middle':
                   {'Yes': 81,
                    'No': 17},
                'Low':
                   {'Yes': 19,
                    'No': 9}
               },
            'hand-free phone':
               {'High':
                   {'Yes': 0,
                    'No': 49},
                'Middle':
                   {'Yes': 27,
                    'No': 71},
                'Low':
                   {'Yes': 8,
                    'No': 20}
               },
            'any used phone':
               {'High':
                   {'Yes': 34,
                    'No': 15},
                'Middle':
                   {'Yes': 46,
                    'No': 52},
                'Low':
                   {'Yes': 5,
                    'No': 23}
               }

           }
    fig = plt.figure(figsize=(20,10))

    ax = fig.add_subplot(1,1,1)
    label_group_bar(ax, data)
    # Leave room below the axes for the two extra rows of group labels
    fig.subplots_adjust(bottom=0.3)
    fig.savefig('output_data/use_mobile_phone.png')