In [71]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

In [72]:
road: pd.DataFrame = pd.read_csv("datas/caracteristics.csv", dtype={"long": str})

# remove the rows that contains null values
road = road[road.int != 0]

In [73]:
road.head(5)

Unnamed: 0,Num_Acc,an,mois,jour,hrmn,lum,agg,int,atm,col,com,adr,gps,lat,long,dep
0,201600000001,16,2,1,1445,1,2,1,8.0,3.0,5.0,"46, rue Sonneville",M,,,590
1,201600000002,16,3,16,1800,1,2,6,1.0,6.0,5.0,1a rue du cimeti�re,M,0.0,0.0,590
2,201600000003,16,7,13,1900,1,1,1,1.0,6.0,11.0,,M,0.0,0.0,590
3,201600000004,16,8,15,1930,2,2,1,7.0,3.0,477.0,52 rue victor hugo,M,0.0,0.0,590
4,201600000005,16,12,23,1100,1,2,3,1.0,3.0,11.0,rue Joliot curie,M,0.0,0.0,590


The column "int" contains the type of intersection :

    1 - Out of intersection

    2 - Intersection in X

    3 - Intersection in T

    4 - Intersection in Y

    5 - Intersection with more than 4 branches

    6 - Giratory

    7 - Place

    8 - Level crossing

    9 - Other intersection



In [74]:
dict_int_names = {
    '1' : 'Out of intersection',
    '2' : 'Intersection in X',
    '3' : 'Intersection in T',
    '4' : 'Intersection in Y',
    '5' : 'Intersection with more than 4 branches',
    '6' : 'Giratory',
    '7' : 'Place',
    '8' : 'Level crossing',
    '9' : 'Other intersection'
}

In [75]:
road.head(4)

Unnamed: 0,Num_Acc,an,mois,jour,hrmn,lum,agg,int,atm,col,com,adr,gps,lat,long,dep
0,201600000001,16,2,1,1445,1,2,1,8.0,3.0,5.0,"46, rue Sonneville",M,,,590
1,201600000002,16,3,16,1800,1,2,6,1.0,6.0,5.0,1a rue du cimeti�re,M,0.0,0.0,590
2,201600000003,16,7,13,1900,1,1,1,1.0,6.0,11.0,,M,0.0,0.0,590
3,201600000004,16,8,15,1930,2,2,1,7.0,3.0,477.0,52 rue victor hugo,M,0.0,0.0,590


In [76]:
# change the int values to the corresponding text for the type of road
road["type"] = road.int.apply(lambda x: dict_int_names[str(x)])

### On va pas interpréter les valeurs qui sont "Out of intersection" car on s'intéresse au type le plus dangereux d'intersections.

### Cependant, pour information, on peut voir le nombre d'accidents qui a lieu dans une intersections et en dehors

In [77]:
count = road.type.value_counts()[0]
print(f'nous avons {count} accidents en dehors d\'une intersections')

# we drop the "out of intersection columns"
road = road[road.type != 'Out of intersection']

count_total = road.type.value_counts().sum()
print(f'nous avons {count_total} accidents dans des intersections')

road

nous avons 599652 accidents en dehors d'une intersections
nous avons 240227 accidents dans des intersections


Unnamed: 0,Num_Acc,an,mois,jour,hrmn,lum,agg,int,atm,col,com,adr,gps,lat,long,dep,type
1,201600000002,16,3,16,1800,1,2,6,1.0,6.0,5.0,1a rue du cimeti�re,M,0.0,0,590,Giratory
4,201600000005,16,12,23,1100,1,2,3,1.0,3.0,11.0,rue Joliot curie,M,0.0,0,590,Intersection in T
14,201600000015,16,8,17,1930,1,1,6,7.0,5.0,466.0,,M,0.0,0,590,Giratory
19,201600000020,16,9,5,2030,5,2,6,2.0,3.0,452.0,rue Emile Macquart,M,0.0,0,590,Giratory
21,201600000022,16,4,2,1045,1,1,2,1.0,3.0,402.0,Rte de Lederzeele,M,5084579.0,226407,590,Intersection in X
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
839974,200500087944,5,12,9,1615,1,2,2,1.0,3.0,416.0,"SANS, FOUR A CHAUX(RUE D",,,,974,Intersection in X
839975,200500087945,5,12,12,1000,1,2,2,1.0,3.0,414.0,TOULOUSE(AVENUE),,,,974,Intersection in X
839980,200500087950,5,12,21,2035,5,2,2,1.0,3.0,416.0,"sans, LEBLOND(RUE M. ET",,,,974,Intersection in X
839982,200500087952,5,12,26,1715,1,2,2,1.0,3.0,416.0,"SANS, LEBLOND(RUE M. ET",,,,974,Intersection in X


### Histogramme - types de route avec le plus d'accidents

In [78]:
fig = px.histogram(road, x="type")
fig.show()

### On voit que c'est les intersections à 4 routes (en 'X') qui sont les plus dangereuses.

In [79]:
list_percentage=[]

#remove the 'Out of Intersection' kye
dict_int_names.pop('1', None)

for values in dict_int_names.values():
    list_percentage.append((road.loc[road['type'] == values].shape[0] / count_total)*100)

In [100]:
#road['percentage']=road.type.apply(lambda x: df['MyColumn'].sum()])

dict_percentage={}

#remove the 'Out of Intersection' kye
dict_int_names.pop('1', None)

for value in dict_int_names.values():
    percent = (road.loc[road['type'] == value].shape[0] / count_total) * 100
    dict_percentage[value] = round(percent, 2)

df = pd.DataFrame([dict_percentage])

Unnamed: 0,Intersection in X,Intersection in T,Intersection in Y,Intersection with more than 4 branches,Giratory,Place,Level crossing,Other intersection
0,42.88,30.71,4.78,3.9,9.31,2.53,0.38,5.51


In [90]:
# This dataframe has 244 lines, but 4 distinct values for `day`
fig = px.pie(dict_percentage.keys, values=dict_percentage.values, names=dict_percentage.keys)
fig.show()

ValueError: DataFrame constructor not properly called!