# Accidents corporels de la circulation routière (ACCR) en France

Pour chaque accident corporel (soit un accident survenu sur une voie ouverte à la circulation publique, impliquant au moins un véhicule et ayant fait au moins une victime ayant nécessité des soins), des saisies d’information décrivant l’accident sont effectuées par l’unité des forces de l’ordre (police, gendarmerie, etc.) qui est intervenue sur le lieu de l’accident. Ces saisies sont rassemblées dans une fiche intitulée bulletin d’analyse des accidents corporels. L’ensemble de ces fiches constitue le fichier national des accidents corporels de la circulation dit « Fichier BAAC » administré par l’Observatoire national interministériel de la sécurité routière "ONISR".

In [1]:
from pathlib import Path
import pandas as pd
import numpy as np

In [2]:
!pwd

/home/godwin/Documents/academic/esgi/s1/hackaton/hackaton-accr/accr/notebook


In [3]:
HERE = Path.cwd().parent.parent
DATASET_PATH = HERE / 'dataset'
DATASET_PATH

PosixPath('/home/godwin/Documents/academic/esgi/s1/hackaton/hackaton-accr/dataset')

In [4]:
import sys
sys.path.append(str(HERE))

## Dataset 

In [5]:
caracteristiques = pd.read_csv(
    f'{DATASET_PATH}/caract-2023.csv',
    sep=';',
)

lieux = pd.read_csv(
    f'{DATASET_PATH}/lieux-2023.csv', 
    sep=';',
    dtype={'lartpc': str}
)
vehicules = pd.read_csv(
    f'{DATASET_PATH}/vehicules-2023.csv',
    sep=';'
    )

usagers = pd.read_csv(
    f'{DATASET_PATH}/usagers-2023.csv',
    sep=';'
    )

In [6]:
print(caracteristiques.shape)
caracteristiques.head()

(54822, 15)


Unnamed: 0,Num_Acc,jour,mois,an,hrmn,lum,dep,com,agg,int,atm,col,adr,lat,long
0,202300000001,7,5,2023,06:00,1,75,75101,2,4,2,7,RUE DE RIVOLI,4886638600,232347100
1,202300000002,7,5,2023,05:30,5,94,94080,2,1,3,6,Avenue de Paris,4884547782,242868146
2,202300000003,7,5,2023,20:50,1,94,94022,2,3,2,1,Avenue du Général Leclerc,4876240000,240655000
3,202300000004,6,5,2023,23:57,5,94,94078,2,1,3,5,Rue de Paris,4873248432,244687575
4,202300000005,7,5,2023,00:50,5,94,94068,2,2,3,3,56bis Avenue Raspail,4878581000,249217000


In [7]:
print(vehicules.shape)
vehicules.head()

(93585, 11)


Unnamed: 0,Num_Acc,id_vehicule,num_veh,senc,catv,obs,obsm,choc,manv,motor,occutc
0,202300000001,155 680 557,A01,1,30,0,0,5,1,1,
1,202300000002,155 680 556,A01,2,7,0,1,1,1,1,
2,202300000003,155 680 554,B01,1,2,0,2,1,16,1,
3,202300000003,155 680 555,A01,2,7,0,2,2,15,1,
4,202300000004,155 680 551,B01,1,7,0,2,9,2,4,


In [8]:
print(usagers.shape)
usagers.head()

(125789, 16)


Unnamed: 0,Num_Acc,id_usager,id_vehicule,num_veh,place,catu,grav,sexe,an_nais,trajet,secu1,secu2,secu3,locp,actp,etatp
0,202300000001,203 851 184,155 680 557,A01,1,1,4,1,1978.0,5,2,0,-1,-1,-1,-1
1,202300000002,203 851 182,155 680 556,A01,1,1,1,2,1997.0,9,1,0,-1,-1,-1,-1
2,202300000002,203 851 183,155 680 556,A01,10,3,3,1,1997.0,9,0,-1,-1,2,3,1
3,202300000003,203 851 180,155 680 554,B01,1,1,3,1,1987.0,0,2,6,0,0,0,-1
4,202300000003,203 851 181,155 680 555,A01,1,1,1,2,1984.0,0,1,0,0,0,0,-1


In [9]:
print(lieux.shape)
lieux.head()

(70860, 18)


Unnamed: 0,Num_Acc,catr,voie,v1,v2,circ,nbv,vosp,prof,pr,pr1,plan,lartpc,larrout,surf,infra,situ,vma
0,202300000001,4,RUE DE RIVOLI,0,,1,2,0,1,-1,-1,1,,-1,2,0,1,30
1,202300000001,4,RUE SAINT FLORENTIN,0,,1,1,0,1,-1,-1,1,,-1,2,0,1,30
2,202300000002,3,120,0,,2,3,2,1,-1,-1,1,,-1,2,0,1,50
3,202300000003,3,5,0,,2,4,0,1,1,0,1,,-1,2,5,1,50
4,202300000003,3,87,0,,2,4,0,1,1,0,1,,-1,2,5,1,50


## Data modeling

In [11]:
caract_lieux = pd.merge(caracteristiques, lieux, on='Num_Acc', how='inner')
caract_lieux_veh = pd.merge(caract_lieux, vehicules, on='Num_Acc', how='inner')
final_df = pd.merge(caract_lieux_veh, usagers, on=['Num_Acc', 'id_vehicule'], how='inner')

print(final_df.shape)
final_df.head()

(163683, 56)


Unnamed: 0,Num_Acc,jour,mois,an,hrmn,lum,dep,com,agg,int,...,grav,sexe,an_nais,trajet,secu1,secu2,secu3,locp,actp,etatp
0,202300000001,7,5,2023,06:00,1,75,75101,2,4,...,4,1,1978.0,5,2,0,-1,-1,-1,-1
1,202300000001,7,5,2023,06:00,1,75,75101,2,4,...,4,1,1978.0,5,2,0,-1,-1,-1,-1
2,202300000002,7,5,2023,05:30,5,94,94080,2,1,...,1,2,1997.0,9,1,0,-1,-1,-1,-1
3,202300000002,7,5,2023,05:30,5,94,94080,2,1,...,3,1,1997.0,9,0,-1,-1,2,3,1
4,202300000003,7,5,2023,20:50,1,94,94022,2,3,...,3,1,1987.0,0,2,6,0,0,0,-1


## Traitement des données

In [12]:
from accr.data.processing import Processing

processing = Processing()

In [13]:
np.unique(final_df.actp)

array([' -1', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B'],
      dtype=object)

In [14]:
# final_df.dtypes

In [15]:
# remplacer les valeurs vides, -1, 0, (.) par np.nan
clean_df = processing.replace_missing_values(final_df)

In [16]:
print(clean_df.shape)
clean_df.head()

(163683, 56)


Unnamed: 0,Num_Acc,jour,mois,an,hrmn,lum,dep,com,agg,int,...,grav,sexe,an_nais,trajet,secu1,secu2,secu3,locp,actp,etatp
0,202300000001,7,5,2023,06:00,1.0,75,75101,2,4.0,...,4.0,1.0,1978.0,5.0,2.0,,,,,
1,202300000001,7,5,2023,06:00,1.0,75,75101,2,4.0,...,4.0,1.0,1978.0,5.0,2.0,,,,,
2,202300000002,7,5,2023,05:30,5.0,94,94080,2,1.0,...,1.0,2.0,1997.0,9.0,1.0,,,,,
3,202300000002,7,5,2023,05:30,5.0,94,94080,2,1.0,...,3.0,1.0,1997.0,9.0,,,,2.0,3.0,1.0
4,202300000003,7,5,2023,20:50,1.0,94,94022,2,3.0,...,3.0,1.0,1987.0,,2.0,6.0,,,,


In [17]:
np.unique(clean_df.actp.to_string)

array([<bound method Series.to_string of 0         NaN
       1         NaN
       2         NaN
       3           3
       4         NaN
                ...
       163678    NaN
       163679    NaN
       163680    NaN
       163681    NaN
       163682    NaN
       Name: actp, Length: 163683, dtype: object>     ], dtype=object)