## Reading data

In [1]:
# Importing libraries
import pandas as pd
import numpy as np
import datetime

# Auxiliar functions created by us
import functions

In [2]:
# Building the list of yearly accident files: one CSV per year, from 2010 to 2019 (both included)
years = range(2010, 2019 + 1)
files = ["./accidentes/" + str(year) + "_Accidentalidad.csv" for year in years]

In [3]:
# Reading every yearly file and concatenating all the data.
# The tables are collected first and concatenated once: calling pd.concat inside
# the loop copies the accumulated rows again on every iteration and starts from
# an empty DataFrame.
yearly_tables = [
    pd.read_csv(file, sep=";", skiprows=0, encoding="latin-1", low_memory=False)
    for file in files
]
# ignore_index gives every row its own label instead of repeating each file's labels
accidents = pd.concat(yearly_tables, axis=0, ignore_index=True)

## Cleaning data

In [4]:
# Converting column names to snake_case (lowercase, spaces replaced by underscores)
accidents.columns = accidents.columns.str.lower().str.replace(' ', '_', regex=False)

# Removing every blank space from the district and vehicle-type labels
accidents["distrito"] = accidents["distrito"].str.replace(" ", "", regex=False)
accidents["tipo_vehiculo"] = accidents["tipo_vehiculo"].str.replace(" ", "", regex=False)

# Filtering the center of madrid
central_districts = ['ARGANZUELA','CENTRO','CHAMARTIN','CHAMBERI','MONCLOA-ARAVACA','RETIRO','SALAMANCA','TETUAN']
centro = accidents[accidents["distrito"].isin(central_districts)]

# Taking the surrounding districts and keeping only the accidents whose place mentions the M-30.
# na=False: a missing 'lugar_accidente' counts as "not on the M-30"; without it the
# mask would contain NaN and the boolean selection would raise.
surrounding_districts = ['CARABANCHEL','CIUDADLINEAL','LATINA','MORATALAZ','PUENTEDEVALLECAS','USERA']
colindantes = accidents[accidents["distrito"].isin(surrounding_districts)]
on_m30 = colindantes['lugar_accidente'].str.contains("M-30", regex=False, na=False)
colindantes = colindantes[on_m30]

# Concatenating both selections back into a single table with a fresh index
accidents = pd.concat([centro, colindantes], axis=0).reset_index(drop=True)

In [5]:
# Classifying every accident into one of two zones according to its location:
# 'B' when the place of the accident mentions the M-30, 'A' (center) otherwise.
# na=False: a missing place yields NaN from str.contains, and np.where treats NaN
# as true, so those rows would silently be labelled 'B'.
on_m30 = accidents["lugar_accidente"].str.contains("M-30", regex=False, na=False)
accidents["zone"] = np.where(on_m30, 'B', 'A')

In [6]:
# Parsing 'fecha' (day/month/year text) into datetimes, then keeping 2013 onwards
accidents['fecha'] = pd.to_datetime(accidents['fecha'], format="%d/%m/%Y")
from_2013 = accidents['fecha'] >= '2013-01-01'
accidents = accidents.loc[from_2013].reset_index(drop=True)

In [7]:
# Removing the rows without a vehicle type because they are only a small percent of the data
missing_vehicle = accidents['tipo_vehiculo'].isna()
print('We only have a %.2f%% of NaNs.' %(missing_vehicle.sum()/len(accidents)*100))
accidents = accidents[~missing_vehicle]

We only have a 0.05% of NaNs.


In [8]:
# Lowercasing the vehicle categories (working on a copy of the filtered table)
accidents = accidents.copy()
accidents['tipo_vehiculo'] = accidents['tipo_vehiculo'].str.lower()

In [9]:
# Collapsing the raw vehicle labels into a small set of categories.
# Each rule pairs a row matcher with the category written on the matching rows.
# The rules run in order, each one on the column as left by the previous rule.
category_rules = [
    (lambda col: col.isin(['turismo','todoterreno']), 'turismo'),
    (lambda col: col.isin(['auto-taxi']), 'taxi'),
    (lambda col: col.str.contains(r"autob[uú]s",regex=True), 'autobus'),
    (lambda col: col.str.contains("motocicleta"), 'motocicleta'),
    (lambda col: col.str.contains(r"^ciclo",regex=True), 'ciclomotor'),
    (lambda col: col.isin(['camiónrígido','tractocamión']), 'camion'),
    (lambda col: col.str.contains("bicicleta"), 'bicicleta'),
]
for is_match, category in category_rules:
    current = accidents['tipo_vehiculo']
    accidents['tipo_vehiculo'] = np.where(is_match(current), category, current)

In [10]:
# Bicycles and the other categories below should not be counted towards the total, so their rows are dropped
excluded_categories = [
    'ambulancia','bicicleta','noasignado','sinespecificar','varios','tranvia',
    'patinete','remolque','cuadriciclonoligero','otrosvehiculossinmotor',
    'semiremolque','autocaravana','cuadricicloligero','veh.3ruedas',
    'vmueléctrico','maquinariadeobras','otrosvehiculosconmotor',
]
is_excluded = accidents['tipo_vehiculo'].isin(excluded_categories)
accidents = accidents[~is_excluded].reset_index(drop=True)

## Transforming data (aggregations)

In [11]:
# Counting the accidents of each vehicle type on each day and in each zone
# (n_accidents is the number of rows in every group), ordered by date
daily_groups = accidents.groupby(["tipo_vehiculo","fecha","zone"])
daily_counts = daily_groups.agg(n_accidents=("distrito",len))
accidents = daily_counts.sort_values(by='fecha').reset_index()

In [12]:
# Keeping 2013 onwards (same date threshold that was applied when 'fecha' was parsed)
# and displaying the resulting table
since_2013 = accidents['fecha'] >= '2013-01-01'
accidents = accidents.loc[since_2013].reset_index(drop=True)
accidents

Unnamed: 0,tipo_vehiculo,fecha,zone,n_accidents
0,taxi,2013-01-01,A,1
1,turismo,2013-01-01,A,29
2,turismo,2013-01-01,B,1
3,furgoneta,2013-01-01,A,2
4,ciclomotor,2013-01-01,A,1
...,...,...,...,...
14702,turismo,2019-12-30,B,7
14703,ciclomotor,2019-12-30,A,1
14704,motocicleta,2019-12-31,A,5
14705,turismo,2019-12-31,B,16


In [13]:
# Passing a copy of the table to the project helper and keeping what it returns.
# NOTE(review): aggregate_time2 lives in the local `functions` module, which is not
# shown here. According to the original note it creates a new column equal to the
# first day of the 15-day time range each row lies in; later cells read that
# column as 'time_range'. Confirm against the helper.
accidents = functions.aggregate_time2(accidents.copy(),'fecha',days=15)

100.0% completed.

In [14]:
# Grouping by vehicle type, time range and zone, adding up the daily counts of each group.
# The aggregation is named with the string 'sum': passing the Python builtin `sum`
# to agg is deprecated in recent pandas versions.
accidents = (
    accidents
    .groupby(['tipo_vehiculo','time_range','zone'])
    .agg({'n_accidents': 'sum'})
    .sort_values(by='time_range')
    .reset_index()
)

In [15]:
# Computing the total number of accidents per time range and per zone (A-B) and
# adding it as a column. A grouped transform writes each group's total on all of
# its rows in one pass, instead of scanning the whole table several times for
# every time range.
accidents = accidents.copy()
accidents['trange_total'] = (
    accidents
    .groupby(['time_range','zone'])['n_accidents']
    .transform('sum')
    .astype(float)  # the column was previously built as float; keep that dtype
)

In [16]:
# Share of the accidents of each row within the total of its time range and zone
accidents['prop_accidents'] = accidents['n_accidents'].div(accidents['trange_total'])

In [17]:
# Building the "key" used later on: vehicle type and zone joined by an underscore
accidents['key'] = accidents['tipo_vehiculo'].str.cat(accidents['zone'], sep="_")
accidents

Unnamed: 0,tipo_vehiculo,time_range,zone,n_accidents,trange_total,prop_accidents,key
0,autobus,2013-01-01,A,6,260.0,0.023077,autobus_A
1,motocicleta,2013-01-01,B,4,49.0,0.081633,motocicleta_B
2,motocicleta,2013-01-01,A,57,260.0,0.219231,motocicleta_A
3,turismo,2013-01-01,A,168,260.0,0.646154,turismo_A
4,turismo,2013-01-01,B,38,49.0,0.775510,turismo_B
...,...,...,...,...,...,...,...
1942,ciclomotor,2019-12-26,A,5,189.0,0.026455,ciclomotor_A
1943,turismo,2019-12-26,B,55,56.0,0.982143,turismo_B
1944,autobus,2019-12-26,A,8,189.0,0.042328,autobus_A
1945,furgoneta,2019-12-26,A,7,189.0,0.037037,furgoneta_A


# Computing coefficients

***(We did this, but we are no longer sure it was necessary: even though our approximated proportion of a given vehicle type is much more precise with these coefficients, we do not know whether the correlation has changed. We will keep it since we do not think it harms the model, but we do not know if it is useful.)***

We have data for the traffic flow. However, we do not have data on the individual flow $j_i$ of each category of vehicle (cars, motorbikes, buses, etc). We suppose the individual flow depends only on the proportion of circulating vehicles of that category, $j_i = J \times n_i/n_T$. However, we know neither $n_i$ (number of circulating vehicles of category i) nor $n_T$ (total number of vehicles circulating). So we approximate this value: we make the assumption that the proportion of accidents of vehicles i ($acc_i/acc_T$) is proportional to the proportion of vehicles i circulating, that is $n_i/n_T \approx \alpha_i \times acc_i / acc_T$. And so we have:
$$ j_i = J \times n_i /n_T \approx J \times \alpha_i \times acc_i/acc_T$$
This constant $\alpha_i$ is different for every category of vehicles, and it relates the real proportion of circulating vehicles of category i with the proportion of accidents of the same category (this constant would be one if each type of vehicle had the same chances of having an accident, but that is not true). We assume this constant does not change with time (the probability of having an accident with each vehicle should not change with time). Since we have data for $n_i/n_T$ for two specific years: 2017 and 2013, we can use the data of both to compute the constant of each category in each year, and then choose our coefficients as the mean of both. We will then use it for the rest of the years.

## Computing the coefficients alpha_i for 2017

In [18]:
# Filtering accident data for 2017 and adding up all its time ranges
accidents2017 = accidents[accidents['time_range'].dt.year == 2017]
accidents2017 = accidents2017.groupby(['tipo_vehiculo','zone']).agg({'n_accidents':'sum'})
accidents2017 = accidents2017.reset_index()

# We now compute the proportion of accidents of each kind of vehicle within its zone.
# The zone totals come from a grouped transform instead of being recomputed for
# every row inside an iterrows loop.
zone_totals = accidents2017.groupby('zone')['n_accidents'].transform('sum')
accidents2017['prop_accidents'] = accidents2017['n_accidents'] / zone_totals

accidents2017

Unnamed: 0,tipo_vehiculo,zone,n_accidents,prop_accidents
0,autobus,A,487,0.048535
1,autobus,B,2,0.0013
2,camion,A,91,0.009069
3,camion,B,28,0.018194
4,ciclomotor,A,297,0.029599
5,ciclomotor,B,5,0.003249
6,furgoneta,A,450,0.044848
7,furgoneta,B,89,0.05783
8,motocicleta,A,2526,0.251744
9,motocicleta,B,204,0.132554


In [19]:
# Loading the real traffic data for 2017 from its spreadsheet and previewing the first rows
real_park2017 = pd.read_excel(io="./traffic_flux/parque2017.xls")
real_park2017.head(n=5)

Unnamed: 0,Sector,Subsector,Tecnología,ZONA A,ZONA B,ZONA C,ZONA D,ZONA E,TOTAL
0,Autobuses,Autobuses <=15 t,Convencional,3.217288e-06,3.2e-05,5e-06,3.2e-05,2.6e-05,1.8e-05
1,Autobuses,Autobuses <=15 t,HD Euro I - 91/542/EEC Stage I,8.128383e-07,3e-06,9e-06,3e-06,8e-06,6e-06
2,Autobuses,Autobuses <=15 t,HD Euro II - 91/542/EEC Stage II,2.559878e-05,5e-05,5.8e-05,5e-05,0.00011,6.4e-05
3,Autobuses,Autobuses <=15 t,HD Euro III - 2000 Estándar,0.0002162824,0.000226,0.000204,0.000226,0.000434,0.000274
4,Autobuses,Autobuses <=15 t,HD Euro IV - 2005 Estándar,0.0002102973,0.000275,0.000316,0.000275,0.000475,0.000329


In [20]:
# Grouping by sector (type of vehicle) and adding up every zone column.
# The grouped .sum() replaces the Python builtin `sum` passed to agg (deprecated in
# recent pandas versions); a single reset_index already yields a clean 0..n-1
# index, so the second one was dropped.
zone_columns = ['ZONA A','ZONA B','ZONA C','ZONA D','ZONA E','TOTAL']
real_park2017 = real_park2017.groupby('Sector')[zone_columns].sum().reset_index()

In [21]:
# Only zones A and B are studied, so the other zone columns are dropped;
# the 'TOTAL' sector row is removed as well because it is not a vehicle type
real_park2017 = (
    real_park2017[['Sector','ZONA A','ZONA B']]
    .loc[lambda park: park['Sector'] != 'TOTAL']
    .reset_index(drop=True)
)

In [22]:
# Renaming the sectors so that they match the vehicle categories of the accident data;
# any sector missing from the lookup keeps its original label
real_park2017 = real_park2017.copy()
sector_to_category = {
    'Autobuses': 'autobus',
    'Vehículos Pesados': 'camion',
    'Ciclomotores': 'ciclomotor',
    'Vehículos Ligeros': 'furgoneta',
    'Motocicletas': 'motocicleta',
    'Taxis': 'taxi',
    'Turismos': 'turismo',
}
real_park2017['Sector'] = real_park2017['Sector'].map(
    lambda sector: sector_to_category.get(sector, sector)
)

In [23]:
# Joining the 2017 accident proportions with the real traffic data of the same
# vehicle type (inner join: only types present in both tables are kept)
after_coefficient2017 = accidents2017.merge(
    real_park2017,
    how='inner',
    left_on='tipo_vehiculo',
    right_on='Sector',
)
after_coefficient2017

Unnamed: 0,tipo_vehiculo,zone,n_accidents,prop_accidents,Sector,ZONA A,ZONA B
0,autobus,A,487,0.048535,autobus,0.021952,0.009709
1,autobus,B,2,0.0013,autobus,0.021952,0.009709
2,camion,A,91,0.009069,camion,0.016009,0.025562
3,camion,B,28,0.018194,camion,0.016009,0.025562
4,ciclomotor,A,297,0.029599,ciclomotor,0.003411,0.0
5,ciclomotor,B,5,0.003249,ciclomotor,0.003411,0.0
6,furgoneta,A,450,0.044848,furgoneta,0.05007,0.088788
7,furgoneta,B,89,0.05783,furgoneta,0.05007,0.088788
8,motocicleta,A,2526,0.251744,motocicleta,0.082535,0.004262
9,motocicleta,B,204,0.132554,motocicleta,0.082535,0.004262


In [24]:
# Computing the coefficient as the proportion of circulating traffic divided by the
# proportion of accidents. Each row takes the traffic column of its own zone
# ('ZONA A' for zone A, 'ZONA B' otherwise); the selection is vectorised instead
# of looping over the rows with iterrows.
traffic_share = np.where(
    after_coefficient2017['zone'] == 'A',
    after_coefficient2017['ZONA A'],
    after_coefficient2017['ZONA B'],
)
after_coefficient2017['Coefficient'] = traffic_share / after_coefficient2017['prop_accidents']

In [25]:
# Displaying the 2017 table, now including the 'Coefficient' column
after_coefficient2017

Unnamed: 0,tipo_vehiculo,zone,n_accidents,prop_accidents,Sector,ZONA A,ZONA B,Coefficient
0,autobus,A,487,0.048535,autobus,0.021952,0.009709,0.4523
1,autobus,B,2,0.0013,autobus,0.021952,0.009709,7.470751
2,camion,A,91,0.009069,camion,0.016009,0.025562,1.765181
3,camion,B,28,0.018194,camion,0.016009,0.025562,1.405007
4,ciclomotor,A,297,0.029599,ciclomotor,0.003411,0.0,0.115236
5,ciclomotor,B,5,0.003249,ciclomotor,0.003411,0.0,0.0
6,furgoneta,A,450,0.044848,furgoneta,0.05007,0.088788,1.11644
7,furgoneta,B,89,0.05783,furgoneta,0.05007,0.088788,1.535343
8,motocicleta,A,2526,0.251744,motocicleta,0.082535,0.004262,0.327854
9,motocicleta,B,204,0.132554,motocicleta,0.082535,0.004262,0.032155


# Computing coefficients alpha_i for 2013

In [26]:
# Filtering accident data for 2013 and adding up all its time ranges
accidents2013 = accidents[accidents['time_range'].dt.year == 2013]
accidents2013 = accidents2013.groupby(['tipo_vehiculo','zone']).agg({'n_accidents':'sum'})
accidents2013 = accidents2013.reset_index()

# Proportion of accidents of each kind of vehicle within its zone (A-B).
# The zone totals come from a grouped transform instead of being recomputed for
# every row inside an iterrows loop.
zone_totals = accidents2013.groupby('zone')['n_accidents'].transform('sum')
accidents2013['prop_accidents'] = accidents2013['n_accidents'] / zone_totals

In [27]:
# Reading the 2013 counterpart of the real traffic spreadsheet
real_park2013 = pd.read_excel("./traffic_flux/parque2013.xls")

In [28]:
# Grouping by sector (type of vehicle) and adding up every zone column.
# The grouped .sum() replaces the Python builtin `sum` passed to agg (deprecated in
# recent pandas versions).
zone_columns = ['ZONA A','ZONA B','ZONA C','ZONA D','ZONA E','TOTAL']
real_park2013 = real_park2013.groupby('Sector')[zone_columns].sum().reset_index()

# We are just studying A and B so we drop everything else:
real_park2013 = real_park2013[['Sector','ZONA A','ZONA B']]
# we drop the 'TOTAL' sector row because we don't need it
real_park2013 = real_park2013[real_park2013['Sector']!='TOTAL'].reset_index(drop=True)

# We clean the 'Sector' so that it matches our dataset; any sector missing from
# the lookup keeps its original label
sector_to_category = {
    'Autobuses': 'autobus',
    'Vehículos Pesados': 'camion',
    'Ciclomotores': 'ciclomotor',
    'Vehículos Ligeros': 'furgoneta',
    'Motocicletas': 'motocicleta',
    'Taxis': 'taxi',
    'Turismos': 'turismo',
}
real_park2013['Sector'] = real_park2013['Sector'].map(
    lambda sector: sector_to_category.get(sector, sector)
)

In [29]:
# Joining the 2013 accident proportions with the real traffic data of the same
# vehicle type (inner join: only types present in both tables are kept)
after_coefficient2013 = accidents2013.merge(
    real_park2013,
    how='inner',
    left_on='tipo_vehiculo',
    right_on='Sector',
)

In [30]:
# Computing the coefficient as the proportion of circulating traffic divided by the
# proportion of accidents. Each row takes the traffic column of its own zone
# ('ZONA A' for zone A, 'ZONA B' otherwise); the selection is vectorised instead
# of looping over the rows with iterrows.
traffic_share = np.where(
    after_coefficient2013['zone'] == 'A',
    after_coefficient2013['ZONA A'],
    after_coefficient2013['ZONA B'],
)
after_coefficient2013['Coefficient'] = traffic_share / after_coefficient2013['prop_accidents']

## Coefficients: we take the mean value of the coefficients from 2013 and 2017

In [31]:
# Creating the coefficients table: one row per vehicle type and zone, holding the
# mean of the 2013 and 2017 coefficients.
# The two yearly tables are matched on (tipo_vehiculo, zone) rather than on row
# position: adding the columns position by position silently pairs the wrong rows
# as soon as a vehicle/zone combination is missing in one of the years.
# A combination present in only one year keeps that year's coefficient.
key_columns = ['tipo_vehiculo','zone']
yearly_coefficients = pd.concat(
    [
        after_coefficient2013[key_columns + ['Coefficient']],
        after_coefficient2017[key_columns + ['Coefficient']],
    ],
    axis=0,
)
coefficients = yearly_coefficients.groupby(key_columns, as_index=False)['Coefficient'].mean()
coefficients['key'] = coefficients['tipo_vehiculo']+"_"+coefficients['zone']
coefficients = coefficients[['key','Coefficient']]
coefficients

Unnamed: 0,key,Coefficient
0,autobus_A,0.531081
1,autobus_B,4.353595
2,camion_A,1.282402
3,camion_B,1.263447
4,ciclomotor_A,0.131428
5,ciclomotor_B,0.0
6,furgoneta_A,1.098788
7,furgoneta_B,1.529274
8,motocicleta_A,0.356309
9,motocicleta_B,0.037482


# Testing coefficients for real data from 2013 and 2017

In [32]:
# This should work, since the coefficients were computed using those years, but we test it anyway
# to check that everything works fine and that we made no mistakes.

In [33]:
def _zone_proportions(table, year):
    """Return the accidents of `year` per vehicle type and zone, with their share within the zone.

    Parameters:
        table: DataFrame with the columns 'time_range' (datetime), 'tipo_vehiculo',
            'zone' and 'n_accidents'.
        year: calendar year used to select the rows through 'time_range'.

    Returns:
        DataFrame with one row per (tipo_vehiculo, zone), the summed 'n_accidents'
        and 'prop_accidents' = n_accidents / total n_accidents of the same zone.
    """
    yearly = table[table['time_range'].dt.year == year]
    yearly = yearly.groupby(['tipo_vehiculo','zone']).agg({'n_accidents':'sum'}).reset_index()
    # A grouped transform puts each zone's total on all of its rows
    zone_totals = yearly.groupby('zone')['n_accidents'].transform('sum')
    yearly['prop_accidents'] = yearly['n_accidents'] / zone_totals
    return yearly


# Same computation for both reference years (this was four copy-pasted row loops)
tabla2013 = _zone_proportions(accidents, 2013)
tabla2017 = _zone_proportions(accidents, 2017)

In [34]:
# Looking the coefficients up by their key (vehicle-type_zone): multiplying the two
# tables row by row only works while both happen to list the same combinations
# in the same order.
coefficient_by_key = coefficients.set_index('key')['Coefficient']

for tabla in (tabla2013, tabla2017):
    row_keys = tabla['tipo_vehiculo'] + "_" + tabla['zone']
    tabla['expected_real_prop'] = tabla['prop_accidents'] * row_keys.map(coefficient_by_key)

    # Normalising the expected proportions so that they sum to 1 within each zone
    zone_sums = tabla.groupby('zone')['expected_real_prop'].transform('sum')
    tabla['normalized'] = tabla['expected_real_prop'] / zone_sums

## Computing circulating vehicles.

In [35]:
# Attaching to every row the coefficient of its key (inner join: rows whose key
# has no coefficient are left out)
accidents = accidents.merge(coefficients, how='inner', on='key')

In [36]:
# Approximated (not yet normalised) proportion of circulating vehicles:
# proportion of accidents times the coefficient of the row
accidents['pre_circulating'] = accidents['prop_accidents'].mul(accidents['Coefficient'])

In [37]:
# Normalising the expected proportions so that they sum to 1 within each zone and
# time range. A grouped transform computes every (time_range, zone) total once;
# the previous row loop filtered the whole table again for each row.
range_zone_totals = accidents.groupby(['time_range','zone'])['pre_circulating'].transform('sum')
accidents['circulating'] = accidents['pre_circulating'] / range_zone_totals


In [38]:
# List of "keys" [vehicle-type_zone]: every vehicle type paired with zone A and then zone B
vehicles = ['autobus','camion','ciclomotor','furgoneta','motocicleta','taxi','turismo']
keys = [vehicle + zone_suffix for vehicle in vehicles for zone_suffix in ("_A", "_B")]

# Columns of the new dataframe (one time range per row): the date followed by one column per key
columns = ['time_range', *keys]

In [39]:
# Changing the format of the table from long to wide: one row per time range and
# one column per key, holding the 'circulating' proportion.
# This replaces the cell-by-cell fill of a frame seeded with integer zeros, which
# relied on implicit dtype changes when dates and floats were written into it and
# scanned the whole table for every (time range, key) pair.
circulating_wide = (
    accidents
    .drop_duplicates(subset=['time_range','key'])  # first value wins, as in the cell-by-cell version
    .set_index(['time_range','key'])['circulating']
    .unstack('key', fill_value=0)          # pairs without accident data get a 0
    .reindex(columns=keys, fill_value=0)   # fixed column order; keys never seen become all-zero columns
    .astype(float)
    .sort_index()
)

# Bringing 'time_range' back as the first column and dropping the leftover columns name
accidents_T = circulating_wide.rename_axis(columns=None).reset_index()

# We make sure the date column is datetime so that it is properly displayed
accidents_T['time_range'] = pd.to_datetime(accidents_T['time_range'])

accidents_T = accidents_T.sort_values(by='time_range').reset_index(drop=True)

In [40]:
# Previewing the first rows of the wide table (one row per time range, one column per key)
accidents_T.head()

Unnamed: 0,time_range,autobus_A,autobus_B,camion_A,camion_B,ciclomotor_A,ciclomotor_B,furgoneta_A,furgoneta_B,motocicleta_A,motocicleta_B,taxi_A,taxi_B,turismo_A,turismo_B
0,2013-01-01,0.011827,0.078785,0.0,0.0,0.002439,0.0,0.048937,0.166047,0.075378,0.002713,0.079218,0.0,0.7822,0.752455
1,2013-01-16,0.017663,0.142478,0.019685,0.013783,0.004371,0.0,0.064656,0.050048,0.071103,0.001636,0.122865,0.051983,0.699657,0.740072
2,2013-01-31,0.013101,0.0,0.025309,0.022581,0.004539,0.0,0.059634,0.163989,0.08702,0.004689,0.114085,0.085165,0.696311,0.723576
3,2013-02-15,0.012305,0.0,0.02431,0.115102,0.00526,0.0,0.023143,0.111456,0.06229,0.006829,0.101152,0.028941,0.771541,0.737671
4,2013-03-02,0.012537,0.0,0.010091,0.0,0.004137,0.0,0.056199,0.022189,0.059578,0.002175,0.129463,0.023047,0.727996,0.952589


## Saving data

In [41]:
# Writing the wide table to a CSV file next to the raw accident files, without the index column
accidents_T.to_csv('./accidentes/clean_accidents.csv',index=False)