In [None]:
import json
import pandas as pd
import requests

# Fetch the TfL AccidentStats records for one year and cache them as a CSV file.
YEAR = 2019  # change this to pull another year; drives both the URL and the CSV name
url = f"https://api.tfl.gov.uk/AccidentStats/{YEAR}"

# The timeout stops the cell from hanging forever on a stalled connection, and
# raise_for_status() aborts on an HTTP error instead of parsing an error payload
# as if it were accident data.
response = requests.get(url, timeout=60)
response.raise_for_status()

# Decode the JSON response body
data = response.json()

# Flatten the JSON records into a pandas DataFrame
df = pd.json_normalize(data)

# Save the DataFrame as CSV
df.to_csv(f'incidenti_tfl_{YEAR}.csv', index=False)

In [None]:
# Preview the first rows of the flattened accident table
df.head()

Unnamed: 0,$type,id,lat,lon,location,date,severity,borough,casualties,vehicles
0,Tfl.Api.Presentation.Entities.AccidentStats.Ac...,345906,51.511963,-0.028211,On Commercial Road Near The Junction With Burd...,2019-01-03T01:20:00Z,Slight,Tower Hamlets,[{'$type': 'Tfl.Api.Presentation.Entities.Acci...,[{'$type': 'Tfl.Api.Presentation.Entities.Acci...
1,Tfl.Api.Presentation.Entities.AccidentStats.Ac...,345907,51.371636,-0.117621,On Purley Way 20 metres north of The Junction ...,2019-01-02T22:45:00Z,Slight,Croydon,[{'$type': 'Tfl.Api.Presentation.Entities.Acci...,[{'$type': 'Tfl.Api.Presentation.Entities.Acci...
2,Tfl.Api.Presentation.Entities.AccidentStats.Ac...,345908,51.514951,-0.072747,On Whitechapel High Street Near The Junction W...,2019-01-02T18:45:00Z,Slight,Tower Hamlets,[{'$type': 'Tfl.Api.Presentation.Entities.Acci...,[{'$type': 'Tfl.Api.Presentation.Entities.Acci...
3,Tfl.Api.Presentation.Entities.AccidentStats.Ac...,345909,51.519173,-0.262356,On western Avenue 10 metres south of The Junct...,2019-01-03T08:41:00Z,Slight,Ealing,[{'$type': 'Tfl.Api.Presentation.Entities.Acci...,[{'$type': 'Tfl.Api.Presentation.Entities.Acci...
4,Tfl.Api.Presentation.Entities.AccidentStats.Ac...,345910,51.565743,-0.136308,On Macdonald Road 30 metres south of The Junct...,2019-01-03T08:37:00Z,Slight,Islington,[{'$type': 'Tfl.Api.Presentation.Entities.Acci...,[{'$type': 'Tfl.Api.Presentation.Entities.Acci...


In [None]:
# Summary statistics for the numeric columns
df.describe()

Unnamed: 0,id,lat,lon
count,50626.0,50626.0,50626.0
mean,544054.0,51.509198,-0.118897
std,185715.647807,0.067383,0.141133
min,345828.0,51.295233,-0.509795
25%,358484.25,51.468225,-0.200805
50%,544054.0,51.514006,-0.114336
75%,729623.75,51.553555,-0.035484
max,742280.0,51.691126,0.307909


In [None]:
# Remove the '$type' column. errors='ignore' keeps this cell re-runnable once
# the column is already gone (a plain drop would raise KeyError the second time).
df = df.drop(columns=['$type'], errors='ignore')

# Encode 'severity' as an ordinal rank: 1 = Slight, 2 = Serious, 3 = Fatal.
# Only map while the column is still non-numeric: mapping a second time would
# match none of the keys and turn every value into NaN.
if not pd.api.types.is_numeric_dtype(df['severity']):
    df['severity'] = df['severity'].map({'Slight': 1, 'Serious': 2, 'Fatal': 3})
df.head()

Unnamed: 0,id,lat,lon,location,date,severity,borough,casualties,vehicles
0,345906,51.511963,-0.028211,On Commercial Road Near The Junction With Burd...,2019-01-03T01:20:00Z,1,Tower Hamlets,[{'$type': 'Tfl.Api.Presentation.Entities.Acci...,[{'$type': 'Tfl.Api.Presentation.Entities.Acci...
1,345907,51.371636,-0.117621,On Purley Way 20 metres north of The Junction ...,2019-01-02T22:45:00Z,1,Croydon,[{'$type': 'Tfl.Api.Presentation.Entities.Acci...,[{'$type': 'Tfl.Api.Presentation.Entities.Acci...
2,345908,51.514951,-0.072747,On Whitechapel High Street Near The Junction W...,2019-01-02T18:45:00Z,1,Tower Hamlets,[{'$type': 'Tfl.Api.Presentation.Entities.Acci...,[{'$type': 'Tfl.Api.Presentation.Entities.Acci...
3,345909,51.519173,-0.262356,On western Avenue 10 metres south of The Junct...,2019-01-03T08:41:00Z,1,Ealing,[{'$type': 'Tfl.Api.Presentation.Entities.Acci...,[{'$type': 'Tfl.Api.Presentation.Entities.Acci...
4,345910,51.565743,-0.136308,On Macdonald Road 30 metres south of The Junct...,2019-01-03T08:37:00Z,1,Islington,[{'$type': 'Tfl.Api.Presentation.Entities.Acci...,[{'$type': 'Tfl.Api.Presentation.Entities.Acci...


Next, we build one separate DataFrame for casualties and one for vehicles, so that the nested records become readable tabular data.

In [None]:
# One row per casualty: keep only the accident id and the nested casualty record.
# Selecting the two needed columns replaces the copy-then-drop (whose drop list
# named 'lat' twice). ignore_index=True renumbers the exploded rows 0..n-1;
# without it every casualty of the same accident would share one index label,
# which breaks any later index-based join.
df_casualties = df[['id', 'casualties']].explode('casualties', ignore_index=True)
df_casualties.head()

Unnamed: 0,id,casualties
0,345906,{'$type': 'Tfl.Api.Presentation.Entities.Accid...
1,345907,{'$type': 'Tfl.Api.Presentation.Entities.Accid...
2,345908,{'$type': 'Tfl.Api.Presentation.Entities.Accid...
3,345909,{'$type': 'Tfl.Api.Presentation.Entities.Accid...
4,345910,{'$type': 'Tfl.Api.Presentation.Entities.Accid...


In [None]:
# Expand each casualty dict into its own columns (age, class, severity, mode, ageBand).
# The rows keep the order of df_casualties, so row k here describes row k there.
# The per-record '$type' column is discarded.
df_casualties_normalized = (
    pd.json_normalize(df_casualties['casualties'].tolist())
    .drop(columns=['$type'])
)
df_casualties_normalized.head()

Unnamed: 0,age,class,severity,mode,ageBand
0,27.0,Driver,Slight,Car,Adult
1,42.0,Driver,Slight,Car,Adult
2,24.0,Driver,Slight,PedalCycle,Adult
3,48.0,Pedestrian,Slight,Pedestrian,Adult
4,18.0,Driver,Slight,PoweredTwoWheeler,Adult


In [None]:
# Attach the expanded casualty columns to their accident id.
# join() matches on index labels. Exploded rows can carry repeated labels (one
# per source accident), while df_casualties_normalized is numbered 0..n-1 in
# row order, so the index is reset first to make the match positional.
# Otherwise rows get paired with the wrong casualty record.
df_casualties = (
    df_casualties
    .reset_index(drop=True)
    .join(df_casualties_normalized)
    .drop(columns=['casualties'])
)
df_casualties.head()

Unnamed: 0,id,age,class,severity,mode,ageBand
0,345906,27.0,Driver,Slight,Car,Adult
1,345907,42.0,Driver,Slight,Car,Adult
2,345908,24.0,Driver,Slight,PedalCycle,Adult
3,345909,48.0,Pedestrian,Slight,Pedestrian,Adult
4,345910,18.0,Driver,Slight,PoweredTwoWheeler,Adult


In [None]:
# One row per vehicle: keep only the accident id and the nested vehicle record.
# ignore_index=True renumbers the exploded rows 0..n-1. Without it all vehicles
# of one accident share an index label, and the index join below would give
# every one of them the same (wrong) normalized row.
df_vehicles = df[['id', 'vehicles']].explode('vehicles', ignore_index=True)

# Expand each vehicle dict into columns and discard the per-record '$type'.
# Row k of the result describes row k of df_vehicles.
df_vehicles_normalized = (
    pd.json_normalize(df_vehicles['vehicles'].tolist())
    .drop(columns=['$type'])
)

# Both frames now share the same 0..n-1 index, so the join is row-for-row.
df_vehicles = df_vehicles.join(df_vehicles_normalized).drop(columns=['vehicles'])
df_vehicles.head()

Unnamed: 0,id,type
0,345906,Car
0,345906,Car
1,345907,Car
1,345907,Car
2,345908,Car


In [None]:
# Collapse to one row per accident id: 'type' becomes the list of that accident's
# vehicle types and 'num_vehicles' the length of that list.
# The rows come out ordered by id because groupby sorts its group keys by default (sort=True).
df_vehicles = (
    df_vehicles
    .groupby('id')['type']
    .agg(list)
    .reset_index()
)
df_vehicles['num_vehicles'] = df_vehicles['type'].map(len)
df_vehicles

Unnamed: 0,id,type,num_vehicles
0,345828,"[LightGoodsVehicle, LightGoodsVehicle]",2
1,345829,"[PedalCycle, PedalCycle]",2
2,345830,"[Car, Car]",2
3,345831,[PedalCycle],1
4,345832,"[Car, Car]",2
...,...,...,...
50621,742276,[Car],1
50622,742277,[Car],1
50623,742278,"[Car, Car]",2
50624,742279,"[OtherMotorVehicle, OtherMotorVehicle]",2




In [None]:
# List the distinct borough names present in the data
df['borough'].unique()


array(['Tower Hamlets', 'Croydon', 'Ealing', 'Islington',
       'Barking and Dagenham', 'Wandsworth', 'Hounslow',
       'Richmond upon Thames', 'City of Westminster',
       'Kensington and Chelsea', 'Newham', 'Bexley', 'Southwark',
       'Lewisham', 'Havering', 'Merton', 'Bromley', 'Kingston',
       'Redbridge', 'Brent', 'Lambeth', 'Hillingdon', 'Enfield',
       'Waltham Forest', 'Barnet', 'Camden', 'Harrow', 'Hackney',
       'Sutton', 'Hammersmith and Fulham', 'Haringey', 'Greenwich',
       'City of London'], dtype=object)

In [None]:
# Collect every row that has a missing value in at least one column and print it
null_data = df.loc[df.isna().any(axis=1)]
print(null_data)

Empty DataFrame
Columns: [id, lat, lon, location, date, severity, borough, casualties, vehicles]
Index: []


In [None]:
# Print the length of the decoded API response held in `data`.
# NOTE(review): the recorded output of this cell is 7, while df.describe() reports
# 50626 rows -- `data` was probably rebound by an out-of-order run; confirm on a fresh kernel.
print(len(data))
# Literal sample of a single accident record, showing the nested layout:
# scalar accident fields plus a list of casualty dicts and a list of vehicle dicts.
# Presumably copied from the API response for reference -- it is not used by the code shown here.
exmpl = {
    "$type": "Tfl.Api.Presentation.Entities.AccidentStats.AccidentDetail, Tfl.Api.Presentation.Entities",
    "id": 345979,
    "lat": 51.570865,
    "lon": -0.231959,
    "location": "On Edgware Road Near The Junction With north Circular Road",
    "date": "2019-01-04T21:22:00Z",
    "severity": "Slight",
    "borough": "Barnet",
    "casualties": [{
        "$type": "Tfl.Api.Presentation.Entities.AccidentStats.Casualty, Tfl.Api.Presentation.Entities",
        "age": 20,
        "class": "Driver",
        "severity": "Slight",
        "mode": "PoweredTwoWheeler",
        "ageBand": "Adult"
    }],
    "vehicles": [{
        "$type": "Tfl.Api.Presentation.Entities.AccidentStats.Vehicle, Tfl.Api.Presentation.Entities",
        "type": "Motorcycle_500cc_Plus"
    }, {
        "$type": "Tfl.Api.Presentation.Entities.AccidentStats.Vehicle, Tfl.Api.Presentation.Entities",
        "type": "Car"
    }]
}

7


# NOTES SECTION

## 3 OPTIONS:

* Forecasting
 * Determine out of the dataset what would happen
 * Predict accidents most likely
 * we could use past year data and compare directly with more recent data

* Correlating variables
 * Finding relationships between different variables
 * expanding knowledge base to variables such as
 * Street direction, speed limit, street light, roundabout presence
 * tackle problems that standard statistical methods or research can struggle with
 * Anomaly detection


* Optimization
 * Optimize the dataset
 * Most likely determining the best way to have the London transportation system
 * A simulation, hard to determine results




We could study what subjects are more at risk of severe accidents

What parts of the city are the most at risk

For insurance purposes


IDEA:
We could study the correlation between variables and accidents -> building a cost function to determine the reliability of a certain subject
Could be useful for INSURANCE COMPANIES