# TfL Accident Stats

*by Anas Razak*

Transport for London (TfL) accident statistics, 2019-2021.

## Fetching data from API

Import the required packages.

In [2]:
# importing libraries; requests, time, json
import requests
import time
import json

In [7]:
def acc_stats_year(year):
    '''
    Download one year of TfL accident statistics and split it into three JSON files.

    A single GET request to https://api.tfl.gov.uk/AccidentStats/{year} returns
    the list of accidents for that year. The list is split into three files,
    written to the current working directory:

    accident_{year} : contains the general info of each accident
        (id, lat, lon, location, date, severity, borough)
    accident_{year}_casualties : contains, per accident id, the descriptions
        of individuals involved in the accident
    accident_{year}_vehicles : contains, per accident id, the descriptions
        of vehicles involved in the accident

    The '$type' entry of every casualty and vehicle record is dropped.

    Parameters:
        year : the year to query; it is converted with str() and appended
            to the base URL.

    Returns:
        None. The results are the three files and the progress lines printed
        to stdout.

    Raises:
        requests.exceptions.HTTPError : the API answered with a 4xx/5xx status.
        requests.exceptions.Timeout : the server did not respond in time.
        KeyError : an accident record lacks one of the expected fields.
    '''
    url = 'https://api.tfl.gov.uk/AccidentStats/' + str(year)

    # The timeout keeps the call from hanging forever on a stalled connection.
    r = requests.get(url, timeout=60)
    # Fail here, with a clear HTTP error, instead of later inside the loop
    # while indexing into an error payload.
    r.raise_for_status()
    data = r.json()

    # Elapsed time of the one and only HTTP request; hoisted because it is
    # the same value for every accident.
    elapsed = r.elapsed.total_seconds()

    list1 = []  # general info of each accident
    list2 = []  # casualties of each accident
    list3 = []  # vehicles of each accident

    # collecting real-time time before the loop starts
    t1 = time.perf_counter()

    # data is a list whose elements are the accidents of that year. We iterate
    # through it and distribute each accident into the appropriate groups.
    for accident in data:
        accident_id = accident['id']

        # Copy each record without its '$type' entry. Building new dicts
        # leaves the parsed payload untouched and tolerates records that
        # have no '$type' key.
        casualties = [
            {key: value for key, value in person.items() if key != '$type'}
            for person in accident['casualties']
        ]
        vehicles = [
            {key: value for key, value in vehicle.items() if key != '$type'}
            for vehicle in accident['vehicles']
        ]

        list1.append({
            'id': accident_id,
            'lat': accident['lat'],
            'lon': accident['lon'],
            'location': accident['location'],
            'date': accident['date'],
            'severity': accident['severity'],
            'borough': accident['borough'],
        })
        list2.append({'id': accident_id, 'casualties': casualties})
        list3.append({'id': accident_id, 'vehicles': vehicles})

        # No sleep here: the whole year was fetched by the single request
        # above, so there is no per-accident query to rate-limit. Sleeping
        # per record only multiplied the run time by the number of accidents.
        print(f"Got accident {accident_id} info in {elapsed}")

    # collecting the real-time of when the loop finished
    t2 = time.perf_counter()

    print(f"Finished looping for {t2-t1} seconds")

    # converting list1, list2, list3 into json format files
    with open(f'accident_{year}.json', 'w', encoding='utf-8') as f:
        json.dump(list1, f, indent=2)

    with open(f'accident_{year}_casualties.json', 'w', encoding='utf-8') as h:
        json.dump(list2, h, indent=2)

    with open(f'accident_{year}_vehicles.json', 'w', encoding='utf-8') as k:
        json.dump(list3, k, indent=2)

In [4]:
# Fetch the 2019 accident data and write accident_2019.json,
# accident_2019_casualties.json and accident_2019_vehicles.json.
acc_stats_year(2019)

Got accident 345906 info in 0.222191
Got accident 345907 info in 0.222191
Got accident 345908 info in 0.222191
Got accident 345909 info in 0.222191
Got accident 345910 info in 0.222191
Got accident 345911 info in 0.222191
Got accident 345912 info in 0.222191
Got accident 345913 info in 0.222191
Got accident 345914 info in 0.222191
Got accident 345915 info in 0.222191
Got accident 345916 info in 0.222191
Got accident 345917 info in 0.222191
Got accident 345918 info in 0.222191
Got accident 345919 info in 0.222191
Got accident 345920 info in 0.222191
Got accident 345921 info in 0.222191
Got accident 345922 info in 0.222191
Got accident 345923 info in 0.222191
Got accident 345924 info in 0.222191
Got accident 345925 info in 0.222191
Got accident 345926 info in 0.222191
Got accident 345927 info in 0.222191
Got accident 345928 info in 0.222191
Got accident 345929 info in 0.222191
Got accident 345930 info in 0.222191
Got accident 345931 info in 0.222191
Got accident 345932 info in 0.222191
G

In [None]:
# Fetch the 2018 accident data and write accident_2018.json,
# accident_2018_casualties.json and accident_2018_vehicles.json.
acc_stats_year(2018)

In [None]:
# Fetch the 2017 accident data and write accident_2017.json,
# accident_2017_casualties.json and accident_2017_vehicles.json.
acc_stats_year(2017)

In [None]:
# Fetch the 2016 accident data and write accident_2016.json,
# accident_2016_casualties.json and accident_2016_vehicles.json.
acc_stats_year(2016)