## COMP47670 Assignment 1

In [599]:
import os
import csv
import pandas
import json
import requests
import plotly.express as px
from requests.adapters import HTTPAdapter
from urllib3.util import Retry

### Formula 1 API

The API I have chosen to use for this project is: 
http://ergast.com/api/f1/
The API documentation is located here - https://ergast.com/mrd/

This API provides Formula 1 data from the 1950 season onwards. 
It does not require any form of authentication. 

### Collecting the data

In [600]:
def get_data_from_api(api_options='', api_limit='', offset=''):
    """
    This function calls the formula 1 api and returns the decoded
    json response as a dictionary.

    The request url is the base url followed by api_options,
    api_limit and offset, concatenated in that order. Failures are
    reported with print() and result in None being returned, so
    callers must be prepared for a None result.

    param: api_options -> string (e.g. 'status/1/results.json')
    param: api_limit -> string (e.g. '?limit=1000')
    param: offset -> string (e.g. '&offset=1000')
    return: data -> dict, or None if the request or parsing failed
    """
    api_base_url = "http://ergast.com/api/f1/"
    api_url = api_base_url + api_options + api_limit + offset

    response = None
    data = None
    retries = Retry(total=3, backoff_factor=1)

    # The context manager closes the session (and its pooled
    # connections) even when the request fails.
    with requests.Session() as session:
        session.mount(
            prefix='http://',
            adapter=HTTPAdapter(max_retries=retries))
        try:
            # No verify=False here: the url is plain http, so there is
            # no certificate to check, and leaving it in would silently
            # disable TLS checks if the base url ever moved to https.
            response = session.get(url=api_url)
        except requests.RequestException:
            print("Could not connect to {}".format(api_url))

    if response:
        try:
            # json.loads() no longer accepts an `encoding` keyword
            # (removed in Python 3.9); response.text is already a str.
            data = json.loads(response.text)
        except (ValueError, KeyError) as error:
            print("Could not load json due to: {}".format(error))
    elif response is not None:
        # A Response is falsy for 4xx/5xx statuses; say so instead of
        # returning None without any explanation.
        print("Request to {} failed with status {}".format(
            api_url, response.status_code))
    return data

In [601]:
def save_raw_data_to_file(data, destination_file):
    """
    Serialise data as json and write it to destination_file.

    The file is opened in 'w' mode with UTF-8 encoding, so any
    existing content at that path is replaced.

    :param data: json-serialisable object to store
    :param destination_file: path of the file to write
    """
    with open(destination_file, 'w', encoding='UTF-8') as out_handle:
        json.dump(obj=data, fp=out_handle)

In [602]:
def read_json_file(file_path):
    """
    This function reads a json file and returns its decoded content.

    A missing file is reported with print() and yields None; any
    other error (e.g. malformed json) propagates to the caller.

    :param file_path -> String
    :return data -> dict (or whatever the json document decodes to),
                    None if the file does not exist
    """
    try:
        # Decode explicitly as UTF-8 (the encoding used when the files
        # are written) instead of relying on the platform default.
        with open(file_path, encoding='UTF-8') as file:
            # json.load() no longer accepts an `encoding` keyword
            # (removed in Python 3.9); the text file is already decoded.
            data = json.load(file)
    except FileNotFoundError as error:
        print("Can not open file due to: {}".format(error))
        return None

    return data

In [603]:
def _lookup(record, *keys):
    """Follow keys through nested containers; None if any step is missing."""
    current = record
    for key in keys:
        try:
            current = current[key]
        except (KeyError, IndexError, TypeError):
            return None
    return current


def _to_int(value):
    """Convert value to int, or return None when it is missing or malformed."""
    try:
        return int(value)
    except (TypeError, ValueError):
        return None


def get_finishers_data(data):
    """
    This function parses the raw data pulled from the api
    into relevant race data that can be further analysed.

    Every result of every race contributes exactly one entry to each
    column; a field that is missing or malformed is stored as None so
    that all columns always have the same length.

    param: data -> dict (expects data["MRData"]["RaceTable"]["Races"])
    return: race_data -> dict of column name -> list,
                         or None if the race list cannot be found
    """
    # creates a dictionary compatible with a pandas dataframe
    race_data = {
        "Date": [],
        "Season": [],
        "Race": [],
        "Laps": [],
        "Constructor": [],
        "Constructor_Nationality": [],
        "Driver": [],
        "Driver_Nationality": [],
        "Position": [],
        "Status": [],
        "Time": [],
        "Time_mins": []
    }

    try:
        races = data["MRData"]["RaceTable"]["Races"]
    except (KeyError, TypeError, ValueError) as error:
        # TypeError covers data being None, which is what the api and
        # file helpers return on failure.
        print("Could not parse race data due to: {}".format(error))
        return None

    for race in races:
        date = _lookup(race, "date")
        season = _to_int(_lookup(race, "season"))
        race_name = _lookup(race, "raceName")

        # A race without a "Results" list contributes no rows.
        for result in _lookup(race, "Results") or []:
            given_name = _lookup(result, "Driver", "givenName")
            family_name = _lookup(result, "Driver", "familyName")
            if given_name is None or family_name is None:
                driver = None
            else:
                driver = "{0} {1}".format(given_name, family_name)

            # The finishing time is converted from milliseconds to minutes.
            millis = _to_int(_lookup(result, "Time", "millis"))
            time_mins = None if millis is None else millis / 60000

            row = {
                "Date": date,
                "Season": season,
                "Race": race_name,
                "Laps": _to_int(_lookup(result, "laps")),
                "Constructor": _lookup(result, "Constructor", "name"),
                "Constructor_Nationality": _lookup(
                    result, "Constructor", "nationality"),
                "Driver": driver,
                "Driver_Nationality": _lookup(
                    result, "Driver", "nationality"),
                "Position": _lookup(result, "position"),
                "Status": _lookup(result, "status"),
                "Time": _lookup(result, "Time", "time"),
                "Time_mins": time_mins,
            }
            # Appending through a single loop guarantees that each
            # column receives exactly one value per result.
            for column, value in row.items():
                race_data[column].append(value)

    return race_data

In [604]:
# Fetch a single page of finishers (status id 1) and keep the raw json
# on disk as a sample of the api response.
sample_data = get_data_from_api(
              api_options='status/1/results.json',
              api_limit='?limit=1000')

save_raw_data_to_file(sample_data, "/Users/nick.duggan/workspace/Notebooks/formula1_sample_finishers_race_data.json")

# Collect one data frame per page and concatenate once at the end.
# DataFrame.append was removed in pandas 2.0, and appending inside a
# loop re-copies all previously collected rows on every iteration.
page_frames = []

# Offsets 0, 1000, ..., 6000 -> seven pages of up to 1000 results each.
for offset in range(0, 7000, 1000):
    f1_finishers = get_data_from_api(
        api_options='status/1/results.json',
        api_limit='?limit=1000',
        offset='&offset={0}'.format(offset))

    finishers_race_data = get_finishers_data(f1_finishers)

    temp_df = pandas.DataFrame(finishers_race_data)

    # A failed request/parse (None) or a page past the end of the data
    # produces an empty frame; there is nothing to add in that case.
    if temp_df.empty:
        continue

    page_frames.append(temp_df)

# Like the previous append-based loop, the per-page row index is kept
# (ignore_index is not set), so index values repeat across pages.
if page_frames:
    finishers_race_df = pandas.concat(page_frames)
else:
    finishers_race_df = pandas.DataFrame()

finishers_race_df.to_csv("/Users/nick.duggan/workspace/Notebooks/formula1_finishers_race_data.csv")

finishers_race_df[0:5]

Unnamed: 0,Date,Season,Race,Laps,Constructor,Constructor_Nationality,Driver,Driver_Nationality,Position,Status,Time,Time_mins
0,1950-05-13,1950,British Grand Prix,70,Alfa Romeo,Italian,Nino Farina,Italian,1,Finished,2:13:23.6,133.393333
1,1950-05-13,1950,British Grand Prix,70,Alfa Romeo,Italian,Luigi Fagioli,Italian,2,Finished,+2.6,133.436667
2,1950-05-13,1950,British Grand Prix,70,Alfa Romeo,Italian,Reg Parnell,British,3,Finished,+52.0,134.26
3,1950-05-21,1950,Monaco Grand Prix,100,Alfa Romeo,Italian,Juan Fangio,Argentine,1,Finished,3:13:18.7,193.311667
4,1950-05-30,1950,Indianapolis 500,138,Kurtis Kraft,American,Johnnie Parsons,American,1,Finished,2:46:55.97,166.932833


In [605]:
# Keep only the columns needed for the Monaco analysis, restrict the
# rows to the Monaco Grand Prix and list the slowest finishing times
# first.
# NOTE: a stray `result.to_csv(...)` call used to sit here; `result` is
# only created by the later merge with the weather data (which also
# writes merged_data.csv), so the call raised NameError on a fresh run.
monaco_finishers = finishers_race_df[["Date",
                  "Season",
                  "Race",
                  "Driver",
                  "Laps",
                  "Position",
                  "Time_mins"]].query(
                  "Race == 'Monaco Grand Prix'").sort_values(
                  by="Time_mins",
                  ascending=False)

monaco_finishers[0:5]

Unnamed: 0,Date,Season,Race,Driver,Laps,Position,Time_mins
3,1950-05-21,1950,Monaco Grand Prix,Juan Fangio,100,1,193.311667
212,1957-05-19,1957,Monaco Grand Prix,Tony Brooks,105,2,190.633333
211,1957-05-19,1957,Monaco Grand Prix,Juan Fangio,105,1,190.213333
183,1956-05-13,1956,Monaco Grand Prix,Peter Collins,100,2,180.65
184,1956-05-13,1956,Monaco Grand Prix,Juan Fangio,100,2,180.65


In [606]:
# Scatter plot of finishing time (minutes) against season for the
# Monaco finishers, coloured by lap count, with a lowess trendline.
fig3 = px.scatter(
    monaco_finishers,
    x='Season',
    y='Time_mins',
    title="Formula1 finisher's times at the Monaco Grand Prix",
    hover_name='Driver',
    hover_data=["Position"],
    color="Laps",
    trendline="lowess",
    trendline_color_override="turquoise",
)
fig3.show()

In [607]:
def get_weather_data(data):
    """
    This function extracts one weather observation per matching
    record and returns the observations as a pandas dataframe.

    Only records whose "dt_iso" time component is exactly "15:00:00"
    are kept. For those, the date component of "dt_iso" goes into the
    "Date" column and the "main" field of the first "weather" entry
    goes into the "Weather" column.

    param: data -> list of dicts with "dt_iso" and "weather" keys;
                   None (e.g. the weather file could not be read) is
                   treated as an empty list
    return: pandas.DataFrame with the columns "Date" and "Weather"
    """
    weather_data = {
        "Date": [],
        "Weather": []
    }

    # read_json_file returns None for a missing file; produce an empty
    # table in that case instead of failing on iteration.
    for record in data or []:
        # assumes dt_iso looks like "<date> <time> ..." separated by
        # whitespace -- TODO confirm against the weather export format
        stamp_parts = record["dt_iso"].split()
        # NOTE(review): presumably 15:00 is used to pick a single
        # reading per day close to race time -- confirm.
        if stamp_parts[1] == "15:00:00":
            weather_data["Date"].append(stamp_parts[0])
            weather_data["Weather"].append(record["weather"][0]["main"])

    return pandas.DataFrame(weather_data)

# Load the raw weather records from disk and reduce them to a
# Date/Weather table.
weather_file = read_json_file(
    "/Users/nick.duggan/workspace/Notebooks/weather_format.json")

weather_data = get_weather_data(weather_file)

# Preview the first five rows.
weather_data.iloc[0:5]

Unnamed: 0,Date,Weather
0,1979-01-01,Clear
1,1979-01-02,Snow
2,1979-01-03,Clouds
3,1979-01-04,Clouds
4,1979-01-05,Clouds


In [608]:
# Attach the weather observation to each Monaco finisher by matching on
# the race date; the inner join drops races without a weather record.
result = pandas.merge(monaco_finishers, weather_data, how='inner', on=['Date'])

# Order chronologically and renumber the rows from zero.
result = result.sort_values(by="Season", ascending=True)
result = result.reset_index(drop=True)

result.to_csv("/Users/nick.duggan/workspace/Notebooks/merged_data.csv")
result[0:5]

Unnamed: 0,Date,Season,Race,Driver,Laps,Position,Time_mins,Weather
0,1979-05-27,1979,Monaco Grand Prix,Clay Regazzoni,76,2,115.382,Clouds
1,1979-05-27,1979,Monaco Grand Prix,Jody Scheckter,76,1,115.374667,Clouds
2,1979-05-27,1979,Monaco Grand Prix,Carlos Reutemann,76,3,115.5175,Clouds
3,1979-05-27,1979,Monaco Grand Prix,John Watson,76,4,116.063167,Clouds
4,1980-05-18,1980,Monaco Grand Prix,Carlos Reutemann,76,1,115.57275,Clouds


In [609]:
# Finishing time per season at Monaco, one facet per weather category.
# `result` holds every finisher (not only the race winner), so the
# title refers to finishers, in line with the earlier Monaco scatter.
# NOTE(review): if only winners were intended, filter `result` on
# Position instead -- confirm.
fig4 = px.scatter(result, x = 'Season', y = 'Time_mins',
                 title="Formula1 finisher's times at the Monaco Grand Prix",
                 hover_name='Driver', color="Laps",facet_col="Weather")
fig4.update_layout(plot_bgcolor='rgb(185,185,185)')
fig4.show()

In [610]:
# Fetch the results with status id 4 (treated as collisions in this
# notebook) and store the raw json on disk.
collision_data = get_data_from_api(
    api_options='status/4/results.json',
    api_limit='?limit=1000',
)

save_raw_data_to_file(
    collision_data,
    "/Users/nick.duggan/workspace/Notebooks/formula1_collision_data.json",
)

# Parse the stored file with the same parser used for the finishers.
collisions = get_finishers_data(
    read_json_file(
        "/Users/nick.duggan/workspace/Notebooks/formula1_collision_data.json"
    )
)

collisions_df = pandas.DataFrame(collisions)

# Count the rows per race name.
race_group_df = collisions_df.groupby("Race").size()
race_group_df = race_group_df.reset_index(name='Collisions')

race_group_df[0:5]

Unnamed: 0,Race,Collisions
0,Abu Dhabi Grand Prix,9
1,Argentine Grand Prix,21
2,Australian Grand Prix,64
3,Austrian Grand Prix,27
4,Azerbaijan Grand Prix,6


In [611]:
# Bar chart of the number of collisions recorded for each race.
fig = px.bar(
    data_frame=race_group_df,
    x='Race',
    y='Collisions',
)
fig.show()

In [612]:
# Reduce the collision rows to the analysis columns, keep only the
# Monaco Grand Prix and order by Time_mins (descending).
monaco_collisions = collisions_df[
    ["Date", "Season", "Race", "Driver", "Laps", "Position", "Time_mins"]
]
monaco_collisions = monaco_collisions.query("Race == 'Monaco Grand Prix'")
monaco_collisions = monaco_collisions.sort_values(
    by="Time_mins",
    ascending=False,
)

# Attach the weather observation for each race date; the inner join
# drops races without a weather record.
collisions_weather_data = pandas.merge(
    left=monaco_collisions,
    right=weather_data,
    how='inner',
    on=['Date'],
)

collisions_weather_data[0:5]

Unnamed: 0,Date,Season,Race,Driver,Laps,Position,Time_mins,Weather
0,1981-05-31,1981,Monaco Grand Prix,Michele Alboreto,50,10,,Clouds
1,1981-05-31,1981,Monaco Grand Prix,Bruno Giacomelli,50,11,,Clouds
2,1981-05-31,1981,Monaco Grand Prix,Andrea de Cesaris,0,19,,Clouds
3,1981-05-31,1981,Monaco Grand Prix,Mario Andretti,0,20,,Clouds
4,1982-05-23,1982,Monaco Grand Prix,Keke Rosberg,64,11,,Clouds


In [613]:
# Count the Monaco collision rows per weather category.
weather_group_df = collisions_weather_data.groupby("Weather").size()
weather_group_df = weather_group_df.reset_index(name='Collisions')

Unnamed: 0,Weather,Collisions
0,Clouds,50
1,Rain,8
2,Thunderstorm,1


In [614]:
# Bar chart of the number of Monaco collisions per weather category.
fig6 = px.bar(
    data_frame=weather_group_df,
    x='Weather',
    y='Collisions',
)
fig6.show()