# Dashboard for F1 Data Insights

### Import Packages

In [69]:
import numpy as np
import pandas as pd
import dash
import dash_core_components as dcc
import dash_html_components as html
import plotly.graph_objs as go
import plotly.express as px

# View all rows and columns of a dataframe
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)

## Loading in Data

In [2]:
# Load Drivers data
drivers_df = pd.read_csv("./f1db_csv/drivers.csv").drop(columns = "url")
drivers_df

Unnamed: 0,driverId,driverRef,number,code,forename,surname,dob,nationality
0,1,hamilton,44,HAM,Lewis,Hamilton,1985-01-07,British
1,2,heidfeld,\N,HEI,Nick,Heidfeld,1977-05-10,German
2,3,rosberg,6,ROS,Nico,Rosberg,1985-06-27,German
3,4,alonso,14,ALO,Fernando,Alonso,1981-07-29,Spanish
4,5,kovalainen,\N,KOV,Heikki,Kovalainen,1981-10-19,Finnish
5,6,nakajima,\N,NAK,Kazuki,Nakajima,1985-01-11,Japanese
6,7,bourdais,\N,BOU,Sébastien,Bourdais,1979-02-28,French
7,8,raikkonen,7,RAI,Kimi,Räikkönen,1979-10-17,Finnish
8,9,kubica,88,KUB,Robert,Kubica,1984-12-07,Polish
9,10,glock,\N,GLO,Timo,Glock,1982-03-18,German


In [24]:
# Load lap times data
lap_times_df = pd.read_csv("./f1db_csv/lap_times.csv")
lap_times_df = lap_times_df[(lap_times_df.raceId == 1034) | (lap_times_df.raceId == 1033)]
lap_times_df

Unnamed: 0,raceId,driverId,lap,position,time,milliseconds
474870,1033,1,1,1,1:35.670,95670
474871,1033,1,2,1,1:31.458,91458
474872,1033,1,3,1,1:34.916,94916
474873,1033,1,4,3,1:53.821,113821
474874,1033,1,5,1,1:25.898,85898
474875,1033,1,6,1,1:23.991,83991
474876,1033,1,7,1,1:23.327,83327
474877,1033,1,8,1,1:22.170,82170
474878,1033,1,9,1,1:22.130,82130
474879,1033,1,10,1,1:21.522,81522


In [13]:
# Load results data
results_df = pd.read_csv("./f1db_csv/results.csv")
results_df = results_df[(results_df.raceId == 1034) | (results_df.raceId == 1033)]
results_df
# Filter to only race 1034
# results_1034_df = results_df[results_df.raceId == 1034]

Unnamed: 0,resultId,raceId,driverId,constructorId,number,grid,position,positionText,positionOrder,points,laps,time,milliseconds,fastestLap,rank,fastestLapTime,fastestLapSpeed,statusId
24660,24666,1033,1,131,44,1,1,1,1,26.0,70,1:36:12.473,5772473,70,1,1:16.627,205.823,1
24661,24667,1033,830,9,33,7,2,2,2,18.0,70,+8.702,5781175,60,4,1:19.184,199.176,1
24662,24668,1033,822,131,77,2,3,3,3,15.0,70,+9.452,5781925,66,2,1:17.665,203.072,1
24663,24669,1033,840,211,18,3,4,4,4,12.0,70,+57.579,5830052,68,3,1:18.973,199.708,1
24664,24670,1033,848,9,23,13,5,5,5,10.0,70,+1:18.316,5850789,68,5,1:19.440,198.534,1
24665,24671,1033,20,6,5,5,6,6,6,8.0,69,\N,\N,49,14,1:20.363,196.254,11
24666,24672,1033,815,211,11,4,7,7,7,6.0,69,\N,\N,39,10,1:20.090,196.923,11
24667,24673,1033,817,4,3,11,8,8,8,4.0,69,\N,\N,46,7,1:19.532,198.305,11
24668,24674,1033,832,1,55,9,9,9,9,2.0,69,\N,\N,68,15,1:20.477,195.976,11
24669,24675,1033,825,210,20,16,10,10,10,1.0,69,\N,\N,67,6,1:19.457,198.492,11


In [31]:
# Load constructors names
constructors_df = pd.read_csv("./f1db_csv/constructors.csv")
constructors_df

Unnamed: 0,constructorId,constructorRef,name,nationality,url
0,1,mclaren,McLaren,British,http://en.wikipedia.org/wiki/McLaren
1,2,bmw_sauber,BMW Sauber,German,http://en.wikipedia.org/wiki/BMW_Sauber
2,3,williams,Williams,British,http://en.wikipedia.org/wiki/Williams_Grand_Prix_Engineering
3,4,renault,Renault,French,http://en.wikipedia.org/wiki/Renault_in_Formula_One
4,5,toro_rosso,Toro Rosso,Italian,http://en.wikipedia.org/wiki/Scuderia_Toro_Rosso
5,6,ferrari,Ferrari,Italian,http://en.wikipedia.org/wiki/Scuderia_Ferrari
6,7,toyota,Toyota,Japanese,http://en.wikipedia.org/wiki/Toyota_Racing
7,8,super_aguri,Super Aguri,Japanese,http://en.wikipedia.org/wiki/Super_Aguri_F1
8,9,red_bull,Red Bull,Austrian,http://en.wikipedia.org/wiki/Red_Bull_Racing
9,10,force_india,Force India,Indian,http://en.wikipedia.org/wiki/Racing_Point_Force_India


In [6]:
# Load race ID and names
races_df = pd.read_csv("./f1db_csv/races.csv")

In [25]:
# Some cleaning
clean_lt_df = lap_times_df[["raceId", "driverId", "lap", "milliseconds"]]
clean_lt_df["seconds"] = clean_lt_df.milliseconds / 1000
clean_lt_df = clean_lt_df.drop(columns = "milliseconds")
clean_lt_df

# race_1034_df = clean_lt_df[clean_lt_df.raceId == 1034]
# race_1034_df["seconds"] = race_1034_df.milliseconds / 1000
# race_1034_df = race_1034_df.drop(columns = "milliseconds")
# race_1034_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,raceId,driverId,lap,seconds
474870,1033,1,1,95.67
474871,1033,1,2,91.458
474872,1033,1,3,94.916
474873,1033,1,4,113.821
474874,1033,1,5,85.898
474875,1033,1,6,83.991
474876,1033,1,7,83.327
474877,1033,1,8,82.17
474878,1033,1,9,82.13
474879,1033,1,10,81.522


In [74]:
import pandas as pd

# Import all the data
drivers_df = pd.read_csv("./f1db_csv/drivers.csv").drop(columns = "url")
lap_times_df = pd.read_csv("./f1db_csv/lap_times.csv")
results_df = pd.read_csv("./f1db_csv/results.csv")
constructors_df = pd.read_csv("./f1db_csv/constructors.csv")
races_df = pd.read_csv("./f1db_csv/races.csv")

# Clean some names and create new variables
# drivers_df
drivers_df["number"] = drivers_df["number"].replace({r"\N": None})
drivers_df["driverName"] = drivers_df["forename"].str.cat(drivers_df["surname"],sep = " ")
drivers_df = drivers_df.drop(columns = ["forename", "surname"])

# lap_times_df
clean_lt_df = lap_times_df[["raceId", "driverId", "lap", "milliseconds"]]
clean_lt_df["seconds"] = clean_lt_df.milliseconds / 1000
clean_lt_df = clean_lt_df.drop(columns = "milliseconds")



def create_race_table(year, race_name):
    races_temp = races_df[races_df.year == year]
    race_id = int(races_temp.raceId[races_temp.name == race_name])
    lap_times_1 = clean_lt_df[clean_lt_df.raceId == race_id]
    results_1 = results_df[results_df.raceId == race_id]
    df_1 = pd.merge(drivers_df[["driverId", "driverName", "number"]], lap_times_1, on = "driverId")
    df_2 = pd.merge(df_1, results_1[["resultId", "driverId", "constructorId", "position"]], on = "driverId")
    df_3 = pd.merge(df_2, constructors_df[["constructorId", "constructorRef"]], on = "constructorId")
    df_3["constructorRef"] = df_3["constructorRef"].str.title()
    df_4 = pd.merge(df_3, races_df[["raceId", "year", "name"]], on = "raceId")
    return df_4

create_race_table(2020, "British Grand Prix")

# Intermediate Merging
# df_1 = pd.merge(clean_lt_df, drivers_df[["driverId", "forename", "surname", "number"]], on = ["driverId"])
# df_1["number"] = df_1["number"].replace({r"\N": None})
# df_1["driverName"] = df_1["forename"].str.cat(df_1["surname"],sep = " ")
# df_1 = df_1.drop(columns = ["forename", "surname"])
# df_1
# df_2 = pd.merge(df_1, constructors_df[["constructorId", "constructorRef"]], on = "constructorId")
# df_2
# df_2 = pd.merge(df_1, results_df[["resultId", "driverId", "constructorId", "position"]], on = ["raceId", "driverId"])
# df_2
# df_2["position"] = df_2["position"].replace({r"\N": None})
# df_2["position"] = pd.to_numeric(df_2["position"], downcast = "integer")
# df_3 = pd.merge(df_2, constructors_df[["constructorId", "constructorRef"]], on = "constructorId")
# df_4 = pd.merge(df_3, races_df[["raceId", "year", "name"]], on = "raceId")
# df_4 = df_4.sort_values(["position", "lap"])
# df_4["driverName"] = df_4["forename"].str.cat(df_4["surname"],sep = " ")
# df_4 = df_4.drop(columns = ["forename", "surname"])

# Do some formatting to make the final graph easier to read
# df_4["constructorRef"] = df_4["constructorRef"].str.title()

# Save this as a CSV for the full series of races
# df_4.to_csv("./plotting_data.csv")



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,driverId,driverName,number,raceId,lap,seconds,resultId,constructorId,position,constructorRef,year,name
0,1,Lewis Hamilton,44,1034,1,94.01,24686,131,1,Mercedes,2020,British Grand Prix
1,1,Lewis Hamilton,44,1034,2,126.445,24686,131,1,Mercedes,2020,British Grand Prix
2,1,Lewis Hamilton,44,1034,3,151.201,24686,131,1,Mercedes,2020,British Grand Prix
3,1,Lewis Hamilton,44,1034,4,152.874,24686,131,1,Mercedes,2020,British Grand Prix
4,1,Lewis Hamilton,44,1034,5,138.933,24686,131,1,Mercedes,2020,British Grand Prix
5,1,Lewis Hamilton,44,1034,6,90.533,24686,131,1,Mercedes,2020,British Grand Prix
6,1,Lewis Hamilton,44,1034,7,91.143,24686,131,1,Mercedes,2020,British Grand Prix
7,1,Lewis Hamilton,44,1034,8,90.943,24686,131,1,Mercedes,2020,British Grand Prix
8,1,Lewis Hamilton,44,1034,9,90.868,24686,131,1,Mercedes,2020,British Grand Prix
9,1,Lewis Hamilton,44,1034,10,90.838,24686,131,1,Mercedes,2020,British Grand Prix


In [50]:
# Create table with driver, team, and team color
driver_ref_table = df_4[["raceId", "driverName", "constructorRef"]].drop_duplicates()
driver_ref_table = driver_ref_table.sort_values(by = "constructorRef")
driver_ref_table = driver_ref_table.reset_index(drop = True)
driver_ref_table


Unnamed: 0,raceId,driverName,constructorRef
0,1034,Kimi Räikkönen,Alfa
1,1034,Antonio Giovinazzi,Alfa
2,1034,Daniil Kvyat,Alphatauri
3,1034,Pierre Gasly,Alphatauri
4,1034,Sebastian Vettel,Ferrari
5,1034,Charles Leclerc,Ferrari
6,1034,Romain Grosjean,Haas
7,1034,Kevin Magnussen,Haas
8,1034,Carlos Sainz,Mclaren
9,1034,Lando Norris,Mclaren


In [72]:
fig = px.line(df_4, x = "lap", y = "seconds", color = "driverName", 
        hover_name = "driverName", hover_data = {"driverName" : False, "constructorRef" : True})

fig.show()

In [None]:
color_discrete_map = {"Lewis Hamilton" : "#00D2BE", "bottas" : "#00D2BE", 
                             "max_verstappen" : "#1E41FF", "albon" : "#1E41FF",
                             "leclerc" : "#DC0000", "vettel" : "#DC0000",
                             "sainz" : "#FF8700", "norris" : "#FF8700",
                             "ricciardo" : "#FFF500", "ocon" : "#FFF500",
                             "stroll" : "#F596C8", 
                             "gasly": "#469BFF", "kvyat" : "#469BFF",
                             "raikkonen" : "#9B0000", "giovinazzi" : "#9B0000",
                             "grosjean" : "#F0D787", "kevin_magnussen" : "#F0D787",
                             "latifi" : "white", "russell" : "white"}

NameError: name 'df_4' is not defined