In [1]:
import pandas as pd
import numpy as np
import shutil
import os
import kagglehub
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from ipywidgets import interact, Dropdown
from matplotlib.animation import FuncAnimation
from IPython.display import display, HTML


In [2]:
# Download the dataset; kagglehub returns the directory that holds the files
path = kagglehub.dataset_download("rohanrao/formula-1-world-championship-1950-2020")

# Put the files in ./formula-1/ under the notebook's working directory, which is
# where the relative "formula-1/<table>.csv" reads in the loading cell look.
# The trailing "" keeps a trailing path separator on the printed path.
new_path = os.path.join(os.getcwd(), "formula-1", "")
os.makedirs(new_path, exist_ok=True)

# Copy instead of move so the cell can be re-run: moving leaves the download
# directory empty, and shutil.move raises when the destination file already
# exists. Copying simply overwrites the previous copy.
for file in os.listdir(path):
    source = os.path.join(path, file)
    if os.path.isdir(source):
        shutil.copytree(source, os.path.join(new_path, file), dirs_exist_ok=True)
    else:
        shutil.copy2(source, new_path)

print("Path to dataset files:", new_path)

Path to dataset files: /home/jovyan/F1 Analysis/formula-1/


In [3]:
def _read_table(csv_path):
    """Load one dataset CSV (path relative to the working directory) as a DataFrame."""
    return pd.read_csv(csv_path)


# One DataFrame per CSV file of the dataset, named after the file it comes from
circuits = _read_table("formula-1/circuits.csv")
constructor_results = _read_table("formula-1/constructor_results.csv")
constructor_standings = _read_table("formula-1/constructor_standings.csv")
constructors = _read_table("formula-1/constructors.csv")
driver_standings = _read_table("formula-1/driver_standings.csv")
drivers = _read_table("formula-1/drivers.csv")
lap_times = _read_table("formula-1/lap_times.csv")
pit_stops = _read_table("formula-1/pit_stops.csv")
qualifying = _read_table("formula-1/qualifying.csv")
races = _read_table("formula-1/races.csv")
results = _read_table("formula-1/results.csv")
seasons = _read_table("formula-1/seasons.csv")
sprint_results = _read_table("formula-1/sprint_results.csv")
status = _read_table("formula-1/status.csv")


In [4]:
# Preview the first five rows of the circuits table
circuits.head(n=5)

Unnamed: 0,circuitId,circuitRef,name,location,country,lat,lng,alt,url
0,1,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,http://en.wikipedia.org/wiki/Melbourne_Grand_P...
1,2,sepang,Sepang International Circuit,Kuala Lumpur,Malaysia,2.76083,101.738,18,http://en.wikipedia.org/wiki/Sepang_Internatio...
2,3,bahrain,Bahrain International Circuit,Sakhir,Bahrain,26.0325,50.5106,7,http://en.wikipedia.org/wiki/Bahrain_Internati...
3,4,catalunya,Circuit de Barcelona-Catalunya,Montmeló,Spain,41.57,2.26111,109,http://en.wikipedia.org/wiki/Circuit_de_Barcel...
4,5,istanbul,Istanbul Park,Istanbul,Turkey,40.9517,29.405,130,http://en.wikipedia.org/wiki/Istanbul_Park


In [5]:
# Preview the first five rows of the per-race constructor results
constructor_results.head(n=5)

Unnamed: 0,constructorResultsId,raceId,constructorId,points,status
0,1,18,1,14.0,\N
1,2,18,2,8.0,\N
2,3,18,3,9.0,\N
3,4,18,4,5.0,\N
4,5,18,5,2.0,\N


In [6]:
# Preview the first five rows of the constructor standings
constructor_standings.head(n=5)

Unnamed: 0,constructorStandingsId,raceId,constructorId,points,position,positionText,wins
0,1,18,1,14.0,1,1,1
1,2,18,2,8.0,3,3,0
2,3,18,3,9.0,2,2,0
3,4,18,4,5.0,4,4,0
4,5,18,5,2.0,5,5,0


In [7]:
# Preview the first five rows of the constructors table
constructors.head(n=5)

Unnamed: 0,constructorId,constructorRef,name,nationality,url
0,1,mclaren,McLaren,British,http://en.wikipedia.org/wiki/McLaren
1,2,bmw_sauber,BMW Sauber,German,http://en.wikipedia.org/wiki/BMW_Sauber
2,3,williams,Williams,British,http://en.wikipedia.org/wiki/Williams_Grand_Pr...
3,4,renault,Renault,French,http://en.wikipedia.org/wiki/Renault_in_Formul...
4,5,toro_rosso,Toro Rosso,Italian,http://en.wikipedia.org/wiki/Scuderia_Toro_Rosso


In [8]:
# Preview the first five rows of the driver standings
driver_standings.head(n=5)

Unnamed: 0,driverStandingsId,raceId,driverId,points,position,positionText,wins
0,1,18,1,10.0,1,1,1
1,2,18,2,8.0,2,2,0
2,3,18,3,6.0,3,3,0
3,4,18,4,5.0,4,4,0
4,5,18,5,4.0,5,5,0


In [9]:
# Preview the first five rows of the drivers table
drivers.head(n=5)

Unnamed: 0,driverId,driverRef,number,code,forename,surname,dob,nationality,url
0,1,hamilton,44,HAM,Lewis,Hamilton,1985-01-07,British,http://en.wikipedia.org/wiki/Lewis_Hamilton
1,2,heidfeld,\N,HEI,Nick,Heidfeld,1977-05-10,German,http://en.wikipedia.org/wiki/Nick_Heidfeld
2,3,rosberg,6,ROS,Nico,Rosberg,1985-06-27,German,http://en.wikipedia.org/wiki/Nico_Rosberg
3,4,alonso,14,ALO,Fernando,Alonso,1981-07-29,Spanish,http://en.wikipedia.org/wiki/Fernando_Alonso
4,5,kovalainen,\N,KOV,Heikki,Kovalainen,1981-10-19,Finnish,http://en.wikipedia.org/wiki/Heikki_Kovalainen


In [10]:
# Preview the first five rows of the lap times
lap_times.head(n=5)

Unnamed: 0,raceId,driverId,lap,position,time,milliseconds
0,841,20,1,1,1:38.109,98109
1,841,20,2,1,1:33.006,93006
2,841,20,3,1,1:32.713,92713
3,841,20,4,1,1:32.803,92803
4,841,20,5,1,1:32.342,92342


In [11]:
# Preview the first five rows of the race results
results.head(n=5)

Unnamed: 0,resultId,raceId,driverId,constructorId,number,grid,position,positionText,positionOrder,points,laps,time,milliseconds,fastestLap,rank,fastestLapTime,fastestLapSpeed,statusId
0,1,18,1,1,22,1,1,1,1,10.0,58,1:34:50.616,5690616,39,2,1:27.452,218.3,1
1,2,18,2,2,3,5,2,2,2,8.0,58,+5.478,5696094,41,3,1:27.739,217.586,1
2,3,18,3,3,7,7,3,3,3,6.0,58,+8.163,5698779,41,5,1:28.090,216.719,1
3,4,18,4,4,5,11,4,4,4,5.0,58,+17.181,5707797,58,7,1:28.603,215.464,1
4,5,18,5,1,23,3,5,5,5,4.0,58,+18.014,5708630,43,1,1:27.418,218.385,1


In [12]:
# Preview the first five rows of the seasons table
seasons.head(n=5)

Unnamed: 0,year,url
0,2009,http://en.wikipedia.org/wiki/2009_Formula_One_...
1,2008,http://en.wikipedia.org/wiki/2008_Formula_One_...
2,2007,http://en.wikipedia.org/wiki/2007_Formula_One_...
3,2006,http://en.wikipedia.org/wiki/2006_Formula_One_...
4,2005,http://en.wikipedia.org/wiki/2005_Formula_One_...


In [13]:
# Preview the first five rows of the sprint results
sprint_results.head(n=5)

Unnamed: 0,resultId,raceId,driverId,constructorId,number,grid,position,positionText,positionOrder,points,laps,time,milliseconds,fastestLap,fastestLapTime,statusId
0,1,1061,830,9,33,2,1,1,1,3,17,25:38.426,1538426,14,1:30.013,1
1,2,1061,1,131,44,1,2,2,2,2,17,+1.430,1539856,17,1:29.937,1
2,3,1061,822,131,77,3,3,3,3,1,17,+7.502,1545928,17,1:29.958,1
3,4,1061,844,6,16,4,4,4,4,0,17,+11.278,1549704,16,1:30.163,1
4,5,1061,846,1,4,6,5,5,5,0,17,+24.111,1562537,16,1:30.566,1


In [14]:
# Preview the first five rows of the status lookup table
status.head(n=5)

Unnamed: 0,statusId,status
0,1,Finished
1,2,Disqualified
2,3,Accident
3,4,Collision
4,5,Engine


In [15]:
# Column names present in both tables, i.e. the candidate join keys
set(drivers.columns) & set(driver_standings.columns)

{'driverId'}

In [16]:
# Inner-join the driver details onto the standings rows via the shared driverId key
drivers_combined = pd.merge(drivers, driver_standings, how="inner", on="driverId")
drivers_combined.head(n=5)

Unnamed: 0,driverId,driverRef,number,code,forename,surname,dob,nationality,url,driverStandingsId,raceId,points,position,positionText,wins
0,1,hamilton,44,HAM,Lewis,Hamilton,1985-01-07,British,http://en.wikipedia.org/wiki/Lewis_Hamilton,1,18,10.0,1,1,1
1,1,hamilton,44,HAM,Lewis,Hamilton,1985-01-07,British,http://en.wikipedia.org/wiki/Lewis_Hamilton,9,19,14.0,1,1,1
2,1,hamilton,44,HAM,Lewis,Hamilton,1985-01-07,British,http://en.wikipedia.org/wiki/Lewis_Hamilton,27,20,14.0,3,3,1
3,1,hamilton,44,HAM,Lewis,Hamilton,1985-01-07,British,http://en.wikipedia.org/wiki/Lewis_Hamilton,48,21,20.0,2,2,1
4,1,hamilton,44,HAM,Lewis,Hamilton,1985-01-07,British,http://en.wikipedia.org/wiki/Lewis_Hamilton,69,22,28.0,3,3,1


In [17]:
# Give the generic merged columns driver-specific names.
# The earlier mapping had a misspelled "postion" key (silently ignored by rename)
# and mapped "position" to "", which left the standings position in a column
# with an empty name. "position" now gets the intended descriptive name.
drivers_combined = drivers_combined.rename(
    columns={
        "url": "driverURL",
        "position": "driversChampionshipPosition",
        "points": "driversChampionshipPoints",
    }
)

In [18]:
# Enrich every lap record with the driver's name (left join keeps all lap rows)
lap_driver = pd.merge(
    lap_times,
    drivers[['driverId', 'forename', 'surname']],
    how='left',
    on='driverId',
)

# Add the year and name of the race each lap belongs to, again as a left join
lap_driver_race = pd.merge(
    lap_driver,
    races[['raceId', 'year', 'name']],
    how='left',
    on='raceId',
)

# Take the race of the very first lap record as the example to inspect
selected_race_id = lap_driver_race['raceId'].iloc[0]

# Keep only that race's rows, ordered by lap and then by position within the lap
race_data = (
    lap_driver_race
    .loc[lap_driver_race['raceId'] == selected_race_id]
    .sort_values(by=['lap', 'position'])
)

race_data.head()

Unnamed: 0,raceId,driverId,lap,position,time,milliseconds,forename,surname,year,name
0,841,20,1,1,1:38.109,98109,Sebastian,Vettel,2011,Australian Grand Prix
58,841,1,1,2,1:40.573,100573,Lewis,Hamilton,2011,Australian Grand Prix
116,841,17,1,3,1:41.467,101467,Mark,Webber,2011,Australian Grand Prix
174,841,808,1,4,1:42.835,102835,Vitaly,Petrov,2011,Australian Grand Prix
232,841,13,1,5,1:44.196,104196,Felipe,Massa,2011,Australian Grand Prix


In [19]:
# # Prepare race dropdown options
# valid_race_ids = lap_driver_race['raceId'].unique()  # Races with lap data
# filtered_races = races[races['raceId'].isin(valid_race_ids)]  # Only valid races

# # Prepare race dropdown options
# race_options = filtered_races[['raceId', 'year', 'name']].drop_duplicates()
# race_options['label'] = race_options['year'].astype(str) + " - " + race_options['name']
# race_dict = dict(zip(race_options['label'], race_options['raceId']))

# # Function to create the animated plot
# def animate_race(selected_race_label):
#     selected_race_id = race_dict[selected_race_label]
#     race_data = lap_driver_race[lap_driver_race['raceId'] == selected_race_id].sort_values(by=['lap', 'position'])

#     # Reverse y-axis positions (1 is at the top)
#     drivers_list = race_data[['driverId', 'forename', 'surname']].drop_duplicates()
    
#     # Set up the figure and axis
#     fig, ax = plt.subplots(figsize=(15, 6))
#     ax.set_title(f"Driver Positions Over Laps - {selected_race_label}")
#     ax.set_xlabel("Lap")
#     ax.set_ylabel("Position")

#     # Prepare lines for each driver
#     lines = {}
#     for driver_id in drivers_list['driverId']:
#         driver_name = drivers_list.loc[drivers_list['driverId'] == driver_id, ['forename', 'surname']].iloc[0]
#         lines[driver_id], = ax.plot([], [], label=f"{driver_name['forename'][0]}. {driver_name['surname']}")
    
#     # Place legend outside the plot and adjust layout
#     ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)
#     plt.tight_layout()  # Automatically adjusts to fit the legend
#     ax.set_xlim(1, race_data['lap'].max())
#     ax.set_ylim(0.5, drivers_list.shape[0] + 0.5)
#     ax.invert_yaxis()

#     # Update function for animation
#     def update(frame):
#         current_data = race_data[race_data['lap'] <= frame]
#         for driver_id, line in lines.items():
#             driver_data = current_data[current_data['driverId'] == driver_id]
#             line.set_data(driver_data['lap'], driver_data['position'])
        

#     # Animate the plot
#     ani = FuncAnimation(fig, update, frames=race_data['lap'].unique(), repeat=False, blit=False)
#     plt.close(fig)
#     return HTML(ani.to_jshtml())

# # Interactive widget for race selection
# dropdown = Dropdown(options=sorted(race_dict.keys()), description="Select Race:")
# interact(animate_race, selected_race_label=dropdown)

In [20]:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
from IPython.display import HTML
from ipywidgets import interact, Dropdown, VBox, Output
from matplotlib import colormaps
from matplotlib.ticker import MultipleLocator

# Step 1: Prepare race dropdown data
# Restrict the race list to races that actually have lap records
valid_race_ids = lap_driver_race['raceId'].unique()
filtered_races = races.loc[races['raceId'].isin(valid_race_ids)]

# Years offered in the first dropdown, in ascending order
years = sorted(filtered_races['year'].unique())

# One row per race plus a "<year> - <race name>" label that identifies it
race_options = (
    filtered_races[['raceId', 'year', 'name']]
    .drop_duplicates()
    .assign(label=lambda opts: opts['year'].astype(str) + " - " + opts['name'])
)

# Step 2: Initialize Dropdown Widgets
# The race dropdown starts without options; they are filled per selected year
year_dropdown = Dropdown(description="Select Year:", options=years)
race_dropdown = Dropdown(description="Select Race:", options=[])

# Step 3: Update race dropdown based on selected year
def update_race_dropdown(change):
    """Refill the race dropdown with the races of the newly selected year.

    Parameters
    ----------
    change : dict
        Change notification from the observed widget; only ``change['new']``
        (the selected year) is read.

    Each option is a ``(race name, label)`` pair, so the dropdown shows the
    race name while its value is the label. The selection is cleared afterwards.
    """
    year = change['new']
    races_in_year = race_options.loc[race_options['year'] == year]
    # NOTE(review): assigning `options` may itself select the first entry and
    # notify `value` observers before the reset below — confirm against ipywidgets.
    race_dropdown.options = list(zip(races_in_year['name'], races_in_year['label']))
    race_dropdown.value = None  # leave no race selected

# Refresh the race list whenever a different year is picked
year_dropdown.observe(update_race_dropdown, names='value')

# The observer only runs on later changes, and the race dropdown was created
# with no options, so fill it once for the year that is selected at start-up.
update_race_dropdown({'new': year_dropdown.value})

# Step 4: Animation Function
# Lookup from race label to raceId, used to resolve the dropdown selection
race_dict = {
    label: race_id
    for label, race_id in zip(race_options['label'], race_options['raceId'])
}

def animate_race(selected_race_label):
    """Build an animated chart of driver positions over the laps of one race.

    Parameters
    ----------
    selected_race_label
        Key into the module-level ``race_dict``; also shown in the plot title.

    Returns
    -------
    IPython.display.HTML
        The animation rendered through ``FuncAnimation.to_jshtml()``.

    Raises
    ------
    KeyError
        If ``selected_race_label`` is not a key of ``race_dict``.
    """
    race_id = race_dict[selected_race_label]
    in_race = lap_driver_race['raceId'] == race_id
    laps = lap_driver_race.loc[in_race].sort_values(by=['lap', 'position'])

    # One row per distinct (driverId, forename, surname) combination in the race
    entrants = laps[['driverId', 'forename', 'surname']].drop_duplicates()
    n_entrants = len(entrants)
    # First name record per driverId, used for the legend labels
    names_by_id = entrants.drop_duplicates(subset='driverId').set_index('driverId')

    # Figure with one y tick per position and position 1 at the top
    fig, ax = plt.subplots(figsize=(15, 6))
    ax.set_title(f"Driver Positions Over Laps - {selected_race_label}")
    ax.set_xlabel("Lap")
    ax.set_ylabel("Position")
    ax.set_xlim(1, laps['lap'].max())
    ax.set_ylim(0.5, n_entrants + 0.5)
    ax.invert_yaxis()
    ax.yaxis.set_major_locator(MultipleLocator(1))

    # tab20 resampled to one colour slot per entrant
    palette = colormaps['tab20'].resampled(n_entrants)

    # One initially empty line per driver, labelled "<initial>. <surname>"
    lines = {}
    for color_idx, driver_id in enumerate(entrants['driverId']):
        who = names_by_id.loc[driver_id]
        legend_label = f"{who['forename'][0]}. {who['surname']}"
        (lines[driver_id],) = ax.plot(
            [], [], label=legend_label, color=palette(color_idx)
        )

    # Legend sits outside the axes on the right; tight_layout makes room for it
    ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)
    plt.tight_layout()

    def update(frame):
        """Extend every driver's line with all laps up to and including ``frame``."""
        laps_so_far = laps.loc[laps['lap'] <= frame]
        for driver_id, line in lines.items():
            trace = laps_so_far.loc[laps_so_far['driverId'] == driver_id]
            line.set_data(trace['lap'], trace['position'])

    # One frame per distinct lap number, played through once
    ani = FuncAnimation(
        fig, update, frames=laps['lap'].unique(), repeat=False, blit=False
    )
    # Close the figure so only the HTML animation is shown, not a static plot
    plt.close(fig)
    return HTML(ani.to_jshtml())

# Step 5: Display animation when race is selected
# Display area for the animation; display_animation renders into it via `with output:`
output = Output()

def display_animation(change):
    """Render the animation for a newly selected race into the output widget.

    Parameters
    ----------
    change : dict
        Change notification from the race dropdown; ``change['new']`` is the
        selected race label. A falsy value (e.g. the selection was reset)
        leaves the output area untouched.
    """
    selected_label = change['new']
    if not selected_label:
        return

    with output:
        # Drop the previous animation before showing the new one
        output.clear_output()
        display(animate_race(selected_label))

# Call display_animation each time the race dropdown's `value` trait changes
race_dropdown.observe(display_animation, names='value')

# Step 6: Combine widgets into a layout
# VBox is already imported at the top of this cell, so it is not imported again here.
# The widgets are stacked vertically: year picker, race picker, then the animation area.
widgets_box = VBox([year_dropdown, race_dropdown, output])
display(widgets_box)


VBox(children=(Dropdown(description='Select Year:', options=(np.int64(1996), np.int64(1997), np.int64(1998), n…