In [29]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
import seaborn as sns
import scipy.stats as sts
from datetime import datetime

In [30]:
drivers = pd.read_csv('Raw Data/drivers.csv').replace('\\N', np.nan)
drivers['full_name'] = drivers['forename'] + ' ' + drivers['surname']
drivers['dob'] = drivers['dob'].apply(lambda x: datetime.strptime(x, '%Y-%m-%d')) 

constructors = pd.read_csv('Raw Data/constructors.csv').replace('\\N', np.nan).rename(columns={'name': 'constructorName', 'nationality': 'constructorNationality', 'url': 'constructorURL'})
circuits = pd.read_csv('Raw Data/circuits.csv').rename(columns={'name': 'circuit_name', 'url': 'circuit_url'}).replace('\\N', np.nan)

races = pd.read_csv('Raw Data/races.csv').replace('\\N', np.nan).rename(columns={'name': 'race_name', 'url': 'race_url'})

date_cols = [x for x in races.columns if x.find('date') != -1]
time_cols = [x.replace('date', 'time') for x in date_cols]
date_time_cols = [(x,y) for x,y in zip(date_cols, time_cols)]

for (date,time) in date_time_cols:
    mod_time = time
    mod_date = date
    if time[0] != 't':
        mod_time = time.split('_')[-1]
    if date[0] == 'd':
        mod_date = 'race_' + date
    
    races[f'{mod_date}{mod_time}'] = races[date] + " " + races[time]
    races[f'{mod_date}{mod_time}'] = races[f'{mod_date}{mod_time}'].apply(lambda x: datetime.strptime(str(x), '%Y-%m-%d %H:%M:%S') if pd.notna(x) else np.nan)
    
races = races.drop(columns=date_cols + time_cols)

In [99]:
results = pd.read_csv('Raw Data/results.csv').replace('\\N', np.nan)

results = results.merge(races[['raceId','race_name', 'race_datetime', 'year', 'round', 'circuitId']], how='inner', on='raceId')
results = results.merge(constructors[['constructorId','constructorName', 'constructorNationality']], how='inner', on='constructorId')
results = results.merge(drivers[['driverId','full_name', 'nationality', 'code', 'dob']], how='inner', on='driverId')
results['position_numeric'] = pd.to_numeric(results['position'], errors='coerce')

# Seasonal Stats

In [32]:
seasonal_statistics = results.groupby('year').agg(races=pd.NamedAgg('raceId', pd.Series.nunique),
                                                  drivers=pd.NamedAgg('driverId', pd.Series.nunique),
                                                  teams=pd.NamedAgg('constructorId', pd.Series.nunique),
                                                  total_points=pd.NamedAgg('points', 'sum'),
                                                  dnfs=pd.NamedAgg('position', lambda x: x.isna().sum()),
                                                  min_datetime=pd.NamedAgg('race_datetime', 'min'),
                                                  max_datetime=pd.NamedAgg('race_datetime', 'max'),
                                                  nationalities=pd.NamedAgg('nationality', pd.Series.nunique),
                                                  )

seasonal_statistics['drivers/team'] = seasonal_statistics['drivers'] / seasonal_statistics['teams']
seasonal_statistics['points/race'] = seasonal_statistics['total_points'] / seasonal_statistics['races']
seasonal_statistics['seasonal_length_days'] = (seasonal_statistics['max_datetime'] - seasonal_statistics['min_datetime']).dt.days
seasonal_statistics['mean_days_between_races'] = seasonal_statistics['seasonal_length_days'] / seasonal_statistics['races']


seasonal_statistics = seasonal_statistics.reset_index()

In [33]:
df = seasonal_statistics.drop(columns=['min_datetime', 'max_datetime']).fillna(0)
df = pd.melt(df, id_vars=['year'], value_vars=list(df.columns)[1:])

df.to_csv('data4viz/seasonal_stats.csv', index=False)

# Yearly Driver Stats

In [34]:
import numpy as np

results['position_numeric'] = pd.to_numeric(results['position'], errors='coerce')

yearly_driver_rankings = results.groupby(['year', 'full_name']).agg(
    points=pd.NamedAgg('points', 'sum'),
    races=pd.NamedAgg('resultId', 'count'),
    poles=pd.NamedAgg('grid', lambda x: (x == 1).sum()),
    wins=pd.NamedAgg('position_numeric', lambda x: (x == 1).sum()),  # Counting wins where position is 1
    podiums=pd.NamedAgg('position_numeric', lambda x: np.sum((x <= 3) & (~np.isnan(x)))),  # Counting podiums where position is less than or equal to 3
    dnfs=pd.NamedAgg('position', lambda x: x.isna().sum()),
    bestFinPos=pd.NamedAgg('position_numeric', 'min'),
    avgGridPos=pd.NamedAgg('grid', 'mean'),
    avgFinPos=pd.NamedAgg('position_numeric', 'mean')  # Calculating average finishing position excluding NaNs
).sort_values(by=['year', 'points'], ascending=False).reset_index().groupby('year')


groups = []
for idx, rows in yearly_driver_rankings:
    _sorted = rows.sort_values('points', ascending=False)
    _sorted['championshipRank'] =_sorted.points.rank(method='min', na_option='bottom', ascending=False)
    groups.append(_sorted)


yearly_driver_rankings = pd.concat(groups).reset_index()
yearly_points = yearly_driver_rankings.groupby('year').agg(totalPoints=pd.NamedAgg('points', 'sum')).reset_index()
yearly_driver_rankings = yearly_driver_rankings.merge(yearly_points, how='inner', on='year')

yearly_driver_rankings['fracPointsTotal'] = yearly_driver_rankings.points / yearly_driver_rankings.totalPoints  

top_10 = yearly_driver_rankings[yearly_driver_rankings.championshipRank < 10][['year', 'full_name', 'points', 'poles', 'wins', 'podiums', 'dnfs', 'bestFinPos', 'avgGridPos', 'avgFinPos', 'championshipRank', 'fracPointsTotal']]
other = yearly_driver_rankings[yearly_driver_rankings.championshipRank >= 10].groupby('year').agg(
    full_name=pd.NamedAgg('full_name', lambda x: 'Other'),
    points=pd.NamedAgg('points', 'sum'),
    poles=pd.NamedAgg('poles', 'sum'),
    wins=pd.NamedAgg('wins', 'sum'),
    podiums=pd.NamedAgg('podiums', 'sum'),
    dnfs=pd.NamedAgg('dnfs', 'sum'),
    bestFinPos=pd.NamedAgg('bestFinPos', 'max'),
    avgGridPos=pd.NamedAgg('avgGridPos', 'mean'),
    avgFinPos=pd.NamedAgg('avgFinPos', 'mean'),
    championshipRank=pd.NamedAgg('championshipRank', lambda x: 10.0),
    fracPointsTotal=pd.NamedAgg('fracPointsTotal', 'sum')
).reset_index()


yearly_driver_rankings = pd.concat([top_10, other]).sort_values(by=['year', 'championshipRank'], ascending=False)

yearly_driver_rankings



Unnamed: 0,year,full_name,points,poles,wins,podiums,dnfs,bestFinPos,avgGridPos,avgFinPos,championshipRank,fracPointsTotal
73,2023,Other,107.0,0,0,1,15,13.0,12.883333,13.176052,10.0,0.087490
3173,2023,Lance Stroll,41.0,0,0,0,2,4.0,10.250000,8.600000,9.0,0.033524
3172,2023,Lando Norris,66.0,0,0,2,0,2.0,8.500000,10.000000,8.0,0.053966
3171,2023,Carlos Sainz,77.0,0,0,0,1,4.0,5.000000,6.727273,7.0,0.062960
3170,2023,Charles Leclerc,88.0,2,0,3,2,2.0,6.500000,5.900000,6.0,0.071954
...,...,...,...,...,...,...,...,...,...,...,...,...
4,1950,Alberto Ascari,11.0,0,0,2,2,2.0,5.400000,3.000000,5.0,0.065476
3,1950,Louis Rosier,13.0,0,0,2,2,3.0,10.142857,4.200000,4.0,0.077381
2,1950,Juan Fangio,27.0,4,3,3,4,1.0,2.285714,1.000000,3.0,0.160714
1,1950,Luigi Fagioli,28.0,0,0,5,1,2.0,3.500000,2.200000,2.0,0.166667


In [35]:
import plotly.express as px
fig = px.area(yearly_driver_rankings, 
              x="year", 
              y="points", 
              color="championshipRank",
              title="F1 Points in Driver's Championship by Position<br><sup>Rank 10 = remaining drivers' points aggregated</sup>",  
                labels={'year': 'Year','points': 'Points'})

fig.show()

In [36]:
import plotly.express as px
fig = px.area(yearly_driver_rankings, 
              x="year", 
              y="fracPointsTotal", 
              color="championshipRank",
              title="Driver's Championship Classified Order Fraction of Total Points<br><sup>Rank 10 = remaining drivers' points aggregated</sup>",  
                labels={'year': 'Year','fracPointsTotal': 'Fraction of Total Points'})

fig.show()

# Country & Races

In [37]:
country_races = races[['year', 'round', 'circuitId', 'race_name', 'raceId']].merge(circuits, how='inner', on='circuitId').replace('United States', 'USA')
country_races = country_races.groupby(['country', 'circuit_name']).agg(
    races=pd.NamedAgg('raceId', 'count'),
    years=pd.NamedAgg('year', lambda x: list(x)),
    location=pd.NamedAgg('location', 'first'),
    lat=pd.NamedAgg('lat', 'first'),
    lng=pd.NamedAgg('lng', 'first'),
    alt=pd.NamedAgg('alt', 'first'),
).reset_index().groupby('country')


country_stats = []
for idx, rows in country_races:
    if idx.lower() == 'united states':
        idx = 'USA'
    circuit_dicts = rows[rows.columns[1:]].to_dict(orient='records')
    num_races = sum([len(x['years']) for x in circuit_dicts])
    country_stats.append({
        'country': idx,
        'races': num_races,
        'circuits': len(circuit_dicts),
        'circuit_info': circuit_dicts
    })
    
import json
with open('data4viz/races_per_country.json', 'w', encoding='utf-8') as f:
    json.dump(country_stats, f, ensure_ascii=False, indent=4)

# Race Calendar

In [38]:
import pycountry_convert as pc

race_calendar = results[['year', 'round', 'circuitId']].drop_duplicates().merge(circuits, how='inner', on='circuitId')

races_circuits = race_calendar.groupby('circuit_name').agg(
                races=pd.NamedAgg('year', 'count'),
                country=pd.NamedAgg('country', 'first'),
                lat=pd.NamedAgg('lat', 'first'),
                lng=pd.NamedAgg('lng', 'first'),
                alt=pd.NamedAgg('alt', 'first'),
                firstRace=pd.NamedAgg('year', 'min'),
                lastRace=pd.NamedAgg('year', 'max')
                ).reset_index()


def country_to_continent(country_name):
    try:
        # Convert country name to country code (ISO Alpha-2)
        country_code = pc.country_name_to_country_alpha2(country_name)
        # Convert country code to continent code
        continent_code = pc.country_alpha2_to_continent_code(country_code)
        # Convert continent code to continent name
        continent_name = pc.convert_continent_code_to_continent_name(continent_code)
        return continent_name
    except:
        print(country_name)
        # Return None or a default value if country name is not found
        return None

races_circuits = races_circuits.replace({'UK': 'United Kingdom', 'Korea': 'South Korea', 'UAE': 'United Arab Emirates'})
races_circuits['continent'] = races_circuits['country'].apply(country_to_continent)

races_circuits

Unnamed: 0,circuit_name,races,country,lat,lng,alt,firstRace,lastRace,continent
0,AVUS,1,Germany,52.4806,13.251400,53,1959,1959,Europe
1,Adelaide Street Circuit,11,Australia,-34.9272,138.617000,58,1985,1995,Oceania
2,Ain Diab,1,Morocco,33.5786,-7.687500,19,1958,1958,Africa
3,Aintree,5,United Kingdom,53.4769,-2.940560,20,1955,1962,Europe
4,Albert Park Grand Prix Circuit,26,Australia,-37.8497,144.968000,10,1996,2023,Oceania
...,...,...,...,...,...,...,...,...,...
71,Valencia Street Circuit,5,Spain,39.4589,-0.331667,4,2008,2012,Europe
72,Watkins Glen,20,USA,42.3369,-76.927200,485,1961,1980,North America
73,Yas Marina Circuit,14,United Arab Emirates,24.4672,54.603100,3,2009,2022,Asia
74,Zeltweg,1,Austria,47.2039,14.747800,676,1964,1964,Europe


In [39]:
custom_colors = ['#4F0200', '#90100C', '#AF0A05', '#E10801', '#FB0F07', '#FF2E27', '#FF5F5A', '#FF9996', '#FFC3C1', '#FFDEDD']

map_continent_color = {'Europe': '#90100C', 
                       'North America': '#E10801',
                       'Asia': '#FF2E27',
                       'South America': '#FF5F5A',
                       'Oceania': '#FF9996',
                       'Africa': '#FFDEDD',
}

In [40]:
import plotly.express as px
fig = px.scatter_geo(races_circuits, 
                     lat='lat', 
                     lon='lng', 
                     size='races',
                     color="continent",
                     hover_name="circuit_name",
                     hover_data=['continent','country','alt', 'firstRace', 'lastRace'],
                     projection="equirectangular",
                     title=f'Formula 1 Race Circuits<br><sub>Formula 1 has raced in {len(races_circuits.circuit_name.unique())} circuits, across {len(races_circuits.country.unique())} countries and {len(races_circuits.continent.unique())} continents</sub>',
                     size_max=40,
                     color_discrete_map=map_continent_color
                     )

fig.update_geos(
    resolution=50,
    showcountries=True, countrycolor="#070B0C",
    showland=True, landcolor="#1F1F27",
    showcoastlines=True, coastlinecolor="#070B0C", 
    showocean=True, oceancolor="#070B0C", 
    showlakes=False,
    showrivers=False,
)

fig.update_layout(
    template='plotly_dark',
    plot_bgcolor='rgba(7,11,12,100)',
    paper_bgcolor='rgba(7,11,12,100)',
)
# fig.update_geos(lataxis_showgrid=True, lonaxis_showgrid=True)
fig.update_layout(height=800, margin={"r":0,"t":100,"l":0,"b":100})
fig.show()

In [41]:
def plotly_barchart(df, x_col, y_col, xtitle, ytitle,  title, subtitle, color=None):
    fig = px.bar(df, x=x_col, y=y_col, color=color,
                 title=f'{title}<br><sup>{subtitle}</sup>',
                 labels={x_col: xtitle,y_col: ytitle},
                 orientation='h')
    fig.show()
    
plotly_barchart(races_circuits.sort_values('races', ascending=False), 'races', 'continent', 'Races', 'Continent', 'F1 Races Across Continents', 'Test', 'continent')
plotly_barchart(races_circuits.sort_values('races', ascending=False).iloc[0:10], 'races', 'country', 'Races', 'Country', 'F1 Races Across Countries', 'Test', 'continent')

In [42]:
continents_sort = races_circuits.groupby('continent').agg({'races': 'sum'}).reset_index().sort_values('races', ascending=False).continent.tolist()
continents_sort = [(c, i) for i,c in enumerate(continents_sort)]

In [43]:
race_calendar = race_calendar.replace({'UK': 'United Kingdom', 'Korea': 'South Korea', 'UAE': 'United Arab Emirates'})
race_calendar['continent'] = race_calendar['country'].apply(country_to_continent)

race_calendar

Unnamed: 0,year,round,circuitId,circuitRef,circuit_name,location,country,lat,lng,alt,circuit_url,continent
0,2008,1,1,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.96800,10,http://en.wikipedia.org/wiki/Melbourne_Grand_P...,Oceania
1,2007,1,1,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.96800,10,http://en.wikipedia.org/wiki/Melbourne_Grand_P...,Oceania
2,2009,1,1,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.96800,10,http://en.wikipedia.org/wiki/Melbourne_Grand_P...,Oceania
3,2010,2,1,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.96800,10,http://en.wikipedia.org/wiki/Melbourne_Grand_P...,Oceania
4,2011,1,1,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.96800,10,http://en.wikipedia.org/wiki/Melbourne_Grand_P...,Oceania
...,...,...,...,...,...,...,...,...,...,...,...,...
1086,1953,8,66,bremgarten,Circuit Bremgarten,Bern,Switzerland,46.9589,7.40194,551,http://en.wikipedia.org/wiki/Circuit_Bremgarten,Europe
1087,1952,1,66,bremgarten,Circuit Bremgarten,Bern,Switzerland,46.9589,7.40194,551,http://en.wikipedia.org/wiki/Circuit_Bremgarten,Europe
1088,1950,4,66,bremgarten,Circuit Bremgarten,Bern,Switzerland,46.9589,7.40194,551,http://en.wikipedia.org/wiki/Circuit_Bremgarten,Europe
1089,1951,8,67,pedralbes,Circuit de Pedralbes,Barcelona,Spain,41.3903,2.11667,85,http://en.wikipedia.org/wiki/Pedralbes_Circuit,Europe


In [44]:


continent_split = race_calendar.groupby(
    ['year', 'continent']
    ).agg(
        {'circuitRef': 'count'}
        ).reset_index().merge(race_calendar.groupby('year').agg({'round': 'max'}).reset_index(), 
                              how='inner', 
                              on='year').rename(columns={'circuitRef': 'races'})


import plotly.express as px
fig = px.area(continent_split, 
              x="year", 
              y="races", 
              color="continent",
              # groupnorm='percent',
              labels={'year': 'Year','races': 'Races'},
              color_discrete_sequence=custom_colors
              )
fig.update_traces(line=dict(width=0))
fig.update_layout(
    template='plotly_dark',
    plot_bgcolor='rgba(7,11,12,100)',
    paper_bgcolor='rgba(7,11,12,100)',
)

fig.update_layout(height=400, margin={"r":0,"t":100,"l":0,"b":100})
fig.show()

continent_split.query('year == 2022')

Unnamed: 0,year,continent,races,round
294,2022,Asia,6,22
295,2022,Europe,10,22
296,2022,North America,4,22
297,2022,Oceania,1,22
298,2022,South America,1,22


In [45]:
continent_split = race_calendar.groupby(
    ['year', 'continent']
    ).agg(
        {'circuitRef': 'count'}
        ).reset_index().merge(race_calendar.groupby('year').agg({'round': 'max'}).reset_index(), 
                              how='inner', 
                              on='year').rename(columns={'circuitRef': 'races'})


import plotly.express as px
fig = px.area(continent_split, 
              x="year", 
              y="races", 
              color="continent",
              # groupnorm='percent',
              labels={'year': 'Year','races': '% of Races'},
              color_discrete_map=map_continent_color
              )
fig.update_traces(line=dict(width=0))
fig.update_layout(
    template='plotly_dark',
    plot_bgcolor='rgba(7,11,12,100)',
    paper_bgcolor='rgba(7,11,12,100)',
)
fig.show()

# Race Calendars

In [46]:
race_calendar = race_calendar.replace({'UK': 'United Kingdom', 'Korea': 'South Korea', 'UAE': 'United Arab Emirates'})
race_calendar['continent'] = race_calendar['country'].apply(country_to_continent)

# race_calendar.pivot(index='year', columns='continent', values='circuit_name')

In [47]:
grouped_calendar = race_calendar.groupby(['year', 'round']).agg(
    circuitId=pd.NamedAgg('circuitId', 'first'),
    circuitRef=pd.NamedAgg('circuitRef', 'first'),
    lat=pd.NamedAgg('lat', 'first'),
    lng=pd.NamedAgg('lng', 'first')
).reset_index()

# nodes = race_calendar.circuitRef.unique()

In [48]:
import numpy as np

# Haversine formula
def haversine(lat1, lon1, lat2, lon2):
    # Radius of the Earth in kilometers
    R = 6371.0
    # Conversions to radians
    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])
    # Differences
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    # Haversine formula
    a = np.sin(dlat/2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2)**2
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
    return R * c

# Apply the haversine function to calculate distances between consecutive circuits
g2022_test = grouped_calendar.query('year == 2022').reset_index(drop=True)
g2022_test['distance'] = [
    haversine(lat, lng, lat_next, lng_next)
    for lat, lng, lat_next, lng_next in zip(g2022_test['lat'], g2022_test['lng'], g2022_test['lat'].shift(-1), g2022_test['lng'].shift(-1))
]

print('2022 travel distance (km)', round(g2022_test.distance.sum(),2))

g2022_test.fillna(0)

2022 travel distance (km) 116138.57


Unnamed: 0,year,round,circuitId,circuitRef,lat,lng,distance
0,2022,1,3,bahrain,26.0325,50.5106,1258.429838
1,2022,2,77,jeddah,21.6319,39.1044,12817.101721
2,2022,3,1,albert_park,-37.8497,144.968,16086.33936
3,2022,4,21,imola,44.3439,11.7167,8172.907609
4,2022,5,79,miami,25.9581,-80.2389,7536.276018
5,2022,6,4,catalunya,41.57,2.26111,485.641294
6,2022,7,6,monaco,43.7347,7.42056,3484.881714
7,2022,8,73,baku,40.3725,49.8533,8930.516888
8,2022,9,7,villeneuve,45.5,-73.5228,5137.213078
9,2022,10,9,silverstone,52.0786,-1.01694,1254.640116


In [49]:
G = nx.Graph()

# Add nodes
for index, row in g2022_test.iterrows():
    G.add_node(row['circuitId'], label=row['circuitRef'], pos=(row['lng'], row['lat']))

# Add edges with weights
for i, row_i in g2022_test.iterrows():
    for j, row_j in g2022_test.iterrows():
        if i != j:
            dist = haversine(row_i['lat'], row_i['lng'], row_j['lat'], row_j['lng'])
            G.add_edge(row_i['circuitId'], row_j['circuitId'], weight=dist)

# Print the graph edges with weights
for (u, v, wt) in G.edges.data('weight'):
    print(f"Distance from {G.nodes[u]['label']} to {G.nodes[v]['label']} is {wt:.2f} km")

Distance from bahrain to jeddah is 1258.43 km
Distance from bahrain to albert_park is 12112.62 km
Distance from bahrain to imola is 4018.26 km
Distance from bahrain to miami is 12185.61 km
Distance from bahrain to catalunya is 4710.92 km
Distance from bahrain to monaco is 4332.64 km
Distance from bahrain to baku is 1595.69 km
Distance from bahrain to villeneuve is 10258.95 km
Distance from bahrain to silverstone is 5158.07 km
Distance from bahrain to red_bull_ring is 3910.83 km
Distance from bahrain to ricard is 4450.61 km
Distance from bahrain to hungaroring is 3628.98 km
Distance from bahrain to spa is 4640.38 km
Distance from bahrain to zandvoort is 4805.05 km
Distance from bahrain to monza is 4242.72 km
Distance from bahrain to marina_bay is 6327.18 km
Distance from bahrain to suzuka is 8054.33 km
Distance from bahrain to americas is 12908.76 km
Distance from bahrain to rodriguez is 13990.12 km
Distance from bahrain to interlagos is 11813.58 km
Distance from bahrain to yas_marina i

In [50]:
def nearest_neighbor(G, start_node):
    path = [start_node]
    current_node = start_node
    total_distance = 0
    intermediate_distances = []

    unvisited = set(G.nodes) - {start_node}

    while unvisited:
        next_node = min(unvisited, key=lambda node: G[current_node][node]['weight'])
        edge_distance = G[current_node][next_node]['weight']
        intermediate_distances.append(edge_distance)
        total_distance += edge_distance
        path.append(next_node)
        current_node = next_node
        unvisited.remove(next_node)

    return path, total_distance, intermediate_distances

start_node = 3  # This corresponds to the 'bahrain' circuit.
knn_path, knn_total_distance, knn_intermediate_distances = nearest_neighbor(G, start_node)

knn_circuit_names = [G.nodes[node]['label'] for node in knn_path]
print("Visited path:", " -> ".join(knn_circuit_names))
print("Total travel distance:", round(knn_total_distance, 2), "km")

Visited path: bahrain -> yas_marina -> jeddah -> baku -> hungaroring -> red_bull_ring -> imola -> monza -> monaco -> ricard -> catalunya -> spa -> zandvoort -> silverstone -> villeneuve -> miami -> americas -> rodriguez -> interlagos -> albert_park -> marina_bay -> suzuka
Total travel distance: 52248.92 km


In [51]:
def two_opt(route, G):
    best = route
    improved = True
    while improved:
        improved = False
        for i in range(1, len(route) - 2):
            for j in range(i + 1, len(route)):
                if j - i == 1: continue  # Skip adjacent nodes (no change)
                new_route = route[:i] + route[i:j][::-1] + route[j:]
                if sum(G[best[k]][best[k + 1]]['weight'] for k in range(len(best) - 1)) > \
                   sum(G[new_route[k]][new_route[k + 1]]['weight'] for k in range(len(new_route) - 1)):
                    best = new_route
                    improved = True
        route = best
    return best

# Apply 2-opt on the initial path found
org_path = g2022_test.circuitId.to_list()
optimized_path = two_opt(org_path, G)
optimized_names = [G.nodes[node]['label'] for node in optimized_path]
optimized_distance = sum(G[optimized_path[i]][optimized_path[i + 1]]['weight'] for i in range(len(optimized_path) - 1))

print("Optimized path:", " -> ".join(optimized_names))
print("Optimized travel distance:", round(optimized_distance, 2), "km")


optimized_path = two_opt(knn_path, G)
optimized_names = [G.nodes[node]['label'] for node in optimized_path]
optimized_distance = sum(G[optimized_path[i]][optimized_path[i + 1]]['weight'] for i in range(len(optimized_path) - 1))

print("Optimized path:", " -> ".join(optimized_names))
print("Optimized travel distance:", round(optimized_distance, 2), "km")

Optimized path: bahrain -> jeddah -> baku -> hungaroring -> red_bull_ring -> imola -> monaco -> ricard -> catalunya -> monza -> spa -> zandvoort -> silverstone -> villeneuve -> miami -> interlagos -> rodriguez -> americas -> suzuka -> albert_park -> marina_bay -> yas_marina
Optimized travel distance: 63156.81 km
Optimized path: bahrain -> yas_marina -> jeddah -> baku -> hungaroring -> red_bull_ring -> imola -> monza -> monaco -> ricard -> catalunya -> spa -> zandvoort -> silverstone -> villeneuve -> miami -> americas -> rodriguez -> interlagos -> albert_park -> marina_bay -> suzuka
Optimized travel distance: 52248.92 km


In [52]:
original_cal = g2022_test[['round', 'circuitId', 'distance']].fillna(0)
original_cal['distance'] = [0] + original_cal['distance'].iloc[:-1].tolist()
original_cal['type'] = 'original'

if knn_intermediate_distances[0] != 0:
    knn_intermediate_distances = [0] + knn_intermediate_distances

knn_cal = pd.DataFrame([{'round': i+1, 'circuitId': idx, 'distance': d, 'type': 'knn'} for i, (idx, d) in enumerate(zip(knn_path, knn_intermediate_distances))])

calendar = pd.concat([original_cal, knn_cal])

calendar['cumulative_distance'] = calendar.groupby('type')['distance'].cumsum()

calendar = calendar.merge(race_calendar[['circuitId','circuitRef', 'country', 'lat', 'lng', 'alt', 'continent']], how='inner', on='circuitId').drop_duplicates().reset_index(drop=True).sort_values(['type', 'round'], ascending=True)

calendar


Unnamed: 0,round,circuitId,distance,type,cumulative_distance,circuitRef,country,lat,lng,alt,continent
1,1,3,0.0,knn,0.0,bahrain,Bahrain,26.0325,50.5106,7.0,Asia
43,2,24,446.841633,knn,446.841633,yas_marina,United Arab Emirates,24.4672,54.6031,3.0,Asia
3,3,77,1615.852755,knn,2062.694387,jeddah,Saudi Arabia,21.6319,39.1044,15.0,Asia
15,4,73,2317.67222,knn,4380.366607,baku,Azerbaijan,40.3725,49.8533,-7.0,Asia
25,5,11,2557.37939,knn,6937.745997,hungaroring,Hungary,47.5789,19.2486,264.0,Europe
21,6,70,339.792438,knn,7277.538435,red_bull_ring,Austria,47.2197,14.7647,678.0,Europe
7,7,21,397.576296,knn,7675.114732,imola,Italy,44.3439,11.7167,37.0,Europe
31,8,14,238.088477,knn,7913.203208,monza,Italy,45.6156,9.28111,162.0,Europe
13,9,6,255.688853,knn,8168.892061,monaco,Monaco,43.7347,7.42056,7.0,Europe
23,10,34,141.994214,knn,8310.886275,ricard,France,43.2506,5.79167,432.0,Europe


In [53]:
import plotly.express as px

knn_dist = calendar.query('round == 22 & type == "knn"')['cumulative_distance'].iloc[0]
org_dist = calendar.query('round == 22 & type == "original"')['cumulative_distance'].iloc[0]

def plotly_linechart(df, x_col, y_col, xtitle, ytitle, title, subtitle, color=None, hover_data=None):
    fig = px.line(df, 
                  x=x_col, 
                  y=y_col,
                  color=color,
                  title=f'{title}<br><sup>{subtitle}</sup>',  
                  labels={x_col: xtitle,y_col: ytitle},
                  hover_data=hover_data, 
                  markers=True)
    fig.show()
    
plotly_linechart(calendar, 
                 'round', 
                 'cumulative_distance', 
                 'Round', 
                 'Cumulative Distance (km)', 
                 'F1 2022 Race Calendar Optimization', 
                 f'Original Race Calendar vs. KNN Optimized Race Calendar. Original path length: {int(org_dist)} km, KNN path length: {int(knn_dist)} km -> delta: {int(knn_dist - org_dist)} km ({round((knn_dist - org_dist)/org_dist * 100, 2)}%)', 
                 'type',
                 ['circuitRef', 'country', 'lat', 'lng', 'continent']
                 )

In [54]:
import plotly.express as px
fig = px.area(calendar, 
              x="round", 
              y="cumulative_distance", 
              color="type",
            #   groupnorm='percent',
              labels={'round': 'Round','cumulative_distance': 'Travel Distance (Cumulative)'},
              color_discrete_sequence=custom_colors
              )
fig.update_traces(line=dict(width=0))
fig.update_layout(
    template='plotly_dark',
    plot_bgcolor='rgba(7,11,12,100)',
    paper_bgcolor='rgba(7,11,12,100)',
)

fig.update_layout(height=400, margin={"r":0,"t":100,"l":0,"b":100})
fig.show()

In [55]:
knn_route = calendar.query('type == "knn"').sort_values(by='round')[
    ['round', 'circuitId', 'distance', 'cumulative_distance']
    ].rename(columns={'round': 'knn.round', 'distance': 'knn.distance', 'cumulative_distance': 'knn.distance.cumulative'})
original_route = calendar.query('type == "original"').sort_values(by='round').rename(columns={'round': 'org.round', 'distance': 'org.distance', 'cumulative_distance': 'org.distance.cumulative'})
calendar_nodes = original_route.merge(knn_route, how='inner', on='circuitId')[['circuitRef','country','continent','lat', 'lng', 'alt', 'org.round', 'org.distance', 'org.distance.cumulative', 'knn.round', 'knn.distance', 'knn.distance.cumulative']]

In [58]:
calendar_nodes

Unnamed: 0,circuitRef,country,continent,lat,lng,alt,org.round,org.distance,org.distance.cumulative,knn.round,knn.distance,knn.distance.cumulative
0,bahrain,Bahrain,Asia,26.0325,50.5106,7.0,1,0.0,0.0,1,0.0,0.0
1,jeddah,Saudi Arabia,Asia,21.6319,39.1044,15.0,2,1258.429838,1258.429838,3,1615.852755,2062.694387
2,albert_park,Australia,Oceania,-37.8497,144.968,10.0,3,12817.101721,14075.531559,20,13062.910869,41155.257207
3,imola,Italy,Europe,44.3439,11.7167,37.0,4,16086.33936,30161.870919,7,397.576296,7675.114732
4,miami,USA,North America,25.9581,-80.2389,,5,8172.907609,38334.778527,16,2253.881776,17691.815085
5,catalunya,Spain,Europe,41.57,2.26111,109.0,6,7536.276018,45871.054546,11,344.825987,8655.712261
6,monaco,Monaco,Europe,43.7347,7.42056,7.0,7,485.641294,46356.695839,9,255.688853,8168.892061
7,baku,Azerbaijan,Asia,40.3725,49.8533,-7.0,8,3484.881714,49841.577554,4,2317.67222,4380.366607
8,villeneuve,Canada,North America,45.5,-73.5228,13.0,9,8930.516888,58772.094442,15,5137.213078,15437.933309
9,silverstone,United Kingdom,Europe,52.0786,-1.01694,153.0,10,5137.213078,63909.30752,14,379.96615,10300.720232


In [59]:
import plotly.graph_objects as go

fig = go.Figure()


fig.add_traces(go.Scattergeo(
    locationmode = 'country names',
    lon = original_route.lng,
    lat = original_route.lat,
    mode = 'lines',
    line = dict(width = 2, color=custom_colors[1], dash='dash'), # Adjust line thickness here
    marker=dict(size=10,symbol= "arrow-bar-up", angleref="previous"),
    name = 'Original Calendar'
))

fig.add_traces(go.Scattergeo(
    locationmode = 'country names',
    lon = knn_route.lng,
    lat = knn_route.lat,
    mode = 'lines',
    line = dict(width = 2, color=custom_colors[-5]), # Adjust line thickness here
    marker=dict(size=10,symbol= "arrow-bar-up", angleref="previous"),
    name = 'KNN Calendar'
))

# Assuming 'calendar_nodes' is your DataFrame
fig.add_trace(go.Scattergeo(
    locationmode = 'country names',
    lon = calendar_nodes['lng'],
    lat = calendar_nodes['lat'],
    mode = 'markers+text',
    marker = dict(size = 6, color='white'),
    name = 'Circuits',
    hoverinfo = 'text',
    hovertext = calendar_nodes.apply(lambda row: (
        f"<b>Circuit:</b> {row['circuitRef']}<br>"
        f"<b>Country:</b> {row['country']}<br>"
        f"<b>Continent:</b> {row['continent']}<br>"
        f"<b>Latitude:</b> {row['lat']}<br>"
        f"<b>Longitude:</b> {row['lng']}<br>"
        f"<b>Altitude:</b> {row['alt']}<br>"
        f"- - - - - - - - - - - - - - - - - -<br>"
        f"<b>Org Round:</b> {row['org.round']}<br>"
        f"<b>Org Distance:</b> {row['org.distance']}<br>"
        f"<b>Org Cumulative Distance:</b> {row['org.distance.cumulative']}<br>"
        f"- - - - - - - - - - - - - - - - - -<br>"
        f"<b>KNN Round:</b> {row['knn.round']}<br>"
        f"<b>KNN Distance:</b> {row['knn.distance']}<br>"
        f"<b>KNN Cumulative Distance:</b> {row['knn.distance.cumulative']}"
    ), axis=1)
))

# Layout settings
fig.update_layout(
    title_text = 'F1 Race Calendar',
    showlegend = True,
    geo = dict(
        projection_type = "natural earth",
        showland = True,
        landcolor = 'rgb(243, 243, 243)',
        countrycolor = 'rgb(204, 204, 204)',
    ),
)

fig.update_geos(
    resolution=50,
    showcountries=True, countrycolor="#070B0C",
    showland=True, landcolor="#1F1F27",
    showcoastlines=True, coastlinecolor="#070B0C", 
    showocean=True, oceancolor="#070B0C", 
    showlakes=False,
    showrivers=False,
)

fig.update_layout(
    template='plotly_dark',
    plot_bgcolor='rgba(7,11,12,100)',
    paper_bgcolor='rgba(7,11,12,100)',
    hoverlabel=dict(
        bgcolor="#E10801",
    )
)
# fig.update_geos(lataxis_showgrid=True, lonaxis_showgrid=True)
fig.update_layout(height=800, margin={"r":0,"t":100,"l":0,"b":100})
fig.show()

AttributeError: 'DataFrame' object has no attribute 'lng'

In [62]:
points2022 = results.query('year == 2022').groupby(['full_name', 'round']).agg({'points': 'sum'}).reset_index()
total_points = results.query('year == 2022').groupby('full_name').agg({'points': 'sum'}).reset_index().rename(columns={'points': 'total'}).sort_values(by='total', ascending=False)
pivot = points2022.pivot(index='full_name', columns='round', values='points').merge(total_points, how='inner', on='full_name').sort_values(by='total', ascending=False)

fig = px.imshow(pivot.drop(columns=['full_name']).to_numpy(),
                labels=dict(x="Round", y="Drivers", color="Points"),
                x=[str(x) for x in list(pivot.columns[1:])],
                y=pivot.full_name,
                text_auto=True, aspect="auto",
                color_continuous_scale='RdBu_r'
               )

fig.update_layout(
    template='plotly_dark',
    plot_bgcolor='rgba(7,11,12,100)',
    paper_bgcolor='rgba(7,11,12,100)',
)

fig.update_xaxes(side="top")
fig.update_layout(height=800, margin={"r":0,"t":100,"l":0,"b":100})
fig.show()



In [63]:
points2022['cumulative_points'] = points2022.groupby('full_name')['points'].cumsum()


fig = px.line(points2022, 
                  x='round', 
                  y='cumulative_points',
                  color='full_name',
                  title=f"Formula 1 Driver's Championship 2022<br><sup>Championship evolution 2022</sup>",  
                  hover_data=['full_name', 'round', 'points', 'cumulative_points'], 
                  markers=True)

fig.update_layout(
    template='plotly_dark',
    plot_bgcolor='rgba(7,11,12,100)',
    paper_bgcolor='rgba(7,11,12,100)',
)

fig.update_layout(height=600, margin={"r":0,"t":100,"l":0,"b":100})
fig.show()

In [64]:
tmp = {'1st': 25, '2nd': 18, '3rd': 15, '4th': 12, '5th': 10, '6th': 8, '7th': 6, '8th': 4, '9th': 2,'10th': 1}

tmp = pd.DataFrame([{'position': k, 'points': v} for k,v in tmp.items()])

fig = px.bar(tmp, x='position', y='points', text='points')

# Update layout with dark theme
fig.update_layout(
    template='plotly_dark',
    plot_bgcolor='rgba(7,11,12,100)',
    paper_bgcolor='rgba(7,11,12,100)',
    height=600,
    margin={"r":0, "t":100, "l":0, "b":100}
)

fig.update_layout(height=400, margin={"r":0,"t":00,"l":0,"b":00})
fig.show()

# Circuit Statistics

In [80]:
circuits

Unnamed: 0,circuitId,circuitRef,circuit_name,location,country,lat,lng,alt,circuit_url
0,1,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.84970,144.96800,10,http://en.wikipedia.org/wiki/Melbourne_Grand_P...
1,2,sepang,Sepang International Circuit,Kuala Lumpur,Malaysia,2.76083,101.73800,18,http://en.wikipedia.org/wiki/Sepang_Internatio...
2,3,bahrain,Bahrain International Circuit,Sakhir,Bahrain,26.03250,50.51060,7,http://en.wikipedia.org/wiki/Bahrain_Internati...
3,4,catalunya,Circuit de Barcelona-Catalunya,Montmeló,Spain,41.57000,2.26111,109,http://en.wikipedia.org/wiki/Circuit_de_Barcel...
4,5,istanbul,Istanbul Park,Istanbul,Turkey,40.95170,29.40500,130,http://en.wikipedia.org/wiki/Istanbul_Park
...,...,...,...,...,...,...,...,...,...
72,75,portimao,Autódromo Internacional do Algarve,Portimão,Portugal,37.22700,-8.62670,108,http://en.wikipedia.org/wiki/Algarve_Internati...
73,76,mugello,Autodromo Internazionale del Mugello,Mugello,Italy,43.99750,11.37190,255,http://en.wikipedia.org/wiki/Mugello_Circuit
74,77,jeddah,Jeddah Corniche Circuit,Jeddah,Saudi Arabia,21.63190,39.10440,15,http://en.wikipedia.org/wiki/Jeddah_Street_Cir...
75,78,losail,Losail International Circuit,Al Daayen,Qatar,25.49000,51.45420,,http://en.wikipedia.org/wiki/Losail_Internatio...


In [91]:
circuits = circuits.replace({'UK': 'United Kingdom', 'Korea': 'South Korea', 'UAE': 'United Arab Emirates'})
circuits['continent'] = circuits['country'].apply(country_to_continent)

circuits

Unnamed: 0,circuitId,circuitRef,circuit_name,location,country,lat,lng,alt,circuit_url,continent
0,1,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.84970,144.96800,10,http://en.wikipedia.org/wiki/Melbourne_Grand_P...,Oceania
1,2,sepang,Sepang International Circuit,Kuala Lumpur,Malaysia,2.76083,101.73800,18,http://en.wikipedia.org/wiki/Sepang_Internatio...,Asia
2,3,bahrain,Bahrain International Circuit,Sakhir,Bahrain,26.03250,50.51060,7,http://en.wikipedia.org/wiki/Bahrain_Internati...,Asia
3,4,catalunya,Circuit de Barcelona-Catalunya,Montmeló,Spain,41.57000,2.26111,109,http://en.wikipedia.org/wiki/Circuit_de_Barcel...,Europe
4,5,istanbul,Istanbul Park,Istanbul,Turkey,40.95170,29.40500,130,http://en.wikipedia.org/wiki/Istanbul_Park,Asia
...,...,...,...,...,...,...,...,...,...,...
72,75,portimao,Autódromo Internacional do Algarve,Portimão,Portugal,37.22700,-8.62670,108,http://en.wikipedia.org/wiki/Algarve_Internati...,Europe
73,76,mugello,Autodromo Internazionale del Mugello,Mugello,Italy,43.99750,11.37190,255,http://en.wikipedia.org/wiki/Mugello_Circuit,Europe
74,77,jeddah,Jeddah Corniche Circuit,Jeddah,Saudi Arabia,21.63190,39.10440,15,http://en.wikipedia.org/wiki/Jeddah_Street_Cir...,Asia
75,78,losail,Losail International Circuit,Al Daayen,Qatar,25.49000,51.45420,,http://en.wikipedia.org/wiki/Losail_Internatio...,Asia


In [105]:
treemap.groupby('circuit_name').agg({'points': 'sum'}).reset_index()

Unnamed: 0,circuit_name,points
0,AVUS,24.0
1,Adelaide Street Circuit,265.0
2,Ain Diab,24.0
3,Aintree,122.0
4,Albert Park Grand Prix Circuit,1670.0
...,...,...
71,Valencia Street Circuit,381.0
72,Watkins Glen,500.0
73,Yas Marina Circuit,1457.0
74,Zeltweg,25.0


In [141]:
results.merge(circuits[['circuitId', 'circuit_name', 'country', 'continent']], how='inner', on='circuitId').copy()

Unnamed: 0,resultId,raceId,driverId,constructorId,number,grid,position,positionText,positionOrder,points,...,constructorName,constructorNationality,full_name,nationality,code,dob,position_numeric,circuit_name,country,continent
0,1,18,1,1,22,1,1,1,1,10.0,...,McLaren,British,Lewis Hamilton,British,HAM,1985-01-07,1.0,Albert Park Grand Prix Circuit,Australia,Oceania
1,371,36,1,1,2,4,3,3,3,6.0,...,McLaren,British,Lewis Hamilton,British,HAM,1985-01-07,3.0,Albert Park Grand Prix Circuit,Australia,Oceania
2,7573,1,1,1,1,18,,D,20,0.0,...,McLaren,British,Lewis Hamilton,British,HAM,1985-01-07,,Albert Park Grand Prix Circuit,Australia,Oceania
3,20352,338,1,1,2,11,6,6,6,8.0,...,McLaren,British,Lewis Hamilton,British,HAM,1985-01-07,6.0,Albert Park Grand Prix Circuit,Australia,Oceania
4,20780,841,1,1,3,2,2,2,2,18.0,...,McLaren,British,Lewis Hamilton,British,HAM,1985-01-07,2.0,Albert Park Grand Prix Circuit,Australia,Oceania
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26075,20024,832,782,105,46,18,,W,20,0.0,...,Maserati,Italian,Juan Jover,Spanish,,1903-11-23,,Circuit de Pedralbes,Spain,Europe
26076,19407,807,650,128,48,16,,R,14,0.0,...,Gordini,French,Jacques Pollet,French,,1922-07-02,,Circuit de Pedralbes,Spain,Europe
26077,19398,807,648,131,4,12,5,5,5,2.0,...,Mercedes,German,Karl Kling,German,,1910-09-16,5.0,Circuit de Pedralbes,Spain,Europe
26078,20021,832,704,154,32,14,,R,17,0.0,...,Talbot-Lago,French,Yves Cabantous,French,,1904-10-08,,Circuit de Pedralbes,Spain,Europe


In [143]:
treemap = results.merge(circuits[['circuitId', 'circuit_name', 'country', 'continent']], how='inner', on='circuitId').copy()


Unnamed: 0,circuit_name,constructorName,full_name,country,continent,races,points,poles,wins,podiums,dnfs
0,Albert Park Grand Prix Circuit,Alfa Romeo,Antonio Giovinazzi,Australia,Oceania,1,0.0,0,0,0,0
1,Albert Park Grand Prix Circuit,Alfa Romeo,Guanyu Zhou,Australia,Oceania,2,2.0,0,0,0,0
2,Albert Park Grand Prix Circuit,Alfa Romeo,Kimi Räikkönen,Australia,Oceania,1,4.0,0,0,0,0
3,Albert Park Grand Prix Circuit,Alfa Romeo,Valtteri Bottas,Australia,Oceania,2,4.0,0,0,0,0
4,Albert Park Grand Prix Circuit,AlphaTauri,Nyck de Vries,Australia,Oceania,1,0.0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
2595,Yas Marina Circuit,Williams,Pastor Maldonado,United Arab Emirates,Asia,3,10.0,0,0,0,0
2596,Yas Marina Circuit,Williams,Robert Kubica,United Arab Emirates,Asia,1,0.0,0,0,0,0
2597,Yas Marina Circuit,Williams,Rubens Barrichello,United Arab Emirates,Asia,1,0.0,0,0,0,0
2598,Yas Marina Circuit,Williams,Sergey Sirotkin,United Arab Emirates,Asia,1,0.0,0,0,0,0


In [151]:
treemap = results.merge(circuits[['circuitId', 'circuit_name', 'country', 'continent']], how='inner', on='circuitId').copy()
treemap = treemap[treemap.year > 2010].groupby(['circuit_name', 'constructorName', 'full_name', 'country', 'continent']).agg(
    races=pd.NamedAgg('resultId', 'count'),
    points=pd.NamedAgg('points', 'sum'),
    poles=pd.NamedAgg('grid', lambda x: (x == 1).sum()),
    wins=pd.NamedAgg('position_numeric', lambda x: (x == 1).sum()),
    podiums=pd.NamedAgg('position_numeric', lambda x: np.sum((x <= 3) & (~np.isnan(x)))),  # Counting podiums where position is less than or equal to 3
    dnfs=pd.NamedAgg('position', lambda x: x.isna().sum()),
    ).reset_index()
# treemap = treemap[treemap.year > 2010].groupby(['circuit_name', 'constructorName', 'full_name']).agg({'points': 'sum', 'country': 'first', 'continent': 'first'}).reset_index()
treemap = treemap[treemap.points > 0.0]

fig = px.treemap(treemap, path=[px.Constant("world"), 'continent', 'country', 'circuit_name', 'full_name'], values='points',
                  color='points', color_continuous_scale='Reds',) # hover_data=['iso_alpha'],)

fig.update_layout(margin = dict(t=50, l=25, r=25, b=25))
fig.update_layout(
    template='plotly_dark',
    plot_bgcolor='rgba(7,11,12,100)',
    paper_bgcolor='rgba(7,11,12,100)',
)

# Adding text labels with 'points'
fig.data[0].textinfo = 'label+text+value'

fig.update_traces(texttemplate='%{label}<br>%{value}')
fig.update_traces(marker=dict(cornerradius=5))

fig.update_layout(height=800, margin={"r":100,"t":100,"l":100,"b":100})
fig.show()

In [157]:
results.merge(circuits[['circuitId', 'circuit_name', 'country', 'continent']], how='inner', on='circuitId').copy()

Unnamed: 0,resultId,raceId,driverId,constructorId,number,grid,position,positionText,positionOrder,points,...,constructorName,constructorNationality,full_name,nationality,code,dob,position_numeric,circuit_name,country,continent
0,1,18,1,1,22,1,1,1,1,10.0,...,McLaren,British,Lewis Hamilton,British,HAM,1985-01-07,1.0,Albert Park Grand Prix Circuit,Australia,Oceania
1,371,36,1,1,2,4,3,3,3,6.0,...,McLaren,British,Lewis Hamilton,British,HAM,1985-01-07,3.0,Albert Park Grand Prix Circuit,Australia,Oceania
2,7573,1,1,1,1,18,,D,20,0.0,...,McLaren,British,Lewis Hamilton,British,HAM,1985-01-07,,Albert Park Grand Prix Circuit,Australia,Oceania
3,20352,338,1,1,2,11,6,6,6,8.0,...,McLaren,British,Lewis Hamilton,British,HAM,1985-01-07,6.0,Albert Park Grand Prix Circuit,Australia,Oceania
4,20780,841,1,1,3,2,2,2,2,18.0,...,McLaren,British,Lewis Hamilton,British,HAM,1985-01-07,2.0,Albert Park Grand Prix Circuit,Australia,Oceania
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26075,20024,832,782,105,46,18,,W,20,0.0,...,Maserati,Italian,Juan Jover,Spanish,,1903-11-23,,Circuit de Pedralbes,Spain,Europe
26076,19407,807,650,128,48,16,,R,14,0.0,...,Gordini,French,Jacques Pollet,French,,1922-07-02,,Circuit de Pedralbes,Spain,Europe
26077,19398,807,648,131,4,12,5,5,5,2.0,...,Mercedes,German,Karl Kling,German,,1910-09-16,5.0,Circuit de Pedralbes,Spain,Europe
26078,20021,832,704,154,32,14,,R,17,0.0,...,Talbot-Lago,French,Yves Cabantous,French,,1904-10-08,,Circuit de Pedralbes,Spain,Europe


In [158]:
treemap = results.merge(circuits[['circuitId', 'circuit_name', 'country', 'continent']], how='inner', on='circuitId').copy()
treemap = treemap[treemap.year > 2010].groupby(['circuit_name', 'constructorName', 'full_name', 'country', 'continent']).agg(
    races=pd.NamedAgg('resultId', 'count'),
    points=pd.NamedAgg('points', 'sum'),
    poles=pd.NamedAgg('grid', lambda x: (x == 1).sum()),
    wins=pd.NamedAgg('position_numeric', lambda x: (x == 1).sum()),
    podiums=pd.NamedAgg('position_numeric', lambda x: np.sum((x <= 3) & (~np.isnan(x)))),  # Counting podiums where position is less than or equal to 3
    dnfs=pd.NamedAgg('position', lambda x: x.isna().sum()),
    ).reset_index()
# treemap = treemap[treemap.year > 2010].groupby(['circuit_name', 'constructorName', 'full_name']).agg({'points': 'sum', 'country': 'first', 'continent': 'first'}).reset_index()
treemap = treemap[treemap.points > 0.0]

fig = px.treemap(treemap, path=[px.Constant("world"), 'continent', 'country', 'circuit_name', 'full_name'], values='points',
                  color='points', color_continuous_scale='Reds')

fig.update_layout(margin = dict(t=50, l=25, r=25, b=25))
fig.update_layout(
    template='plotly_dark',
    plot_bgcolor='rgba(7,11,12,100)',
    paper_bgcolor='rgba(7,11,12,100)',
)

# Adding text labels with 'points'
fig.data[0].textinfo = 'label+text+value'

fig.update_traces(texttemplate='%{label}<br>%{value}')
fig.update_traces(marker=dict(cornerradius=5))

fig.update_layout(height=800, margin={"r":100,"t":100,"l":100,"b":100})
fig.show()

In [165]:
fig = px.sunburst(treemap, path=['continent', 'country', 'circuit_name', 'full_name'], values='points',
                  color='points', color_continuous_scale='Reds')

fig.update_layout(margin = dict(t=50, l=25, r=25, b=25))
fig.update_layout(
    template='plotly_dark',
    plot_bgcolor='rgba(7,11,12,100)',
    paper_bgcolor='rgba(7,11,12,100)',
)

# Adding text labels with 'points'
fig.data[0].textinfo = 'label+text+value'

fig.update_traces(texttemplate='%{label}<br>%{value}')

fig.update_layout(height=800, width=800, margin={"r":0,"t":100,"l":0,"b":100})
fig.show()

In [84]:
results.groupby(['circuit_name', 'constructorName', 'full_name']).agg({'points': 'sum', 'country': 'first', 'continent': 'first'})

Unnamed: 0_level_0,Unnamed: 1_level_0,points
constructorName,full_name,Unnamed: 2_level_1
Alfa Romeo,Antonio Giovinazzi,0.0
Alfa Romeo,Guanyu Zhou,2.0
Alfa Romeo,Kimi Räikkönen,4.0
Alfa Romeo,Valtteri Bottas,4.0
AlphaTauri,Nyck de Vries,0.0
...,...,...
Williams,Ralf Schumacher,14.0
Williams,Robert Kubica,0.0
Williams,Rubens Barrichello,4.0
Williams,Sergey Sirotkin,0.0


In [83]:
results.query('circuitId == 1')

Unnamed: 0,resultId,raceId,driverId,constructorId,number,grid,position,positionText,positionOrder,points,...,year,round,circuitId,constructorName,constructorNationality,full_name,nationality,code,dob,position_numeric
0,1,18,1,1,22,1,1,1,1,10.0,...,2008,1,1,McLaren,British,Lewis Hamilton,British,HAM,1985-01-07,1.0
18,371,36,1,1,2,4,3,3,3,6.0,...,2007,1,1,McLaren,British,Lewis Hamilton,British,HAM,1985-01-07,3.0
35,7573,1,1,1,1,18,,D,20,0.0,...,2009,1,1,McLaren,British,Lewis Hamilton,British,HAM,1985-01-07,
53,20352,338,1,1,2,11,6,6,6,8.0,...,2010,2,1,McLaren,British,Lewis Hamilton,British,HAM,1985-01-07,6.0
71,20780,841,1,1,3,2,2,2,2,18.0,...,2011,1,1,McLaren,British,Lewis Hamilton,British,HAM,1985-01-07,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25887,21729,880,823,207,21,21,18,18,18,0.0,...,2013,1,1,Caterham,Malaysian,Giedo van der Garde,Dutch,VDG,1985-04-25,18.0
25944,22935,948,837,209,88,22,,R,19,0.0,...,2016,1,1,Manor Marussia,British,Rio Haryanto,Indonesian,HAR,1993-01-22,
25982,25458,1076,854,210,47,15,13,13,13,0.0,...,2022,3,1,Haas F1 Team,American,Mick Schumacher,German,MSC,1999-03-22,13.0
26048,25460,1076,852,213,22,13,15,15,15,0.0,...,2022,3,1,AlphaTauri,Italian,Yuki Tsunoda,Japanese,TSU,2000-05-11,15.0


# Race Weekend Reports

In [65]:
pd.read_csv('Raw Data/qualifying.csv')

Unnamed: 0,qualifyId,raceId,driverId,constructorId,number,position,q1,q2,q3
0,1,18,1,1,22,1,1:26.572,1:25.187,1:26.714
1,2,18,9,2,4,2,1:26.103,1:25.315,1:26.869
2,3,18,5,1,23,3,1:25.664,1:25.452,1:27.079
3,4,18,13,6,2,4,1:25.994,1:25.691,1:27.178
4,5,18,2,2,3,5,1:25.960,1:25.518,1:27.236
...,...,...,...,...,...,...,...,...,...
9810,9868,1110,848,3,23,16,2:00.314,\N,\N
9811,9869,1110,855,51,24,17,2:00.832,\N,\N
9812,9870,1110,858,3,2,18,2:01.535,\N,\N
9813,9871,1110,817,213,3,19,2:02.159,\N,\N


In [69]:
import fastf1 as f1

session = f1.get_session(2024, 'China', 'FP1')
session.load()

core           INFO 	Loading data for Chinese Grand Prix - Practice 1 [v3.3.2]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for track_status_data. Loading data...
_api           INFO 	Fetching track status data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for _extended_timing_data. Loading data...
_api           INFO 	Fetching timing data...
_api           INFO 	Parsing timing data...
req            INFO 

In [71]:
free_practice_results = []

for x in ['FP1', 'FP2', 'FP3']:
    session = f1.get_session(2024, 'Bahrain', x)
    session.load()
    free_practice_results.append(session)

core           INFO 	Loading data for Bahrain Grand Prix - Practice 1 [v3.3.2]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
req            INFO 	Data has been written to cache!
Request for URL https://ergast.com/api/f1/2024/1/results.json failed; using cached response
Traceback (most recent call last):
  File "/Users/hanskristianbjorgokvaerum/opt/anaconda3/envs/ada/lib/python3.9/site-packages/requests_cache/session.py", line 290, in _resend
    response.raise_for_status()
  File "/Users/hanskristianbjorgokvaerum/opt/anaconda3/envs/ada/lib/python3.9/site-packages/requests/models.py", line 1021, in raise_for_status
    raise HTTPError(http_error_msg, response=self)
requests.exceptions.HTTPError: 503 Server Error: Backend fetch failed for

In [72]:
free_practice_results



[2024 Season Round 1: Bahrain Grand Prix - Practice 1,
 2024 Season Round 1: Bahrain Grand Prix - Practice 2,
 2024 Season Round 1: Bahrain Grand Prix - Practice 3]

In [78]:
laptimes = pd.concat([x.laps for x in free_practice_results])

laptimes[(~laptimes.LapTime.isna()) & (~laptimes.Deleted)].columns

Index(['Time', 'Driver', 'DriverNumber', 'LapTime', 'LapNumber', 'Stint',
       'PitOutTime', 'PitInTime', 'Sector1Time', 'Sector2Time', 'Sector3Time',
       'Sector1SessionTime', 'Sector2SessionTime', 'Sector3SessionTime',
       'SpeedI1', 'SpeedI2', 'SpeedFL', 'SpeedST', 'IsPersonalBest',
       'Compound', 'TyreLife', 'FreshTyre', 'Team', 'LapStartTime',
       'LapStartDate', 'TrackStatus', 'Position', 'Deleted', 'DeletedReason',
       'FastF1Generated', 'IsAccurate'],
      dtype='object')

In [None]:
laptimes[(~laptimes.LapTime.isna()) & (~laptimes.Deleted)].groupby()