In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import folium
import warnings
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import matplotlib.colors as mcolors
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource, HoverTool
from datetime import datetime
%matplotlib inline
# Silence library warnings so they do not clutter the rendered cell outputs.
warnings.simplefilter("ignore")
# Show every column when a DataFrame is displayed.
pd.set_option('display.max_columns', None)

# Load the CSV tables (paths are relative to the notebook's working directory).
circuits_df = pd.read_csv('circuits.csv')
results_df = pd.read_csv('results.csv')
status_df = pd.read_csv('status.csv')
drivers_df = pd.read_csv('drivers.csv')
races_df = pd.read_csv('races.csv')
constructors_df = pd.read_csv('constructors.csv')
constructor_standings_df = pd.read_csv('constructor_standings.csv')
lap_times_df = pd.read_csv('lap_times.csv')
qualifying_df = pd.read_csv('qualifying.csv')
driver_standings_df = pd.read_csv('driver_standings.csv')
# NOTE: the former trailing `pd.get_option('display.max_columns', None)` was
# removed: it passed a stray second positional argument (rejected by newer
# pandas releases) and its result was never used.

In [None]:
# Display every circuit as a red marker on an interactive map.
from IPython.display import display

# Centre the map on the mean coordinate of all circuits.
m = folium.Map(location=[circuits_df['lat'].mean(), circuits_df['lng'].mean()], zoom_start=6)

# Add each circuit exactly once (the previous version stacked a default marker
# and a red marker on every location). A fresh Icon is created per marker:
# an Icon element can only be attached to one Marker, so sharing a single
# instance leaves all but the last marker without the intended icon.
for lat, lng, circuit_name in zip(circuits_df['lat'], circuits_df['lng'], circuits_df['name']):
    folium.Marker(
        [lat, lng],
        popup=circuit_name,
        icon=folium.Icon(color='red', icon_color='white'),
    ).add_to(m)

display(m)

In [None]:
# Number of drivers per nationality, largest group first.
nationality_count = (
    drivers_df['nationality']
    .value_counts()
    .reset_index()
    .set_axis(['nationality', 'count'], axis=1)
    .sort_values(by='count', ascending=False)
)

# Bar chart coloured by the driver count.
fig = px.bar(
    nationality_count,
    x='nationality',
    y='count',
    title='Total No of F1 Drivers Participated by Nationality',
    labels={'count': 'Number of Drivers', 'nationality': 'Nationality'},
    color='count',
    hover_data=['nationality', 'count'],
)
nationality_layout = dict(
    xaxis={'categoryorder': 'total descending'},
    xaxis_title="Nationality",
    yaxis_title="Number of Drivers",
    coloraxis_colorbar=dict(title="Number of Drivers"),
    plot_bgcolor="white",
)
fig.update_layout(**nationality_layout)
fig.show()

In [None]:
# Race wins per nationality and year, shown as a stacked bar chart.
race_and_result_df = (
    races_df
    .merge(results_df, on='raceId')
    .merge(drivers_df, on='driverId')
)

# positionOrder == 1 selects the result row of each race winner.
is_winner = race_and_result_df['positionOrder'] == 1
winner_pole = race_and_result_df[is_winner]

race_winner_nationalities = (
    winner_pole
    .groupby(['year', 'nationality'])['raceId']
    .count()
    .reset_index()
)

# One row per year, one column per nationality; missing combinations become 0.
data_arrange = (
    race_winner_nationalities
    .pivot(index='year', columns='nationality', values='raceId')
    .fillna(0)
)

fig = px.bar(
    data_arrange,
    x=data_arrange.index,
    y=list(data_arrange.columns),
    title="Number of Race Wins by Nationality Over Years",
    labels={'variable': 'Nationality', 'value': 'Wins', 'year': 'Year'},
)
fig.update_layout(
    barmode='stack',
    xaxis_title="Year",
    yaxis_title="Number of Wins",
    legend_title="Nationality",
)
fig.show()

In [None]:
# Top 10 constructors in F1 history by championship points.
#
# The standings table holds one row per constructor per race with the running
# season total in `points` (the season-standings cells further down take the
# per-season maximum for the same reason). Summing every row would therefore
# count the same points once for every later race of the season, so we first
# reduce to the season total and only then add the seasons together.
constructor_season_points = (
    constructor_standings_df[['raceId', 'constructorId', 'points']]
    .merge(races_df[['raceId', 'year']], on='raceId')
    .groupby(['constructorId', 'year'], as_index=False)['points']
    .max()
    .merge(constructors_df[['constructorId', 'name']], on='constructorId')
)
top_10_constructors = (
    constructor_season_points
    .groupby('name')['points']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)

fig, ax = plt.subplots(figsize=(14, 9))
# One colour per bar; sized from the data so fewer than 10 rows still works.
colors = plt.cm.viridis(np.linspace(0, 1, len(top_10_constructors)))
top_10_constructors.plot(kind='barh', color=colors, edgecolor='black', ax=ax)
ax.set_title('Top 10 Constructors in F1 History by Points', fontsize=20, fontweight='bold', color='navy')
ax.set_xlabel('Total Points', fontsize=15)
ax.set_ylabel('Constructor Name', fontsize=15)
ax.tick_params(axis='both', labelsize=13)
# barh draws the first row at the bottom; invert so the leader is on top.
ax.invert_yaxis()
ax.grid(axis='x', linestyle='--', alpha=0.7)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['left'].set_color('gray')
ax.spines['bottom'].set_color('gray')
plt.show()

In [None]:
# Top 10 drivers by career championship points.
TOP_N_DRIVERS = 10

# `points` in the standings table is the running season total after each race,
# so summing every row would count the same points repeatedly. Reduce to one
# value per driver and season (the maximum) before summing over the career.
driver_season_points = (
    driver_standings_df[['raceId', 'driverId', 'points']]
    .merge(races_df[['raceId', 'year']], on='raceId')
    .groupby(['driverId', 'year'], as_index=False)['points']
    .max()
)
total_points = driver_season_points.groupby('driverId')['points'].sum().reset_index()
total_points = pd.merge(total_points, drivers_df[['driverId', 'forename', 'surname']], on='driverId')

top_drivers = total_points.sort_values(by='points', ascending=False).head(TOP_N_DRIVERS)
top_drivers = top_drivers.assign(
    forename_surname=top_drivers['forename'] + ' ' + top_drivers['surname']
)[['forename_surname', 'points']]

driver_names = top_drivers['forename_surname'].tolist()
source = ColumnDataSource(data=dict(
    forename_surname=driver_names,
    points=top_drivers['points'].tolist(),
))
# Categorical y-ranges are laid out bottom-to-top, so reverse the (descending)
# name list to put the points leader at the top of the chart.
p = figure(y_range=driver_names[::-1], title="Top F1 Drivers by Points",
           height=800, width=800,
           toolbar_location=None, tools="")
p.hbar(y='forename_surname', right='points', height=0.4, source=source, color="navy")

hover = HoverTool()
hover.tooltips = [
    ("Driver", "@forename_surname"),
    ("Points", "@points"),
]
p.add_tools(hover)

p.y_range.range_padding = 0.1
p.ygrid.grid_line_color = None
p.xaxis.axis_label = "Total Points"
p.yaxis.axis_label = "Driver Name"
p.xaxis.major_label_orientation = 1

output_notebook()
show(p)

In [None]:
# Wins recorded while a driver was first in the standings, per surname and year.
driver_position = (
    drivers_df
    .merge(driver_standings_df, on='driverId', how='left')
    .merge(races_df, on='raceId', how='left')
)

# Keep standings rows where the driver is in position 1, then take the highest
# win count seen for each (surname, year) pair.
leading_rows = driver_position.loc[driver_position['position'] == 1]
positions = (
    leading_rows
    .groupby(['surname', 'year'])['wins']
    .max()
    .sort_values(ascending=False)
    .reset_index(name='Wins')
    .rename(columns={'surname': 'name'})
)
positions['Wins'] = positions['Wins'].astype('int64')

# Horizontal bars for the 20 largest (driver, year) win counts.
fig = px.bar(
    positions.head(20),
    y='name',
    x='Wins',
    color='Wins',
    text='Wins',
    orientation='h',
    title='Top F1 Drivers by Wins Over Years',
    hover_data=['year', 'Wins'],
    labels={'name': 'Driver Name', 'Wins': 'Number of Wins'},
)
fig.update_layout(
    yaxis={'categoryorder': 'total ascending'},
    xaxis_title='Number of Wins',
    yaxis_title='Driver Name',
    coloraxis_showscale=False,
)
fig.update_traces(texttemplate='%{text}', textposition='outside')
fig.show()

In [None]:
# Number of distinct races each constructor has results for (teams with >= 100).
team_results = constructors_df.merge(results_df, on='constructorId', how='left')
races_per_team = (
    team_results[['name', 'points', 'raceId']]
    .groupby('name')['raceId']
    .nunique()
    .sort_values(ascending=False)
    .reset_index(name='races')
)
races_per_team = races_per_team.loc[races_per_team['races'] >= 100]

# Bubble chart: bubble size and (log-scaled) height both encode the race count.
fig = px.scatter(
    races_per_team,
    x='name',
    y='races',
    size='races',
    color='name',
    hover_name='name',
    size_max=60,
)
races_layout = dict(
    title='Number of Races per F1 Team',
    xaxis_title='Team',
    yaxis_title='Number of Races',
    xaxis={'categoryorder': 'total descending'},
    yaxis=dict(type='log'),
)
fig.update_layout(**races_layout)
fig.show()

In [None]:
# Drivers with the most races: top 20 by number of distinct races entered.
MAX_MARKER_PX = 60  # diameter, in pixels, of the largest bubble

# One wide table: each result row enriched with race, driver, constructor and
# finishing-status details. Built without in-place mutation so the cell can be
# re-run safely.
compiled_df = (
    results_df
    .merge(races_df[['raceId', 'year', 'name', 'round', 'date']], on='raceId', how='left')
    .merge(drivers_df[['driverId', 'driverRef', 'forename', 'surname', 'nationality', 'dob']],
           on='driverId', how='left')
    .merge(constructors_df[['constructorId', 'name', 'nationality']], on='constructorId', how='left')
    .merge(status_df[['statusId', 'status']], on='statusId', how='left')
    .drop(columns=['number', 'position', 'positionText', 'statusId', 'resultId', 'driverId', 'constructorId'])
    # The _x/_y suffixes come from the name/nationality clashes between the
    # race, driver and constructor tables in the merges above.
    .rename(columns={'rank': 'lap_rank', 'name_x': 'grandprix_name', 'nationality_x': 'nationality_driver',
                     'name_y': 'name_of_cons', 'nationality_y': 'nationality_cons', 'driverRef': 'driver'})
)
compiled_df['full_name'] = compiled_df['forename'] + ' ' + compiled_df['surname']
compiled_df = compiled_df.drop(columns=['forename', 'surname'])

# Count distinct races (not result rows) so a driver listed more than once in
# the same race is not counted twice; this matches the races-per-team cell.
driver_most_races = (
    compiled_df.groupby('full_name')['raceId']
    .nunique()
    .reset_index(name='total_no_of_races')
    .sort_values('total_no_of_races', ascending=False)
    .head(20)
)

race_counts = driver_most_races['total_no_of_races']
fig = go.Figure(data=[
    go.Scatter(
        x=driver_most_races['full_name'],
        y=race_counts,
        mode='markers',
        marker=dict(
            size=race_counts,
            # Raw counts used directly as sizes would be interpreted as pixel
            # diameters (hundreds of pixels). Scale by area so the largest
            # bubble is MAX_MARKER_PX wide.
            sizemode='area',
            sizeref=2.0 * race_counts.max() / (MAX_MARKER_PX ** 2),
            sizemin=4,
            color=race_counts,
        ),
    )
])
# The former single-option "All" dropdown only re-applied the same y values,
# so it has been removed; the top margin is kept for the title.
fig.update_layout(
    title_text="F1 Drivers: Total Number of Races Participated",
    xaxis_title="Driver",
    yaxis_title="Total Number of Races",
    margin=dict(t=100),
)
fig.show()

In [None]:
# Distribution of wins among F1 teams (top 20 constructors by race wins).
#
# Select the constructor columns by name instead of dropping "everything after
# the 7th column": positional drops silently keep or remove the wrong columns
# as soon as a CSV gains or loses a field.
team_winning_history = (
    results_df[['raceId', 'driverId', 'constructorId', 'positionOrder', 'points']]
    .merge(constructors_df[['constructorId', 'name']], how='inner', on='constructorId')
)

# positionOrder == 1 marks the winning result of each race.
team_wins = (
    team_winning_history.loc[team_winning_history['positionOrder'] == 1, 'name']
    .value_counts()
    .head(20)
)
team_wins_df = team_wins.rename_axis('Team').reset_index(name='Wins')

plt.figure(figsize=(14, 10))
plot = sns.scatterplot(data=team_wins_df, x='Team', y='Wins', s=100, color='blue', edgecolor='black')
# Annotate each point with its win count, nudged upwards so the text does not
# sit on top of the marker.
for team, wins in zip(team_wins_df['Team'], team_wins_df['Wins']):
    plot.text(team, wins + 3, wins,
              horizontalalignment='center', size='small', color='black', weight='semibold')
# Rotate x-axis labels for better readability
plt.xticks(rotation=45)
plt.title('Distribution of Wins Among F1 Teams')
plt.xlabel('Team')
plt.ylabel('Wins')
plt.tight_layout()
plt.show()

In [None]:
# Total number of podium finishes by F1 teams (top 20).
#
# The filter below keeps results with positionOrder 1, 2 or 3, i.e. finishes on
# the podium. That is not the same thing as a pole position (starting first on
# the grid), so the chart is labelled as podium finishes to match what is
# actually counted. Variable names are kept for compatibility.
podium_rows = team_winning_history[team_winning_history['positionOrder'].isin([1, 2, 3])]

# Count rows per team directly; this avoids assigning a helper column on a
# filtered slice and summing unrelated (including text) columns.
team_poll_position = (
    podium_rows['name']
    .value_counts()
    .head(20)
    .rename_axis('Team')
    .reset_index(name='Podium Finishes')
)
team_poll_position_sorted = team_poll_position.sort_values('Podium Finishes', ascending=False)

plt.figure(figsize=(14, 8))
barplot = sns.barplot(x='Podium Finishes', y='Team', data=team_poll_position_sorted, palette='viridis')
# Write the count at the end of each bar.
for index, value in enumerate(team_poll_position_sorted['Podium Finishes']):
    plt.text(value, index, str(value), color='black', va="center")
plt.title('Total Number of Podium Finishes by F1 Teams')
plt.xlabel('Podium Finishes')
plt.ylabel('Team')
plt.show()

In [None]:
# Driver standings per season, with a dropdown to choose the year.
drivers_df['full_name'] = drivers_df['forename'] + ' ' + drivers_df['surname']

# Build the per-season view in a NEW frame. Overwriting driver_standings_df
# with the merged result made this cell fail on a second run (a repeated merge
# yields year_x/year_y and the groupby on 'year' raises KeyError) and also
# broke the earlier cells that merge the raw standings with races_df.
driver_season_points = (
    driver_standings_df[['raceId', 'driverId', 'points']]
    .merge(races_df[['raceId', 'year']], on='raceId', how='left')
    .merge(drivers_df[['driverId', 'full_name']], on='driverId', how='left')
)

# The highest points value a driver reaches within a year is used as the
# season total.
racerStandings = (
    driver_season_points
    .groupby(['year', 'full_name'])
    .agg({'points': 'max'})
    .reset_index()
    .sort_values(['year', 'points'], ascending=[True, False])
    .rename({'full_name': 'driver'}, axis=1)
)
years = sorted(racerStandings['year'].unique())

# One bar trace per year; only the first year is visible initially.
fig = go.Figure()
for year in years:
    filtered_df = racerStandings[racerStandings['year'] == year]
    fig.add_trace(
        go.Bar(
            x=filtered_df['driver'],
            y=filtered_df['points'],
            name=str(year),
            marker=dict(color=filtered_df['points'], colorscale='Viridis'),
            visible=bool(year == years[0]),
        )
    )

# Each dropdown entry shows exactly one year's trace and updates the title.
# 'title.text' is addressed explicitly because button args are passed to the
# front end as-is, without plotly.py's string-to-title coercion.
buttons = [
    dict(
        label=str(year),
        method='update',
        args=[
            {'visible': [bool(year == y) for y in years]},
            {'title.text': f'F1 Driver Standings in {year}'},
        ],
    )
    for year in years
]

fig.update_layout(
    updatemenus=[
        dict(
            buttons=buttons,
            direction='down',
            pad={'r': 10, 't': 10},
            showactive=True,
            x=0.3,
            xanchor='left',
            y=1.15,
            yanchor='top',
        )
    ],
    title=f'F1 Driver Standings in {years[0]}',
    xaxis_title='Driver',
    yaxis_title='Points',
    barmode='group',
)
fig.show()

In [None]:
# Constructor standings per season, with a dropdown to choose the year.
#
# Build the per-season view in a NEW frame. Overwriting
# constructor_standings_df with the merged result made this cell fail when run
# a second time and changed the input of the earlier constructor cells.
# Only raceId/year are taken from races_df, so the race `name` column never
# clashes with the constructor `name`.
constructor_season_points = (
    constructor_standings_df[['raceId', 'constructorId', 'points']]
    .merge(races_df[['raceId', 'year']], on='raceId', how='left')
    .merge(constructors_df[['constructorId', 'name']], on='constructorId', how='left')
)

# The highest points value a constructor reaches within a year is used as the
# season total.
constructor_standings = (
    constructor_season_points
    .groupby(['year', 'name'])
    .agg({'points': 'max'})
    .reset_index()
    .sort_values(['year', 'points'], ascending=[True, False])
    .rename({'name': 'constructor'}, axis=1)
)
years = sorted(constructor_standings['year'].unique())

# One bar trace per year; only the first year is visible initially.
fig = go.Figure()
for year in years:
    filtered_df = constructor_standings[constructor_standings['year'] == year]
    fig.add_trace(
        go.Bar(
            x=filtered_df['constructor'],
            y=filtered_df['points'],
            name=str(year),
            marker=dict(color=filtered_df['points'], colorscale='Viridis'),
            visible=bool(year == years[0]),
        )
    )

# Each dropdown entry shows exactly one year's trace and updates the title.
# 'title.text' is addressed explicitly so the title's position settings below
# are kept when a button is pressed.
buttons = [
    dict(
        label=str(year),
        method='update',
        args=[
            {'visible': [bool(year == y) for y in years]},
            {'title.text': f'F1 Constructor Standings in {year}'},
        ],
    )
    for year in years
]

fig.update_layout(
    title=dict(
        # Start with the year of the trace that is visible initially, matching
        # the titles the dropdown buttons set.
        text=f'F1 Constructor Standings in {years[0]}',
        y=0.9,
        x=0.5,
        xanchor='center',
        yanchor='top',
    ),
    updatemenus=[
        dict(
            buttons=buttons,
            direction='down',
            pad={'r': 10, 't': 100},
            showactive=True,
            x=0.3,
            xanchor='left',
            y=1.7,
            yanchor='top',
        )
    ],
    xaxis_title='Constructor',
    yaxis_title='Points',
    margin=dict(l=20, r=20, t=100, b=20),
)
fig.show()

In [None]:
# Circuit lap leaders: which driver(s) set the fastest recorded lap for each
# (circuit, race name) pair.
drivers_df['full_name'] = drivers_df['forename'] + ' ' + drivers_df['surname']

# Merge only the columns that are needed. The previous version merged whole
# tables and then removed columns by position (columns[2:29]), which breaks as
# soon as any CSV has a different number of columns.
lap_and_race_df = (
    lap_times_df[['raceId', 'driverId', 'milliseconds']]
    .merge(races_df[['raceId', 'circuitId', 'name']], on='raceId')
    .merge(drivers_df[['driverId', 'full_name']], on='driverId')
)

# Fastest lap time per (circuit, race name), broadcast back onto every lap so
# the rows that achieved it can be selected directly (ties are all kept).
fastest_ms = lap_and_race_df.groupby(['circuitId', 'name'])['milliseconds'].transform('min')
circuit_Lap_Leaders = (
    lap_and_race_df.loc[lap_and_race_df['milliseconds'] == fastest_ms, ['name', 'full_name']]
    .drop_duplicates()
    .reset_index(drop=True)
)

# 0/1 presence matrix: rows are race names, columns are drivers. After
# drop_duplicates every (race name, driver) pair occurs once, so the crosstab
# counts are exactly 0 or 1.
pivot_table = pd.crosstab(circuit_Lap_Leaders['name'], circuit_Lap_Leaders['full_name'])

plt.figure(figsize=(10, 8))
sns.heatmap(pivot_table, cbar=False, cmap="YlGnBu", linewidths=.5)
plt.title('Presence of Fastest Lap by Driver and Circuit')
plt.xlabel('Driver')
plt.ylabel('Circuit')
plt.xticks(rotation=45, ha="right")
plt.yticks(rotation=0)
plt.show()

In [None]:
# Fastest-lap count per driver: on how many circuits does each driver hold the
# fastest recorded lap?
#
# Only the needed columns are merged (lap_times is by far the largest table).
lap_and_race_df = (
    lap_times_df[['raceId', 'driverId', 'milliseconds']]
    .merge(races_df[['raceId', 'circuitId']], on='raceId')
    .merge(drivers_df[['driverId', 'surname']], on='driverId')
)

# The previous grouping by (circuit, driver) produced every driver's personal
# best per circuit, so the later count was simply "number of circuits driven".
# Compare each lap with the best time of its circuit instead and keep only the
# record-setting rows (ties are kept, one row per driver and circuit).
circuit_best_ms = lap_and_race_df.groupby('circuitId')['milliseconds'].transform('min')
top_fastest_laps_driver = (
    lap_and_race_df.loc[
        lap_and_race_df['milliseconds'] == circuit_best_ms,
        ['circuitId', 'driverId', 'surname', 'milliseconds'],
    ]
    .drop_duplicates(['circuitId', 'driverId'])
    .reset_index(drop=True)
)

# Group by driverId as well so two drivers sharing a surname are not merged.
lap_counts = (
    top_fastest_laps_driver
    .groupby(['driverId', 'surname'])
    .size()
    .reset_index(name='fastest_lap_count')
    .sort_values('fastest_lap_count', ascending=True)
)

fig = px.bar(lap_counts, x='surname', y='fastest_lap_count', title='Fastest Laps Count by Driver')
fig.update_layout(xaxis_tickangle=-45,
                  xaxis_title='Driver',
                  yaxis_title='Circuits with Fastest Lap')
fig.show()

In [None]:
# Overtake Difficulty Index: average number of classified finishers per race
# who end the race in exactly the position they started from. A higher value
# means fewer position changes at that circuit.
FIRST_SEASON = 2000          # only seasons from this year on are considered
MIN_RACES_PER_CIRCUIT = 5    # ignore circuits with too few races to compare

worst_tracks = (
    circuits_df[['circuitId', 'name']].rename(columns={'name': 'circuit'})
    .merge(races_df[['raceId', 'circuitId', 'year']], on='circuitId', how='inner')
    .merge(results_df[['raceId', 'driverId', 'position', 'grid']], on='raceId', how='left')
)
# `position` is stored as text (the race-wins cell converts it with
# pd.to_numeric for the same reason), so comparing it with the numeric `grid`
# column was always False. Convert first; non-numeric entries become NaN and
# are dropped together with races that have no results.
worst_tracks['position'] = pd.to_numeric(worst_tracks['position'], errors='coerce')
worst_tracks = worst_tracks.dropna()
worst_tracks['year'] = worst_tracks['year'].astype(int)
worst_tracks = worst_tracks.loc[worst_tracks['year'] >= FIRST_SEASON].copy()
worst_tracks['position_status'] = np.where(worst_tracks['position'] == worst_tracks['grid'], 1, 0)

# Races per circuit, counted from the races that actually have classified
# results. The previous version filtered and divided by `circuitId` itself
# (an identifier, not a count).
total_no_of_races = (
    worst_tracks.groupby('circuitId')['raceId']
    .nunique()
    .reset_index(name='races')
)
worst_tracks = worst_tracks.merge(total_no_of_races, on='circuitId', how='left')

rating_of_circuit = (
    worst_tracks[worst_tracks['races'] >= MIN_RACES_PER_CIRCUIT]
    .groupby(['circuit', 'circuitId', 'races'], as_index=False)['position_status']
    .sum()
    .rename(columns={'position_status': 'same_position_finishes'})
)
rating_of_circuit['overtake_difficulty_index'] = (
    rating_of_circuit['same_position_finishes'] / rating_of_circuit['races']
)
rating_of_circuit = (
    rating_of_circuit
    .sort_values('overtake_difficulty_index', ascending=False)
    .head(20)
)

fig = px.bar(rating_of_circuit, x='circuit', y='overtake_difficulty_index',
             title="Overtake Difficulty Index of Circuits")
fig.update_layout(xaxis_title="Circuit", yaxis_title="Overtake Difficulty Index",
                  showlegend=False)
# Fixed-point labels; the SI format ('.2s') would print values below 1 with a
# milli prefix (e.g. "850m").
fig.update_traces(texttemplate='%{y:.2f}', textposition='outside')
fig.show()


In [None]:
# For the 15 most frequently held Grands Prix: which circuits hosted them, and
# what share of each country's races every circuit accounts for.
grandprix_circuit_distribution = races_df['name'].value_counts().head(15)
ist = []
for grandprix in grandprix_circuit_distribution.index:
    # Number of times each circuit hosted this Grand Prix (indexed by circuitId).
    circuitID = races_df.loc[races_df['name'] == grandprix, 'circuitId'].value_counts()
    dfs = circuits_df.loc[
        circuits_df['circuitId'].isin(circuitID.index),
        ['circuitId', 'location', 'country', 'name'],
    ].copy()  # copy: we add a column and must not write into circuits_df
    # Look the count up by circuitId. Assigning `circuitID.values` positionally
    # paired counts (sorted by frequency) with circuits in file order, which
    # attached the wrong count to a circuit whenever the two orders differed.
    dfs['number of times'] = dfs['circuitId'].map(circuitID)
    ist.append(dfs)
grandprix_circuit_distribution = pd.concat(ist)

# Share of each row within its country, in percent.
percentages = grandprix_circuit_distribution.groupby('country')['number of times'].transform(
    lambda x: (x / x.sum()) * 100)
grandprix_circuit_distribution['percent'] = percentages

countries = grandprix_circuit_distribution['country'].unique()
cols = 5
rows = -(-len(countries) // cols)  # ceiling division: enough rows for every country

# One pie per country, filled row by row.
fig = make_subplots(rows=rows, cols=cols,
                    specs=[[{'type': 'pie'} for _ in range(cols)] for _ in range(rows)])
for i, country in enumerate(countries):
    country_data = grandprix_circuit_distribution[grandprix_circuit_distribution['country'] == country]
    row, col = divmod(i, cols)
    fig.add_trace(
        go.Pie(labels=country_data['name'], values=country_data['percent'], name=country),
        row=row + 1, col=col + 1,
    )

fig.update_layout(
    title_text='Percentage of Grand Prix by Country and Circuit',
    height=300 * rows,
)
fig.show()

In [None]:
# Driver average points per race (seasons after 2010, drivers with more than
# 100 result rows in that period).
SEASONS_AFTER = 2010
MIN_RESULT_ROWS = 100

# Merge only the columns that are needed, which also avoids the _x/_y suffixes
# produced by the clashing number/time/url columns of the full tables.
results_and_driver_df = pd.merge(
    results_df[['raceId', 'driverId', 'points']],
    drivers_df[['driverId', 'forename', 'surname']],
    on='driverId',
)
merged_df = pd.merge(results_and_driver_df, races_df[['raceId', 'year']], on='raceId')
merged_df = merged_df[merged_df['year'] > SEASONS_AFTER].copy()
merged_df["full_name"] = merged_df["forename"] + " " + merged_df["surname"]

# Mean points, total points and number of results per driver in one pass.
final_df = (
    merged_df
    .groupby('full_name')
    .agg(
        average_points=('points', 'mean'),
        total_points=('points', 'sum'),
        number_of_races=('raceId', 'count'),
    )
    .reset_index()
)
final_df = final_df[final_df['number_of_races'] > MIN_RESULT_ROWS].reset_index(drop=True)
# NOTE: the former `final_df.iloc[7, 3] = 180` / `final_df.iloc[6, 3] = 125`
# overrides were removed. They replaced the computed race count of whichever
# drivers happened to be in rows 6 and 7 with hard-coded numbers, which
# silently corrupts the data whenever the input changes.

fig = px.scatter(final_df,
                 x='average_points',
                 y='total_points',
                 size='number_of_races',
                 color='full_name',
                 hover_name='full_name',
                 title='F1 Drivers: Average Points vs Total Points vs Races Participated')

fig.update_layout(xaxis_title='Average Points',
                  yaxis_title='Total Points',
                  legend_title='Drivers')
fig.show()

In [None]:
# Number of grid poles: how often each driver started a race from grid slot 1.
qualification_round = (
    results_df
    .merge(drivers_df, on='driverId', how='left')
    .assign(full_name=lambda frame: frame['forename'] + ' ' + frame['surname'])
    [['full_name', 'grid', 'position']]
)

# Every kept row has grid == 1, so summing `grid` counts the rows per driver.
started_first = qualification_round.loc[qualification_round['grid'] == 1]
qualification_wins = (
    started_first
    .groupby('full_name')['grid']
    .sum()
    .reset_index(name='grid poles')
    .sort_values('grid poles', ascending=False)
    .head(25)
)

fig = px.bar(
    qualification_wins,
    x='full_name',
    y='grid poles',
    title='F1 Drivers: Number of Grid Poles',
    labels={'full_name': 'Driver', 'grid poles': 'Grid Poles'},
    color='grid poles',
    color_continuous_scale=px.colors.sequential.Viridis,
)
grid_pole_layout = dict(
    xaxis_tickangle=-45,
    xaxis_title='Driver',
    yaxis_title='Grid Poles',
    coloraxis_showscale=False,
    plot_bgcolor="white",
)
fig.update_layout(**grid_pole_layout)
fig.show()

In [None]:
# Top F1 drivers by race wins (this cell counts wins, not pole positions).
TOP_N_WINNERS = 25  # same cut-off as the grid-poles cell

driver_race_wins = drivers_df.merge(results_df, on='driverId', how='left')
driver_race_wins['full_name'] = driver_race_wins['forename'] + ' ' + driver_race_wins['surname']
# .copy() so the conversion below writes to an independent frame rather than
# to a slice of the merged table.
driver_race_wins = driver_race_wins[['full_name', 'position']].copy()
# `position` holds text; entries that are not numbers become NaN and therefore
# never compare equal to 1.
driver_race_wins['position'] = pd.to_numeric(driver_race_wins['position'], errors='coerce')

total_race_wins = (
    driver_race_wins[driver_race_wins['position'] == 1]
    .groupby('full_name')
    .size()
    .reset_index(name='wins')
    .sort_values(by='wins', ascending=False)
    .head(TOP_N_WINNERS)
)

# The title is derived from the cut-off so it cannot drift from the data again
# (it used to say "Top 20" while 25 drivers were plotted).
fig = px.bar(total_race_wins, x='full_name', y='wins',
             title=f'Top {TOP_N_WINNERS} F1 Drivers by Race Wins',
             labels={'full_name': 'Driver', 'wins': 'Race Wins'},
             color='wins',
             text='wins')
fig.update_layout(xaxis_title="Driver",
                  yaxis_title="Race Wins",
                  xaxis={'categoryorder': 'total descending'},
                  xaxis_tickangle=-45,
                  coloraxis_colorbar=dict(title="Wins"))
fig.show()

In [None]:
# Race wins relative to grid poles.
# Uses total_race_wins and qualification_wins as produced by the cells above,
# so only drivers present in both tables end up with a ratio.
race_pole_comparison = (
    total_race_wins
    .merge(qualification_wins, on='full_name', how='left')
    .loc[lambda frame: frame['grid poles'] > 10]
    .dropna()
    .rename(columns={'wins': 'race wins'})
    .astype({'race wins': int})
    .assign(
        race_x_qualification=lambda frame: (frame['race wins'] / frame['grid poles']).round(2)
    )
    .sort_values('race_x_qualification', ascending=False)
    .reset_index(drop=True)
    .head(20)
)

# Bubble size encodes the wins-per-pole ratio, colour the number of wins.
fig = px.scatter(
    race_pole_comparison,
    x='race wins',
    y='grid poles',
    size='race_x_qualification',
    color='race wins',
    hover_name='full_name',
    title='Race Wins vs. Grid Poles with Race to Qualification Ratio',
    labels={'race wins': 'Race Wins', 'grid poles': 'Grid Poles', 'race_x_qualification': 'Race/Qualification Ratio'},
    size_max=60,
)
comparison_layout = dict(
    xaxis_title="Race Wins",
    yaxis_title="Grid Poles",
    coloraxis_colorbar=dict(title="Race Wins"),
    plot_bgcolor="white",
)
fig.update_layout(**comparison_layout)
fig.show()

In [None]:
# The 20 most recently born drivers in the dataset and their current age.
drivers_df['full_name'] = drivers_df['forename'] + ' ' + drivers_df['surname']

today = datetime.now()
youngest_driver_by_age = (
    drivers_df
    # Parse before sorting so the order is chronological rather than textual;
    # unparseable dates become NaT and are dropped.
    .assign(dob=pd.to_datetime(drivers_df['dob'], errors='coerce'))
    .dropna(subset=['dob'])
    .sort_values(by='dob', ascending=False)
    .head(20)
    .drop(columns=['driverId', 'driverRef', 'number', 'code', 'url', 'forename', 'surname'])
)

# Calendar age: difference in years, minus one if this year's birthday has not
# happened yet. (`days // 365` ignores leap days and can report a driver as a
# year older shortly before the birthday.)
dob = youngest_driver_by_age['dob']
birthday_pending = (dob.dt.month > today.month) | (
    (dob.dt.month == today.month) & (dob.dt.day > today.day)
)
youngest_driver_by_age['age'] = today.year - dob.dt.year - birthday_pending.astype(int)
# Plain dates read better than timestamps in the hover box.
youngest_driver_by_age['dob'] = dob.dt.date

fig = px.bar(youngest_driver_by_age, y='full_name', x='age',
             title='Youngest Formula 1 Drivers by Age',
             labels={'full_name': 'Driver', 'age': 'Age'},
             orientation='h',
             color='age',
             color_continuous_scale='Viridis',
             hover_data=['nationality', 'dob'])
fig.update_layout(yaxis={'categoryorder': 'total ascending'},
                  xaxis_title="Age",
                  yaxis_title="Driver",
                  plot_bgcolor="white")
fig.show()

In [None]:
# Points per race for constructors with at least 200 distinct races.
cons_and_results_df = constructors_df.merge(results_df, on='constructorId', how='left')

# Distinct races per constructor, most experienced teams first.
race_counts_by_team = (
    cons_and_results_df[['name', 'points', 'raceId']]
    .groupby('name')['raceId']
    .nunique()
    .sort_values(ascending=False)
    .reset_index(name='races')
)
races_participated_by_team = (
    race_counts_by_team[race_counts_by_team['races'] >= 200]
    .head(20)
)


def function(x):
    """Return the total points of one team divided by its number of distinct races."""
    return x.points.sum() / x.raceId.nunique()


# Restrict to the selected teams, then apply the ratio team by team.
is_selected_team = cons_and_results_df['name'].isin(races_participated_by_team.name)
team_points_per_race = (
    cons_and_results_df[is_selected_team]
    .groupby('name')
    .apply(function)
    .sort_values(ascending=False)
    .reset_index(name='points_per_race')
)
team_points_per_race['points_per_race'] = team_points_per_race['points_per_race'].round(2)
team_points_per_race = team_points_per_race.head(20)

plt.figure(figsize=(10, 8))
barplot = sns.barplot(
    x='points_per_race',
    y='name',
    data=team_points_per_race,
    palette='mako',
    edgecolor='black',
)
# Write the rounded value at the end of each bar.
for bar_index, bar_value in enumerate(team_points_per_race['points_per_race']):
    plt.text(bar_value, bar_index, str(bar_value), color='black', va="center")
plt.title('Points Per Race by Team', fontsize=16)
plt.xlabel('Points Per Race', fontsize=14)
plt.ylabel('Team Name', fontsize=14)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
sns.despine(left=True)
plt.tight_layout()
plt.show()