In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


# Missing values in these CSVs appear as the literal token '\N'. Declaring it
# as an NA marker at read time replaces a separate post-load replace() pass and
# guarantees the placeholder is gone before any date or numeric conversion.
NA_TOKEN = '\\N'

drivers = pd.read_csv('drivers.csv', na_values=NA_TOKEN)
constructors = pd.read_csv('constructors.csv', na_values=NA_TOKEN)
results = pd.read_csv('results.csv', na_values=NA_TOKEN)
races = pd.read_csv('races.csv', na_values=NA_TOKEN)
imp_df = [constructors, drivers, results, races]

# '\N' has already been parsed as NaN, so unknown birth dates become NaT here.
drivers['dob'] = pd.to_datetime(drivers['dob'])

# Quick schema check of every raw table. The loop variable is deliberately not
# called `df`: that name is reserved for the merged results table built below.
for table in imp_df:
    print(table.dtypes)

# Session-schedule columns are not used in this analysis. Dropped in place so
# that `races` and the entry held in `imp_df` remain the same object.
races.drop(
    columns=[
        'fp1_date', 'fp1_time', 'fp2_date', 'fp2_time', 'fp3_date', 'fp3_time',
        'quali_date', 'quali_time', 'sprint_date', 'sprint_time',
    ],
    inplace=True,
)

# One row per race result, enriched with race, driver and constructor details.
# By the third merge both sides carry `name` and `nationality`, so pandas
# suffixes them: *_x is the left side (race name / driver nationality) and
# *_y is the right side (constructor name / constructor nationality).
df = (
    results
    .merge(races[['raceId', 'year', 'name', 'round']], on='raceId', how='left')
    .merge(drivers[['driverId', 'driverRef', 'nationality']], on='driverId', how='left')
    .merge(
        constructors[['constructorId', 'constructorRef', 'name', 'nationality']],
        on='constructorId',
        how='left',
    )
    .rename(columns={
        'rank': 'fastest_lap_rank',
        'name_x': 'GP_Name',
        'nationality_x': 'driver_nationality',
        'name_y': 'constructor_name',
        'nationality_y': 'constructors_nationality',
        'driverRef': 'driver',
    })
)

# Keep only the analysis columns, in reading order. This explicit selection
# also discards the id/bookkeeping columns, so no separate drop() is needed.
df = df[[
    'year', 'raceId', 'GP_Name', 'round', 'driver', 'constructor_name',
    'grid', 'positionOrder', 'points', 'time', 'milliseconds',
    'fastest_lap_rank', 'fastestLapTime', 'fastestLapSpeed',
    'driver_nationality', 'constructors_nationality',
]]

# Newest season first; within a season by round, then by finishing order.
df = (
    df.sort_values(by=['year', 'round', 'positionOrder'], ascending=[False, True, True])
    .astype({'fastestLapSpeed': float, 'fastest_lap_rank': float, 'milliseconds': float})
    .reset_index(drop=True)
)

print(df.shape)
# info() writes its summary to stdout itself; it must be *called* -- printing
# the bare attribute only shows the bound-method repr.
df.info()

In [None]:
# Total points each driver collected in each season.
season_points = df.groupby(['year', 'driver'], as_index=False)['points'].sum()

# For every season, find the row label of the highest-scoring driver and keep
# just those rows, ordered by year so consecutive rows can be compared later.
# NOTE(review): this treats the top points scorer as the season winner --
# confirm that this matches the official standings for every season.
top_scorer_labels = season_points.groupby('year')['points'].idxmax()
season_winners = (
    season_points.loc[top_scorer_labels]
    .sort_values('year')
    .reset_index(drop=True)
)

# Preview season winners
print(season_winners)

In [None]:
# Pair every season with the row that follows it. This relies on
# `season_winners` holding one row per season, ordered by year.
season_winners['next_season_winner'] = season_winners['driver'].shift(-1)
season_winners['next_season_year'] = season_winners['year'].shift(-1)

# A title counts as retained only when the same driver wins again AND the
# following row really is the next calendar year; without the second check a
# missing season in the data would be reported as a back-to-back title.
# The last row has no successor (NaN), so both comparisons are False there.
same_driver = season_winners['driver'].eq(season_winners['next_season_winner'])
consecutive_year = season_winners['next_season_year'].eq(season_winners['year'] + 1)
season_winners['retained_title'] = same_driver & consecutive_year

# Preview with retention info
print(season_winners[['year', 'driver', 'points', 'next_season_year', 'next_season_winner', 'retained_title']].head())


In [None]:
# Number of seasons that have a following season to compare against: every
# season except the last one. Clamped at zero so an empty table cannot yield
# a negative denominator.
total_seasons = max(season_winners.shape[0] - 1, 0)

# Count back-to-back wins (True counts as 1).
retained_titles = season_winners['retained_title'].sum()

# Share of comparable seasons in which the winner won again the next season.
# With fewer than two seasons there is nothing to compare, so report that
# explicitly instead of dividing by zero.
if total_seasons > 0:
    probability = (retained_titles / total_seasons) * 100
    print(f"Probability of Retaining the Title: {probability:.2f}%")
else:
    probability = float('nan')
    print("Probability of Retaining the Title: n/a (need at least two seasons)")


In [None]:
# Points total of each season's winner over time, one colour (and one line)
# per driver.
fig, ax = plt.subplots(figsize=(14, 6))
sns.lineplot(
    data=season_winners,
    x='year',
    y='points',
    hue='driver',
    marker='o',
    palette='tab20',
    ax=ax,
)
ax.set_title('F1 Champions Over Time')
ax.set_xlabel('Season')
ax.set_ylabel('Total Points')
# Anchor the legend outside the axes so it does not cover the data.
ax.legend(title='Driver', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.show()


In [None]:
# Count back-to-back title wins per driver: keep the seasons flagged as
# retained and count the rows for each driver.
back_to_back_counts = (
    season_winners[season_winners['retained_title']]
    .groupby('driver')
    .size()
    .reset_index(name='back_to_back_titles')
)

# Bar plot, one bar per driver.
fig, ax = plt.subplots(figsize=(12, 6))
# Passing `palette` without `hue` is deprecated in recent seaborn releases, so
# the x variable is also assigned to `hue`. dodge=False keeps one full-width
# bar per driver on seaborn versions that would otherwise offset hue levels.
sns.barplot(
    data=back_to_back_counts,
    x='driver',
    y='back_to_back_titles',
    hue='driver',
    dodge=False,
    palette='viridis',
    ax=ax,
)
# The hue legend would only repeat the x tick labels; remove it if one was drawn.
hue_legend = ax.get_legend()
if hue_legend is not None:
    hue_legend.remove()

ax.set_title('Drivers with Back-to-Back Championships')
ax.set_xlabel('Driver')
ax.set_ylabel('Number of Back-to-Back Titles')
ax.tick_params(axis='x', rotation=45)
plt.show()
