In [6]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb

%matplotlib inline

In [7]:
# Load the results table and preview its first five rows.
results = pd.read_csv(filepath_or_buffer="results.csv")
results.head(5)

In [8]:
# Load the two lookup tables that are joined onto the results further down.
races, drivers = (pd.read_csv(csv_path) for csv_path in ("races.csv", "drivers.csv"))


In [9]:
# Load the constructor lookup table.
constructors = pd.read_csv(filepath_or_buffer="constructors.csv")

In [10]:
# Build one wide frame: every result row keeps its place (left joins) and is
# enriched with race, driver and constructor attributes looked up by id.
df = (
    results
    .merge(races[['raceId', 'year', 'round', 'name']], how='left', on='raceId')
    .merge(drivers[['driverId', 'driverRef', 'nationality']], how='left', on='driverId')
    .merge(constructors[['constructorId', 'constructorRef']], how='left', on='constructorId')
)

In [11]:
# Remove identifier and bookkeeping columns that the analysis does not use.
# The drop is assigned back (no inplace mutation) and tolerates columns that are
# already gone, so re-running this cell does not raise KeyError.
unused_columns = [
    'number', 'position', 'positionText', 'laps', 'fastestLap',
    'statusId', 'driverId', 'resultId', 'raceId', 'constructorId',
]
df = df.drop(columns=unused_columns, errors='ignore')

In [12]:
# Keep only the analysis columns, in presentation order.
analysis_columns = [
    'year', 'name', 'round',
    'driverRef', 'constructorRef',
    'grid', 'positionOrder', 'points',
    'time', 'milliseconds',
    'rank', 'fastestLapTime', 'fastestLapSpeed',
]
df = df.loc[:, analysis_columns]

In [13]:
# Exclude every row from the 2019 season.
# NOTE(review): the reason for excluding 2019 is not visible here; a later cell
# also excludes 2024 — confirm that both exclusions are still intended.
df = df.loc[df['year'].ne(2019)]

In [14]:
# Newest season first; within a season, rounds in order and finishers from first to last.
sort_keys = ['year', 'round', 'positionOrder']
df = df.sort_values(sort_keys, ascending=[False, True, True])

In [15]:
# These columns use the literal text \N as a placeholder; turn it into NaN so
# the columns can be cast to float afterwards.
# The replacement is done on the frame and assigned back. The previous form,
# `df.<col>.replace(..., inplace=True)`, mutates a temporary Series: it emits a
# chained-assignment warning on recent pandas and leaves `df` unchanged once
# Copy-on-Write is active.
placeholder_columns = ['time', 'milliseconds', 'rank', 'fastestLapSpeed', 'fastestLapTime']
df = df.replace({column: '\\N' for column in placeholder_columns}, np.nan)

In [16]:
# Cast the fastest-lap speed column to float.
df['fastestLapSpeed'] = df['fastestLapSpeed'].astype('float64')

In [17]:
# Cast the rank column to float.
# Bracket access is required here: `df.rank` would resolve to the DataFrame.rank method.
df['rank'] = df['rank'].astype('float64')


In [18]:
# Cast the milliseconds column to float.
df['milliseconds'] = df['milliseconds'].astype('float64')

In [19]:
# Renumber rows 0..n-1 after the filtering and sorting above; the old index is discarded.
df = df.reset_index(drop=True)

In [20]:
# Print the (rows, columns) tuple of the cleaned frame.
print(df.shape)

In [21]:
# Print the per-column dtype and non-null summary, e.g. to check the float casts above.
df.info()

In [22]:
# Preview the first five rows of the cleaned frame.
df.head(5)

In [23]:
# Exclude every row from the 2024 season.
# The frame is sorted newest-season-first, so the removed rows are at the top;
# the index is reset again so it stays 0..n-1, as established by the earlier
# reset_index cell.
# NOTE(review): the reason for excluding 2024 is not visible here — confirm.
df = df[df['year'] != 2024].reset_index(drop=True)

In [24]:
# Preview the first five rows after the 2024 season has been removed.
df.head(5)

In [25]:
# Set seaborn's default colour palette to 'Set3' for the plots that follow.
sb.set_palette('Set3')

In [26]:
# Default figure size for every plot that does not set its own.
plt.rcParams.update({'figure.figsize': (10, 6)})

In [27]:
# Count first-place finishes per driver, most wins first.
# The result has two columns: 'driverRef' and 'positionOrder' (the win count).
driver_race_wins = df.loc[df['positionOrder'].eq(1)]
wins_per_driver = driver_race_wins.groupby('driverRef')['positionOrder'].count()
driver_winner = wins_per_driver.sort_values(ascending=False).to_frame().reset_index()

In [28]:
# Horizontal bars with the win count of every winning driver.
ax = sb.barplot(x='positionOrder', y='driverRef', data=driver_winner, color='green', alpha=0.8)
ax.set_title('Most GP winners')
ax.set_ylabel('DriverName')
ax.set_xlabel('Number of GP won')
# No y tick labels: this plot shows one bar for every winning driver.
plt.yticks([])

In [29]:
# The ten drivers with the most wins (driver_winner is already sorted descending).
top10Driver = driver_winner.iloc[:10]
print(top10Driver)

In [30]:
# Bar chart of the ten most successful drivers, with outlined bars.
ax = sb.barplot(
    x='positionOrder',
    y='driverRef',
    data=top10Driver,
    color='blue',
    alpha=0.8,
    edgecolor='black',
    linewidth=.8,
)
ax.set_title("most GP winner")
ax.set_ylabel("Driver")
ax.set_xlabel("Number of wins")

In [31]:
# Count first-place finishes per constructor, most wins first.
# The result has two columns: 'constructorRef' and 'positionOrder' (the win count).
constructor_race_wins = df.loc[df['positionOrder'].eq(1)]
wins_per_constructor = constructor_race_wins.groupby('constructorRef')['positionOrder'].count()
constructor_winner = wins_per_constructor.sort_values(ascending=False).to_frame().reset_index()

# Horizontal bars with the win count of every winning constructor.
ax = sb.barplot(x='positionOrder', y='constructorRef', data=constructor_winner, color='green', alpha=0.8)
ax.set_title('Most GP winners')
ax.set_ylabel('ConstructorName')
ax.set_xlabel('Number of GP won')
# No y tick labels: this plot shows one bar for every winning constructor.
plt.yticks([])

In [32]:
# The ten constructors with the most wins (constructor_winner is already sorted descending).
top10Constructor = constructor_winner.iloc[:10]
print(top10Constructor)

In [33]:
# Bar chart of the ten most successful constructors, with outlined bars.
ax = sb.barplot(
    x='positionOrder',
    y='constructorRef',
    data=top10Constructor,
    color='blue',
    alpha=0.8,
    edgecolor='black',
    linewidth=.8,
)
ax.set_title("most GP winner")
ax.set_ylabel("team")
ax.set_xlabel("Number of wins")

In [34]:
# Leave out rows whose grid value is 0 before comparing start and finish positions.
# NOTE(review): presumably 0 marks a start without a regular grid slot — confirm against the data source.
df_no_zero = df.loc[df['grid'].ne(0)]

In [37]:
# Scatter plot with a fitted regression line: grid slot against finishing place.
# Jitter of 0.3 on both axes and 20% opacity are applied to the scatter points.
plt.figure(figsize=[12,7])
sb.regplot(data=df_no_zero,x='grid',y='positionOrder',x_jitter=0.3,y_jitter=0.3,scatter_kws={'alpha': 1/5})
plt.title('Starting Position vs Finish')
plt.ylabel('Finish Place')
# Axis label typo fixed: it previously read 'Staring position'.
plt.xlabel('Starting position')

In [38]:
# Keep seasons from 2004 onwards, then average the fastest-lap speed
# per Grand Prix name and year.
df_speed = df.loc[df['year'].ge(2004)]
df_group_speed = df_speed.groupby(['name', 'year'], as_index=False)['fastestLapSpeed'].mean()

# One small panel per Grand Prix (five panels per row): year on x, mean speed on y.
g = sb.FacetGrid(df_group_speed, col='name', col_wrap=5)
g.map(plt.scatter, 'year', 'fastestLapSpeed', s=100, alpha=0.8, edgecolor="black", linewidth=.8)
g.set_titles("{col_name}")
g.set_axis_labels('Year', 'Average fastest speed(km/h)')
# Leave headroom above the panels for the figure-level title.
g.fig.subplots_adjust(top=0.92)
g.fig.suptitle('Average Speed among all teams during the fastest lap at individual GPs')