# Relationship Between Sprint Race Results and Overall Driver Race Success:

In [None]:
import numpy as np

# Build one row per driver holding (a) total sprint points and (b) total
# points from 2021+ races whose raceId does not appear in the sprint table.
# NOTE(review): `pd` and `races` are not defined in this cell - presumably an
# earlier cell imports pandas and loads the races table; confirm.
sprint = pd.read_csv('cleaned_sprint_results.csv')
results = pd.read_csv('cleaned_results.csv')
drivers = pd.read_csv('cleaned_drivers.csv')

# Attach the year to every race result via raceId.
races_columns = races.loc[:, ['raceId', 'year']]
results_with_year = results.merge(races_columns, on='raceId', how='inner')

# Keep results from 2021 onward and drop every race that also has sprint rows.
is_recent = results_with_year['year'] >= 2021
has_sprint = results_with_year['raceId'].isin(sprint['raceId'])
filtered_results = results_with_year[is_recent & ~has_sprint]

# Narrow each table down to the columns needed for the aggregation.
sprint_columns = sprint.loc[:, ['driverId', 'points']]
results_columns = filtered_results.loc[:, ['driverId', 'points']]
drivers['driver_name'] = drivers['driver_forename'] + ' ' + drivers['driver_surname']
driver_columns = drivers.loc[:, ['driverId', 'driver_name']]

# Total points per driver in each table.
grouped_sprint = sprint_columns.groupby('driverId')['points'].sum().reset_index()
grouped_results = results_columns.groupby('driverId')['points'].sum().reset_index()

# Inner joins: only drivers present in both totals (and in the drivers table)
# survive. The points columns are named before joining so no suffixes appear.
merged_df = (
    grouped_sprint.rename(columns={'points': 'sprint_points'})
    .merge(
        grouped_results.rename(columns={'points': 'season_points'}),
        on='driverId',
        how='inner',
    )
    .merge(driver_columns, on='driverId', how='inner')
)

# Last expression of the cell: the notebook renders the first 30 rows.
merged_df.head(30)

In [None]:
# Scatter plot of total sprint points vs. total season points per driver,
# overlaid with a degree-1 least-squares fit.
# NOTE(review): relies on `merged_df` from the previous cell and on `plt`,
# which is not imported in the visible part of the notebook - confirm that an
# earlier cell runs `import matplotlib.pyplot as plt`.

# LSRL Code adapted from ChatGpt
# np.polyfit with degree 1 returns [slope, intercept].
m, b = np.polyfit(merged_df['sprint_points'], merged_df['season_points'], 1)
regression_line = m * merged_df['sprint_points'] + b

merged_df.plot(kind='scatter', x='sprint_points', y='season_points', s=32, alpha=.8)
plt.gca().spines[['top', 'right']].set_visible(False)
plt.title("Relationship Between Sprint Points and Season Points (2021-Present)")
plt.xlabel("Total Sprint Points")
# The y axis shows summed points, not race wins; the label now matches the data.
plt.ylabel("Total Season Points")
plt.plot(merged_df['sprint_points'], regression_line, color='red', label='Least Squares Regression Line')
plt.legend()
plt.show()

In [None]:
import seaborn as sns

# Box plot comparing total season points of drivers who finished in the top 3
# of at least one sprint race against drivers who did not.
# NOTE(review): `pd`, `plt` and `races` are not defined in this cell -
# presumably earlier cells provide them; confirm.

# Load data
sprint = pd.read_csv('cleaned_sprint_results.csv')
results = pd.read_csv('cleaned_results.csv')
drivers = pd.read_csv('cleaned_drivers.csv')

# Attach the year to every race result via raceId.
races_columns = races[['raceId', 'year']]
results_with_year = results.merge(races_columns, on='raceId', how='inner')

# Keep results from 2021 onward and drop every race that also has sprint rows.
filtered_results = results_with_year[results_with_year['year'] >= 2021]
filtered_results = filtered_results[~filtered_results['raceId'].isin(sprint['raceId'])]

# Explicit copy: a column is added below, and assigning onto a slice of
# `sprint` would raise SettingWithCopyWarning.
sprint_columns = sprint[['driverId', 'position', 'points']].copy()
results_columns = filtered_results[['driverId', 'points']]
drivers['driver_name'] = drivers['driver_forename'] + ' ' + drivers['driver_surname']
driver_columns = drivers[['driverId', 'driver_name']]

# 1 when the sprint finishing position is 3 or better, else 0.
# (The original line was missing the threshold and did not parse.)
# NOTE(review): assumes `position` is numeric in the cleaned CSV - confirm.
sprint_columns['finished_top_3'] = (sprint_columns['position'] <= 3).astype(int)

# Per driver: 1 if any sprint result was a top-3 finish.
sprint_top3 = sprint_columns.groupby('driverId', as_index=False)['finished_top_3'].max()

# Total points per driver over the filtered races.
grouped_results = results_columns.groupby('driverId', as_index=False)['points'].sum()

# Left joins keep every driver that has filtered race results.
merged_df = grouped_results.merge(driver_columns, on='driverId', how='left')
merged_df = merged_df.merge(sprint_top3, on='driverId', how='left')

# Drivers with no sprint rows come back as NaN from the left join; count them
# as "not top 3". Only the flag column is filled, so a missing driver_name is
# not silently replaced by the number 0.
merged_df['finished_top_3'] = merged_df['finished_top_3'].fillna(0).astype(int)

merged_df = merged_df.rename(columns={'points': 'season_points', 'finished_top_3': 'Finished in Top 3 Sprint'})

# Turn the 0/1 flag into readable category labels for the x axis.
merged_df['Finished in Top 3 Sprint'] = merged_df['Finished in Top 3 Sprint'].map(
    {1: 'Yes', 0: 'No'}
)

plt.figure(figsize=(8, 5))
sns.boxplot(x='Finished in Top 3 Sprint', y='season_points', data=merged_df)

plt.title('Championship Points Distribution by Top 3 Sprint Finishes')
plt.xlabel('Did the Driver Finish in Top 3 of a Sprint Race?')
plt.ylabel('Total Season Championship Points')
plt.grid(axis='y', linestyle='--', alpha=0.7)

plt.show()