# Driver Standings and Constructor Standings Predictions

This project aims to predict future standings based on past performance trends.

In [1]:
import pandas as pd

# Build one merged table of driver standings, constructor standings and race
# results for a fixed range of seasons, and write it to
# 'f1/merged_dataset.csv'.

# Inclusive range of seasons kept in the merged dataset.
FIRST_SEASON = 2018
LAST_SEASON = 2022

# Load the datasets
driver_standings = pd.read_csv('f1/driver_standings.csv')
constructor_standings = pd.read_csv('f1/constructor_standings.csv')
races = pd.read_csv('f1/races.csv')
drivers = pd.read_csv('f1/drivers.csv')
constructors = pd.read_csv('f1/constructors.csv')
results = pd.read_csv('f1/results.csv')

# Cleaning the datasets: drop the columns that are not carried into the
# merged table.
driver_standings = driver_standings.drop(columns=['positionText'])
constructor_standings = constructor_standings.drop(columns=['positionText'])
races = races.drop(columns=[
    'url',
    'fp1_date', 'fp1_time',
    'fp2_date', 'fp2_time',
    'fp3_date', 'fp3_time',
    'quali_date', 'quali_time',
    'sprint_date', 'sprint_time',
])
drivers = drivers.drop(
    columns=['driverRef', 'number', 'forename', 'dob', 'nationality', 'url'])
constructors = constructors.drop(columns=['url', 'name', 'nationality'])

# Restrict the races to the wanted seasons *before* merging. Every later
# join is an inner join, so only standings of these races survive anyway;
# filtering first keeps the intermediate frames small. `races` itself is
# left untouched (all seasons) under its own name.
races_in_range = races[(races['year'] >= FIRST_SEASON)
                       & (races['year'] <= LAST_SEASON)]

# Merge the datasets.
# An inner join is used for the first step: rows without a matching race or
# without a driver standing would be discarded further down in any case, and
# an outer join would only introduce NaNs that upcast integer columns to
# float.
merged = pd.merge(driver_standings, races_in_range, on='raceId', how='inner')
merged = pd.merge(merged, drivers, on='driverId')
# Joining on 'raceId' alone pairs every driver row with every constructor
# standing of that race. Columns present on both sides get the default
# '_x' (driver standings side) / '_y' (constructor standings side) suffixes.
merged = pd.merge(merged, constructor_standings, on='raceId')
merged = pd.merge(merged, constructors, on='constructorId')
# The join with the results on (race, driver, constructor) keeps only the
# driver/constructor pairings that have a result row for that race.
merged = pd.merge(merged, results, on=['raceId', 'driverId', 'constructorId'])

# Sort the merged dataset chronologically by season and round.
merged = merged.sort_values(by=['year', 'round'], ascending=[True, True])

# Give the suffixed points/wins columns descriptive names.
# NOTE(review): any other column shared by both standings tables keeps its
# '_x'/'_y' suffix; rename those too if downstream code should not depend on
# merge suffixes.
merged = merged.rename(columns={
    'points_x': 'driver_points',
    'points_y': 'constructor_points',
    'wins_x': 'driver_wins',
    'wins_y': 'constructor_wins',
})


# Save the cleaned and preprocessed merged dataset
merged.to_csv('f1/merged_dataset.csv', index=False)
