# Feature Engineering (F1 Prediction Project)

This script transforms clean race-level data into season-level driver statistics, ready for model training (for predicting champions).

In [None]:
# 🧮 FEATURE ENGINEERING: CREATE DRIVER-SEASON LEVEL DATA

import pandas as pd

# Default Google Colab locations.
# NOTE: upload "f1_results_2014_2023.csv" to Google Colab so this script can read the file.
RESULTS_CSV = '/content/f1_results_2014_2023.csv'
FEATURES_CSV = '/content/f1_driver_features_2014_2023.csv'


def prepare_results(results: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of the race-level results with numeric dtypes enforced.

    Args:
        results: Race-level rows containing at least the ``Points`` and
            ``Position`` columns.

    Returns:
        A new DataFrame (the input is left untouched) in which ``Points`` is
        float and ``Position`` is numeric.
    """
    prepared = results.copy()
    prepared['Points'] = prepared['Points'].astype(float)
    # A plain ``astype(int)`` raises on missing or non-numeric finishing
    # positions. Coercing turns those entries into NaN instead, so such a row
    # still counts towards Races_Entered but never as a win/podium/top-10 and
    # is skipped by the mean/std aggregations.
    prepared['Position'] = pd.to_numeric(prepared['Position'], errors='coerce')
    return prepared


def build_driver_features(results: pd.DataFrame) -> pd.DataFrame:
    """Aggregate race-level results into one row per (Year, Driver).

    Args:
        results: Race-level rows with ``Year``, ``Driver``, ``Race``,
            ``Points`` and a numeric ``Position`` column (see
            ``prepare_results``).

    Returns:
        A DataFrame with the columns ``Year``, ``Driver``, ``Total_Points``,
        ``Wins``, ``Podiums``, ``Top10s``, ``Races_Entered``,
        ``Avg_Position``, ``Position_STD`` and ``Champion``, sorted by year
        (ascending) and then total points (descending). ``Champion`` is 1 for
        the driver with the most points in a season and 0 otherwise; on a tie
        the first row returned by ``idxmax`` gets the label.
    """
    features = (
        results.groupby(['Year', 'Driver'])
        .agg(
            Total_Points=('Points', 'sum'),
            Wins=('Position', lambda pos: (pos == 1).sum()),
            Podiums=('Position', lambda pos: (pos <= 3).sum()),
            Top10s=('Position', lambda pos: (pos <= 10).sum()),
            Races_Entered=('Race', 'count'),
            Avg_Position=('Position', 'mean'),
            # Consistency feature (lower = better). Computed in the same
            # aggregation so it is aligned by group key rather than by row
            # position. It is NaN when a driver has fewer than two numeric
            # positions in a season (sample standard deviation).
            Position_STD=('Position', 'std'),
        )
        .reset_index()
    )

    # Label = 1 for the row holding each season's highest points total.
    champion_rows = features.groupby('Year')['Total_Points'].idxmax()
    features['Champion'] = features.index.isin(champion_rows).astype(int)

    return features.sort_values(['Year', 'Total_Points'], ascending=[True, False])


# Running the file as a script or executing it as a notebook cell satisfies
# this guard, so `df` and `features` are still defined as globals for any later
# cells; importing the module no longer triggers file I/O.
if __name__ == "__main__":
    # load cleaned race-level dataset and ensure correct datatypes
    df = prepare_results(pd.read_csv(RESULTS_CSV))

    # aggregate per driver per season, label champions, sort and save
    features = build_driver_features(df)
    features.to_csv(FEATURES_CSV, index=False)

    print("✅ Feature engineering complete!")
    print("📄 Saved: f1_driver_features_2014_2023.csv")
    print(f"Rows: {len(features)}, Columns: {features.shape[1]}")
    print("\nSample preview:")
    print(features.head(10))

# NOTE: this script has not been tested; debug it as needed before relying on its output
