In [1]:
# Cell 1: Import necessary libraries

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from IPython.display import display, Markdown # For nice table output
import warnings # To suppress potential warnings
import joblib # For saving/loading model
import os # For path handling

warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

In [2]:
# Cell 2: Load Data
try:
    # Ensure these paths are correct for your environment
    qualifying_df = pd.read_csv("../data/processed/updated_qualifying.csv", parse_dates=["date"])
    races_df = pd.read_csv("../data/processed/updated_races.csv", parse_dates=["date"])
    print("CSV files loaded successfully.")
    print(f"Races data includes {races_df['season'].max()} season up to round {races_df[races_df['season'] == races_df['season'].max()]['round'].max()}")

except FileNotFoundError as e:
    print(f"Error: File not found. {e}")
    print("Please ensure 'qualifying.csv' and your races CSV are in the correct directory.")
    raise
except Exception as e:
    print(f"An error occurred during file loading: {e}")
    raise

# --- Basic Data Cleaning (Applied to loaded DataFrames) ---
print("Applying basic data cleaning...")
races_df['points'] = pd.to_numeric(races_df['points'], errors='coerce').fillna(0)
races_df['position'] = pd.to_numeric(races_df['position'], errors='coerce')
races_df['grid'] = pd.to_numeric(races_df['grid'], errors='coerce')

def clean_driver_name(name):
    if isinstance(name, str):
        name = name.replace(' Jr.', '').replace('Hülkenberg', 'Hulkenberg').replace('Perez', 'Pérez').replace('Raikkonen', 'Räikkönen')
        if "Antonelli" in name: # This should normalize "antonelli" and "Kimi Antonelli"
            return "Kimi Antonelli"
        # Add other known variations if necessary
    return name

races_df['driverFullName'] = races_df['driverFullName'].apply(clean_driver_name)
qualifying_df['driverFullName'] = qualifying_df['driverFullName'].apply(clean_driver_name)

# Ensure IDs are strings for consistency
id_cols = ['driverId', 'constructorId']
for col in id_cols:
    if col in races_df.columns:
        races_df[col] = races_df[col].astype(str)
    if col in qualifying_df.columns:
        qualifying_df[col] = qualifying_df[col].astype(str)
print("Basic data cleaning complete.")

CSV files loaded successfully.
Races data includes 2025 season up to round 10
Applying basic data cleaning...
Basic data cleaning complete.


In [3]:
# Cell 3: Feature Engineering Function & Initial Calculation

def calculate_features(df_races, df_qualifying):
    """
    Calculates rolling metrics and standings features on race data.
    Args:
        df_races (pd.DataFrame): DataFrame with race results.
        df_qualifying (pd.DataFrame): DataFrame with qualifying info for names.
    Returns:
        pd.DataFrame: DataFrame with added features.
    """
    print("Calculating features...")
    df_races_processed = df_races.sort_values(by=["season", "round", "date"]).copy()

    # --- Ensure Full Names are Present (Merge if necessary) ---
    # This is primarily for the historical data being processed.
    # For future predictions, names will come from the raw_grid_list.
    if 'driverFullName' not in df_races_processed.columns or 'constructorName' not in df_races_processed.columns:
        print("Full names not in races_df, attempting to merge from qualifying_df...")
        latest_qual_names = df_qualifying.sort_values(
            by="date", ascending=False
        ).drop_duplicates(subset=["driverId", "constructorId"])
        name_map_df = latest_qual_names[[
            "driverId", "constructorId", "driverFullName", "constructorName"
        ]].copy()

        driver_id_to_name = name_map_df.drop_duplicates(subset="driverId").set_index("driverId")["driverFullName"]
        constructor_id_to_name = name_map_df.drop_duplicates(subset="constructorId").set_index("constructorId")["constructorName"]

        df_races_processed["driverFullName"] = df_races_processed["driverId"].map(driver_id_to_name)
        df_races_processed["constructorName"] = df_races_processed["constructorId"].map(constructor_id_to_name)

        df_races_processed["driverFullName"].fillna(df_races_processed["driverId"], inplace=True)
        df_races_processed["constructorName"].fillna(df_races_processed["constructorId"], inplace=True)
        print("Names merged/filled in races_df.")
    else:
        print("Full names already present in races_df for feature calculation.")

    # --- Target Variable ---
    df_races_processed["is_winner"] = (df_races_processed["position"] == 1).astype(int)

    # --- Grid Handling ---
    df_races_processed["grid"] = df_races_processed["grid"].replace(0, 21).fillna(21)
    df_races_processed["grid"] = df_races_processed["grid"].astype(int)

    # --- Rolling Performance Metrics (per driver) ---
    df_races_processed = df_races_processed.sort_values(by=["driverId", "season", "round", "date"])
    rolling_features_cols = ["points", "position", "grid"]
    for feature_col in rolling_features_cols:
        # Calculate rolling mean
        roll_mean = df_races_processed.groupby("driverId")[feature_col].rolling(window=5, min_periods=1).mean()
        # Drop the driverId level created by groupby, shift, and assign back
        
        # Shift within each group (driverId is the first level of roll_mean's MultiIndex)
        # then drop the driverId index level to align for assignment.
        df_races_processed[f"avg_{feature_col}_last_5"] = roll_mean.groupby(level=0).shift(1).reset_index(level=0, drop=True)


    # --- Championship Standings (Points Before Race) ---
    df_races_processed["season_points"] = df_races_processed.groupby(["season", "driverId"])["points"].cumsum()
    df_races_processed["points_standings_prev_race"] = df_races_processed.groupby(["season", "driverId"])["season_points"].shift(1)

    # --- Handle NaNs created by shift/rolling ---
    df_races_processed["avg_points_last_5"].fillna(0, inplace=True)
    df_races_processed["avg_position_last_5"].fillna(21, inplace=True)
    df_races_processed["avg_grid_last_5"].fillna(21, inplace=True)
    df_races_processed["points_standings_prev_race"].fillna(0, inplace=True)

    df_races_processed = df_races_processed.drop(columns=["season_points"], errors='ignore')
    print("Features calculated.")
    return df_races_processed.sort_values(by=["season", "round", "date"])

# --- Execute Feature Engineering on Initial Data ---
# Pass copies to avoid modifying original DataFrames in memory if re-running cells
data_df_featured = calculate_features(races_df.copy(), qualifying_df.copy())

# --- Create Name -> ID maps (for prepare_grid_for_prediction) ---
# These are built from the *final* data_df_featured which should have your manually cleaned names
# and all driver/constructor IDs present in your historical data.
latest_driver_entries = data_df_featured.drop_duplicates(subset="driverFullName", keep="last")
latest_driver_name_to_id_map = latest_driver_entries.set_index("driverFullName")["driverId"].to_dict()

latest_constructor_entries = data_df_featured.drop_duplicates(subset="constructorName", keep="last")
latest_constructor_name_to_id_map = latest_constructor_entries.set_index("constructorName")["constructorId"].to_dict()

print("\nName -> ID maps created from processed data for prediction input handling.")
print("Sample of featured data:")
print(data_df_featured[[
    "season", "round", "driverFullName", "constructorName", "grid", "position",
    "avg_points_last_5", "points_standings_prev_race", "is_winner"
]].tail())

Calculating features...
Full names already present in races_df for feature calculation.
Features calculated.

Name -> ID maps created from processed data for prediction input handling.
Sample of featured data:
      season  round  driverFullName               constructorName  grid  \
2322    2025     10   Oscar Piastri              McLaren-Mercedes     3   
2319    2025     10  George Russell                      Mercedes     1   
2328    2025     10    Carlos Sainz             Williams-Mercedes    17   
2335    2025     10    Lance Stroll  Aston Martin Aramco-Mercedes    18   
2330    2025     10    Yuki Tsunoda    Red Bull Racing-Honda RBPT    11   

      position  avg_points_last_5  points_standings_prev_race  is_winner  
2322         4               21.0                       172.0          0  
2319         1                8.6                       101.0          1  
2328        10                2.2                        12.0          0  
2335        17                0.0      

In [4]:
# Cell 4: Model Definition and Preprocessing Setup

# Define features including the new ones
features = [
    "grid",
    "circuitId",
    "driverId",
    "constructorId",
    "avg_points_last_5",
    "avg_position_last_5",
    "avg_grid_last_5",
    "points_standings_prev_race",
]
target = "is_winner"

# Define numerical and categorical features FOR THE PREPROCESSOR
numerical_features = [
    "grid",
    "avg_points_last_5",
    "avg_position_last_5",
    "avg_grid_last_5",
    "points_standings_prev_race",
]
categorical_features = ["circuitId", "driverId", "constructorId"]

# Create preprocessing pipelines
numerical_transformer = SimpleImputer(strategy="median")
categorical_transformer = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy="most_frequent")),
    (
        "onehot",
        OneHotEncoder(
            handle_unknown="ignore", sparse_output=False
        ),
    ),
])

# Create the preprocessor object
preprocessor = ColumnTransformer(transformers=[
    ("num", numerical_transformer, numerical_features),
    ("cat", categorical_transformer, categorical_features),
])

print("Preprocessor configured with new features.")
print("Features for model:", features)

Preprocessor configured with new features.
Features for model: ['grid', 'circuitId', 'driverId', 'constructorId', 'avg_points_last_5', 'avg_position_last_5', 'avg_grid_last_5', 'points_standings_prev_race']


In [5]:
# Cell 5 (modified for XGBoost)

from xgboost import XGBClassifier  # ← import XGBClassifier instead of sklearn’s GradientBoostingClassifier

# (Keep the rest of your imports the same: pandas, joblib, os, etc.)

# Select features and target from the data with calculated features
X = data_df_featured[features]
y = data_df_featured[target]

# Create the full model pipeline, swapping in XGBClassifier
model_pipeline = Pipeline(steps=[
    ("preprocessor", preprocessor),
    (
        "classifier",
        XGBClassifier(
            n_estimators=200,       # you can tune this
            learning_rate=0.1,      # equivalent to GBC’s learning_rate
            max_depth=3,            # same as before
            subsample=0.8,          # fraction of rows per tree
            colsample_bytree=0.8,   # fraction of columns per tree (optional)
            random_state=42,
            use_label_encoder=False,  # suppress deprecation warning
            eval_metric="logloss"     # recommended for binary classification
        ),
    ),
])

print("Training the XGBoost model with new features...")
if "X" in locals() and "y" in locals() and not X.empty and not y.empty:
    if X.shape[0] != y.shape[0]:
        print(f"Error: X and y have mismatched samples. X: {X.shape[0]}, y: {y.shape[0]}")
    else:
        # Fit
        model_pipeline.fit(X, y)
        print("Model training complete.")

        # Save the trained XGBoost pipeline
        model_filename = "joblogs/f1_winner_predictor_model_xgb.joblib"
        try:
            os.makedirs("joblogs", exist_ok=True)
            joblib.dump(model_pipeline, model_filename)
            print(f"Trained XGBoost model saved to {model_filename}")
        except Exception as e:
            print(f"Error saving model: {e}")
else:
    print(
        "Error: Feature data (X) or target data (y) not found or empty."
        " Please run the feature engineering cell (Cell 3) successfully."
    )


Training the XGBoost model with new features...
Model training complete.
Trained XGBoost model saved to joblogs/f1_winner_predictor_model_xgb.joblib


In [6]:
# Cell 6: Reusable Grid Preparation Function

# Define known team name changes/mappings for future seasons if needed
# Map NEW team name (key) to the constructorId used in TRAINING data (value)
TEAM_REBRAND_MAP = {
    "Red Bull Racing Honda RBPT": "red_bull",
    
    "McLaren Mercedes": "mclaren",
    "McLaren-Mercedes": "mclaren", # Added variation
    
    "Ferrari": "ferrari",
    
    "Mercedes": "mercedes",
    
    "Racing Bulls Honda RBPT": "rb",
    "Racing Bulls-Honda RBPT": "rb", # Added variation
    
    "Williams Mercedes": "williams",
    "Williams-Mercedes": "williams", # Added variation
    
    "Haas Ferrari": "haas",
    "Haas-Ferrari": "haas", # Added variation
    
    "Alpine Renault": "alpine",
    "Alpine-Renault": "alpine", # Added variation
    
    "Aston Martin Aramco Mercedes": "aston_martin",
    "Aston Martin Aramco-Mercedes": "aston_martin", # Added variation
    
    "Kick Sauber Ferrari": "sauber",
    "Kick Sauber-Ferrari": "sauber", # Added variation
    
    
    # Historical for completeness
    "AlphaTauri": "alphatauri",
    "Racing Point": "racing_point",
    "Alfa Romeo": "alfa",
    "Renault": "renault",
    "RB F1 Team": "rb",
    "Sauber": "sauber",
}

def prepare_grid_for_prediction(
    raw_grid_list, # List of dictionaries with driver and team names
    driver_name_to_id_map_hist, # Historical Name -> ID map
    constructor_name_to_id_map_hist, # Historical Name -> ID map
    team_rebrand_map_current # Current Season Team Name -> Historical ID map
):
    """
    Processes a raw grid list.
    It expects 'driverFullName' and 'constructorName' in raw_grid_list.
    It will try to find existing 'driverId' and 'constructorId' using the maps.
    If not found, it creates placeholder IDs.
    """
    prepared_grid = []
    print("Preparing grid for prediction...")

    for entry in raw_grid_list:
        driver_name = entry["driver"] # Expects 'driver' key for full name
        team_name = entry["team"]     # Expects 'team' key for current full team name
        grid_pos = entry["grid"]      # Expects 'grid' key for grid position

        # --- Determine driverId ---
        driver_id = driver_name_to_id_map_hist.get(driver_name)
        if driver_id is None:
            driver_id = f"new_driver_{driver_name.lower().replace(' ', '_')}"
            print(f"Note: Using placeholder ID for new/unmapped driver: {driver_name} -> {driver_id}")

        # --- Determine constructorId for the model ---
        
        # 1. Try current season rebrand map
        constructor_id_for_model = team_rebrand_map_current.get(team_name)
        
        # 2. If not in rebrand, try historical name map (in case an old name is used)
        if constructor_id_for_model is None:
            constructor_id_for_model = constructor_name_to_id_map_hist.get(team_name)
            
        # 3. If still not found, it's a truly new/unmapped team for the model
        if constructor_id_for_model is None:
            constructor_id_for_model = f"new_team_{team_name.lower().replace(' ', '_')}"
            print(f"Note: Using placeholder ID for new/unmapped team: {team_name} -> {constructor_id_for_model}")

        prepared_grid.append({
            "driverId": str(driver_id),
            "constructorId": str(constructor_id_for_model),
            "grid": grid_pos,
            "driverFullName": driver_name, # This is the name for display
            "constructorName": team_name, # This is the team name for display
        })
    print("Grid preparation complete.")
    return prepared_grid

print("Grid preparation function defined.")

Grid preparation function defined.


In [7]:
# Cell 7: Prediction Function Definition

def predict_race_winner_probabilities(
    circuit_id, # Informational
    predict_rows_featured,
    model,
    model_features_list,
    driver_detail_map # Map of {model_driver_id: {'FullName': ..., 'ConstructorName': ..., 'Grid': ...}}
    ):
    """
    Predicts the win probability for each driver in a given grid DataFrame.
    Accepts pre-calculated features and a driver detail map for display names.
    """
    if predict_rows_featured.empty:
        print("Error: predict_rows_featured DataFrame is empty.")
        return {}
    if not model:
        print("Error: Model is not provided or not trained.")
        return {}
    if not driver_detail_map:
        print("Error: driver_detail_map not provided.")
        return {}

    required_cols = model_features_list + ["driverId"] # 'grid' is in model_features_list
    if not all(col in predict_rows_featured.columns for col in required_cols):
        missing = [col for col in required_cols if col not in predict_rows_featured.columns]
        print(f"Error: predict_rows_featured DataFrame is missing required columns: {missing}")
        return {}

    try:
        predict_X = predict_rows_featured[model_features_list]
        probabilities = model.predict_proba(predict_X)
        win_probabilities = probabilities[:, 1]

        total_prob = np.sum(win_probabilities)
        if total_prob > 0:
            normalized_probs = win_probabilities / total_prob
        else:
            print("Warning: Model predicted zero probability for all drivers. Assigning equal probability.")
            normalized_probs = np.ones(len(predict_X)) / len(predict_X)

        results = {}
        for i, index in enumerate(predict_rows_featured.index):
            model_driver_id = predict_rows_featured.loc[index, "driverId"]
            details = driver_detail_map.get(model_driver_id)

            if details:
                 results[model_driver_id] = {
                    "Probability": normalized_probs[i],
                    "DriverFullName": details["FullName"],
                    "ConstructorName": details["ConstructorName"],
                    "Grid": details["Grid"],
                }
            else:
                print(f"Warning: Could not find display details for driverId '{model_driver_id}'. Using ID as name.")
                results[model_driver_id] = {
                    "Probability": normalized_probs[i],
                    "DriverFullName": model_driver_id,
                    "ConstructorName": predict_rows_featured.loc[index, "constructorId"] if "constructorId" in predict_rows_featured.columns else "Unknown",
                    "Grid": predict_rows_featured.loc[index, "grid"],
                }

        return dict(sorted(results.items(), key=lambda item: item[1]["Probability"], reverse=True))

    except Exception as e:
        print(f"An error occurred during prediction: {e}")
        import traceback
        traceback.print_exc()
        return {}

print("Prediction function updated.")

Prediction function updated.


In [8]:
# Cell 8: Wrapper Function for Prediction and Display (Corrected Arguments)

import os
import joblib # For loading model if you choose to load it inside this wrapper
from IPython.display import display, Markdown

def predict_and_display_results(
    circuit_id,
    future_season,
    future_round,
    raw_grid_list,
    model, # Expecting the trained model pipeline to be passed
    base_races_df,
    base_qualifying_df,
    # Corrected argument names to match what prepare_grid_for_prediction expects
    driver_name_to_id_hist_map,
    constructor_name_to_id_hist_map,
    team_rebrand_map_current,
    model_features_list, # The list of features the model was trained on
    race_description="Future Race",
    save_path="predictions_XGB" # Folder to save CSV predictions
):
    """
    Orchestrates prediction: prepares grid, adds it to history, recalculates features,
    runs prediction using the provided model, displays results, and saves results to CSV.
    """
    # --- Create save directory if it doesn't exist ---
    if save_path and not os.path.exists(save_path):
        try:
            os.makedirs(save_path)
            print(f"Created directory: {save_path}")
        except OSError as e:
            print(f"Error creating directory {save_path}: {e}")
            save_path = None # Disable saving if directory creation fails
    # -------------------------------------------------

    print(f"--- Predicting for: {race_description} ({circuit_id}) ---")

    print("Preparing future grid data (mapping names to IDs)...")
    # Pass the correctly named map arguments here
    prepared_grid_list_with_names = prepare_grid_for_prediction(
        raw_grid_list,
        driver_name_to_id_hist_map,
        constructor_name_to_id_hist_map,
        team_rebrand_map_current
    )
    if not prepared_grid_list_with_names:
        print("Grid preparation failed. Cannot predict.")
        return

    # Create the definitive map for display names
    driver_detail_map_for_display = {
        item["driverId"]: {
            "FullName": item["driverFullName"],
            "ConstructorName": item["constructorName"],
            "Grid": item["grid"]
        } for item in prepared_grid_list_with_names
    }

    print("Creating temporary DataFrame for feature calculation...")
    future_race_df_for_features = pd.DataFrame(prepared_grid_list_with_names)[
        ['driverId', 'constructorId', 'grid'] # Use model IDs here
    ].copy()
    future_race_df_for_features["season"] = future_season
    future_race_df_for_features["round"] = future_round
    future_race_df_for_features["circuitId"] = circuit_id
    future_race_df_for_features["date"] = pd.Timestamp.now() # Placeholder
    future_race_df_for_features["position"] = np.nan
    future_race_df_for_features["points"] = 0.0

    print("Combining with historical data...")
    required_base_cols = ['season', 'round', 'date', 'driverId', 'constructorId', 'circuitId', 'grid', 'position', 'points']
    if not all(col in base_races_df.columns for col in required_base_cols):
        print(f"Error: base_races_df is missing required columns. Needed: {required_base_cols}")
        return
    combined_df = pd.concat([base_races_df[required_base_cols], future_race_df_for_features], ignore_index=True)

    print("Recalculating features on combined data...")
    combined_featured_df = calculate_features(combined_df, base_qualifying_df) # base_qualifying_df is used for name mapping within calculate_features if needed

    print("Isolating prediction rows...")
    predict_rows_featured = combined_featured_df[
        (combined_featured_df["season"] == future_season) &
        (combined_featured_df["round"] == future_round)
    ].copy()

    if predict_rows_featured.empty:
        print("Error: Could not find rows for the future race after feature calculation.")
        return

    model_ready = model is not None and hasattr(model, 'predict_proba') and hasattr(model, 'steps')

    if model_ready:
        print(f"\nRunning prediction for {race_description}...")
        winner_probs_dict = predict_race_winner_probabilities(
            circuit_id,
            predict_rows_featured,
            model,
            model_features_list,
            driver_detail_map_for_display
        )

        print(f"\nPredicted Win Probabilities ({race_description}):")
        if winner_probs_dict:
            results_list = [
                {
                    "Driver": details["DriverFullName"],
                    "Grid": details["Grid"],
                    "Team": details["ConstructorName"],
                    "Probability_Num": details["Probability"],
                }
                for _, details in winner_probs_dict.items()
            ]
            results_df = pd.DataFrame(results_list)

            if save_path:
                safe_filename = "".join(c if c.isalnum() else "_" for c in race_description)
                csv_filename = os.path.join(save_path, f"{future_season}_R{future_round:02d}_{safe_filename}_{circuit_id}_predictions_XGB.csv")
                try:
                    results_df.to_csv(csv_filename, index=False, float_format='%.6f')
                    print(f"Predictions saved to: {csv_filename}")
                except Exception as e:
                    print(f"Error saving predictions to CSV: {e}")

            display_df = results_df.copy()
            display_df["Probability"] = display_df["Probability_Num"].map("{:.2%}".format)
            display_df = display_df.drop(columns=["Probability_Num"])

            markdown_table = "| Driver             | Grid | Team                           | Probability |\n"
            markdown_table += "|--------------------|------|--------------------------------|-------------|\n"
            for _, row in display_df.iterrows():
                 markdown_table += (
                    f"| {row['Driver']:<18} | {row['Grid']:<4} |"
                    f" {row['Team']:<30} | {row['Probability']:>11} |\n"
                )
            display(Markdown(markdown_table))
        else:
            print("Prediction failed or returned no results.")
    else:
         print("Model was not provided or does not appear to be a valid trained model.")

print("Prediction and display wrapper function arguments corrected and defined.")

Prediction and display wrapper function arguments corrected and defined.


In [9]:
albert_park_2025_raw_grid = [
    # McLaren Mercedes
    {'driver': 'Lando Norris',       'team': 'McLaren Mercedes',              'grid': 1},
    {'driver': 'Oscar Piastri',      'team': 'McLaren Mercedes',              'grid': 2},
    
    # Red Bull Racing Honda RBPT
    {'driver': 'Max Verstappen',     'team': 'Red Bull Racing Honda RBPT',    'grid': 3},
    {'driver': 'Liam Lawson',        'team': 'Red Bull Racing Honda RBPT',    'grid': 18},
    
    # Ferrari
    {'driver': 'Charles Leclerc',    'team': 'Ferrari',                        'grid': 7},
    {'driver': 'Lewis Hamilton',     'team': 'Ferrari',                        'grid': 8},
    
    # Mercedes
    {'driver': 'George Russell',     'team': 'Mercedes',                       'grid': 4},
    {'driver': 'Kimi Antonelli',     'team': 'Mercedes',                       'grid': 16},
    
    # Alpine Renault
    {'driver': 'Pierre Gasly',       'team': 'Alpine Renault',                 'grid': 9},
    {'driver': 'Jack Doohan',        'team': 'Alpine Renault',                 'grid': 14},
    
    # Williams Mercedes
    {'driver': 'Alexander Albon',    'team': 'Williams Mercedes',              'grid': 6},
    {'driver': 'Carlos Sainz',       'team': 'Williams Mercedes',              'grid': 10},
    
    # Racing Bulls Honda RBPT
    {'driver': 'Yuki Tsunoda',       'team': 'Racing Bulls Honda RBPT',        'grid': 5},
    {'driver': 'Isack Hadjar',       'team': 'Racing Bulls Honda RBPT',        'grid': 11},
    
    # Aston Martin Aramco Mercedes
    {'driver': 'Fernando Alonso',    'team': 'Aston Martin Aramco Mercedes',   'grid': 12},
    {'driver': 'Lance Stroll',       'team': 'Aston Martin Aramco Mercedes',   'grid': 13},
    
    # Kick Sauber Ferrari
    {'driver': 'Gabriel Bortoleto',  'team': 'Kick Sauber Ferrari',            'grid': 15},
    {'driver': 'Nico Hulkenberg',    'team': 'Kick Sauber Ferrari',            'grid': 17},
    
    # Haas Ferrari
    {'driver': 'Esteban Ocon',       'team': 'Haas Ferrari',                   'grid': 19},
    {'driver': 'Oliver Bearman',     'team': 'Haas Ferrari',                   'grid': 20},
]

# Prepare the grid for prediction using the defined function
future_race_circuit_albert_park = "albert_park"
future_season_albert_park = 2025
future_round_albert_park = 1
race_description_albert_park = "2025 Australian Grand Prix"

print(f"Grid defined for {race_description_albert_park}")

# --- Load the Saved Model ---
model_filename = "joblogs/f1_winner_predictor_model_XGB.joblib" # Changed filename for GBC model
loaded_model = None
try:
    loaded_model = joblib.load(model_filename)
    print(f"Loaded trained model from {model_filename}")
except FileNotFoundError:
    print(f"Error: Saved model file '{model_filename}' not found. Please train the model first (run Cell 5).")
    # Optionally, you could trigger training here if the model isn't found,
    # but for now, we'll assume it should exist.
except Exception as e:
    print(f"Error loading model: {e}")
# --------------------------

# --- Execute Prediction ---
# Check if model loaded successfully before proceeding
if loaded_model:
    predict_and_display_results(
        circuit_id=future_race_circuit_albert_park,
        future_season=future_season_albert_park,
        future_round=future_round_albert_park,
        raw_grid_list=albert_park_2025_raw_grid, # Defined in Cell 9
        model=loaded_model, # Use the loaded model
        base_races_df=races_df, # Original races_df from Cell 2 (with 2025 Bahrain data)
        base_qualifying_df=qualifying_df, # Original qualifying_df from Cell 2
        driver_name_to_id_hist_map=latest_driver_name_to_id_map, # From Cell 3
        constructor_name_to_id_hist_map=latest_constructor_name_to_id_map, # From Cell 3
        team_rebrand_map_current=TEAM_REBRAND_MAP, # From Cell 6
        model_features_list=features, # Defined in Cell 4
        race_description=race_description_albert_park, # Defined in Cell 9
        save_path="predictions_XGB" # Specify folder to save CSV
    )
else:
    print("Prediction skipped because the model could not be loaded.")

Grid defined for 2025 Australian Grand Prix
Loaded trained model from joblogs/f1_winner_predictor_model_XGB.joblib
--- Predicting for: 2025 Australian Grand Prix (albert_park) ---
Preparing future grid data (mapping names to IDs)...
Preparing grid for prediction...
Grid preparation complete.
Creating temporary DataFrame for feature calculation...
Combining with historical data...
Recalculating features on combined data...
Calculating features...
Full names not in races_df, attempting to merge from qualifying_df...
Names merged/filled in races_df.
Features calculated.
Isolating prediction rows...

Running prediction for 2025 Australian Grand Prix...

Predicted Win Probabilities (2025 Australian Grand Prix):
Predictions saved to: predictions_XGB/2025_R01_2025_Australian_Grand_Prix_albert_park_predictions_XGB.csv


| Driver             | Grid | Team                           | Probability |
|--------------------|------|--------------------------------|-------------|
| Lando Norris       | 1    | McLaren Mercedes               |      29.74% |
| Oscar Piastri      | 2    | McLaren Mercedes               |       7.51% |
| Max Verstappen     | 3    | Red Bull Racing Honda RBPT     |       3.83% |
| George Russell     | 4    | Mercedes                       |       2.54% |
| Charles Leclerc    | 7    | Ferrari                        |       0.20% |
| Lewis Hamilton     | 8    | Ferrari                        |       0.17% |
| Yuki Tsunoda       | 5    | Racing Bulls Honda RBPT        |       0.07% |
| Alexander Albon    | 6    | Williams Mercedes              |       0.07% |
| Pierre Gasly       | 9    | Alpine Renault                 |       0.06% |
| Carlos Sainz       | 10   | Williams Mercedes              |       0.06% |
| Kimi Antonelli     | 16   | Mercedes                       |       0.05% |
| Jack Doohan        | 14   | Alpine Renault                 |       0.03% |
| Isack Hadjar       | 11   | Racing Bulls Honda RBPT        |       0.02% |
| Gabriel Bortoleto  | 15   | Kick Sauber Ferrari            |       0.02% |
| Liam Lawson        | 18   | Red Bull Racing Honda RBPT     |       0.02% |
| Oliver Bearman     | 20   | Haas Ferrari                   |       0.01% |
| Lance Stroll       | 13   | Aston Martin Aramco Mercedes   |       0.01% |
| Fernando Alonso    | 12   | Aston Martin Aramco Mercedes   |       0.01% |
| Esteban Ocon       | 19   | Haas Ferrari                   |       0.01% |
| Nico Hulkenberg    | 17   | Kick Sauber Ferrari            |       0.01% |


In [10]:
# Define Grid for 2025 Chinese Grand Prix

shanghai_2025_raw_grid = [
    # McLaren Mercedes
    {'driver': 'Oscar Piastri',      'team': 'McLaren Mercedes',              'grid': 1},
    {'driver': 'Lando Norris',       'team': 'McLaren Mercedes',              'grid': 3},

    # Mercedes
    {'driver': 'George Russell',     'team': 'Mercedes',                      'grid': 2},
    {'driver': 'Kimi Antonelli',     'team': 'Mercedes',                      'grid': 8},

    # Red Bull Racing Honda RBPT
    {'driver': 'Max Verstappen',     'team': 'Red Bull Racing Honda RBPT',    'grid': 4},
    {'driver': 'Liam Lawson',        'team': 'Red Bull Racing Honda RBPT',    'grid': 20},

    # Ferrari
    {'driver': 'Lewis Hamilton',     'team': 'Ferrari',                       'grid': 5},
    {'driver': 'Charles Leclerc',    'team': 'Ferrari',                       'grid': 6},

    # Racing Bulls Honda RBPT
    {'driver': 'Isack Hadjar',       'team': 'Racing Bulls Honda RBPT',       'grid': 7},
    {'driver': 'Yuki Tsunoda',       'team': 'Racing Bulls Honda RBPT',       'grid': 9},

    # Williams Mercedes
    {'driver': 'Alexander Albon',    'team': 'Williams Mercedes',             'grid': 10},
    {'driver': 'Carlos Sainz',       'team': 'Williams Mercedes',             'grid': 15},

    # Haas Ferrari
    {'driver': 'Esteban Ocon',       'team': 'Haas Ferrari',                  'grid': 11},
    {'driver': 'Oliver Bearman',     'team': 'Haas Ferrari',                  'grid': 17},

    # Kick Sauber Ferrari
    {'driver': 'Nico Hulkenberg',    'team': 'Kick Sauber Ferrari',           'grid': 12},
    {'driver': 'Gabriel Bortoleto',  'team': 'Kick Sauber Ferrari',           'grid': 19},

    # Aston Martin Aramco Mercedes
    {'driver': 'Fernando Alonso',    'team': 'Aston Martin Aramco Mercedes', 'grid': 13},
    {'driver': 'Lance Stroll',       'team': 'Aston Martin Aramco Mercedes', 'grid': 14},

    # Alpine Renault
    {'driver': 'Pierre Gasly',       'team': 'Alpine Renault',                'grid': 16},
    {'driver': 'Jack Doohan',        'team': 'Alpine Renault',                'grid': 18},
]


# Prepare the grid for prediction using the defined function
future_race_circuit_shanghai = "shanghai"
future_season_shanghai = 2025
future_round_shanghai = 2
race_description_shanghai = "2025 Chinese Grand Prix"

print(f"Grid defined for {race_description_shanghai}")

# Cell 10: Execute Prediction for 2025 Chinese Grand Prix

# --- Load the Saved Model ---
model_filename = "joblogs/f1_winner_predictor_model_XGB.joblib" # Changed filename for GBC model
loaded_model = None
try:
    loaded_model = joblib.load(model_filename)
    print(f"Loaded trained model from {model_filename}")
except FileNotFoundError:
    print(f"Error: Saved model file '{model_filename}' not found. Please train the model first (run Cell 5).")
    # Optionally, you could trigger training here if the model isn't found,
    # but for now, we'll assume it should exist.
except Exception as e:
    print(f"Error loading model: {e}")
# --------------------------

# --- Execute Prediction ---
# Check if model loaded successfully before proceeding
if loaded_model:
    predict_and_display_results(
        circuit_id=future_race_circuit_shanghai,
        future_season=future_season_shanghai,
        future_round=future_round_shanghai,
        raw_grid_list=shanghai_2025_raw_grid, # Defined in Cell 9
        model=loaded_model, # Use the loaded model
        base_races_df=races_df, # Original races_df from Cell 2 (with 2025 Bahrain data)
        base_qualifying_df=qualifying_df, # Original qualifying_df from Cell 2
        driver_name_to_id_hist_map=latest_driver_name_to_id_map, # From Cell 3
        constructor_name_to_id_hist_map=latest_constructor_name_to_id_map, # From Cell 3
        team_rebrand_map_current=TEAM_REBRAND_MAP, # From Cell 6
        model_features_list=features, # Defined in Cell 4
        race_description=race_description_shanghai, # Defined in Cell 9
        save_path="predictions_XGB" # Specify folder to save CSV
    )
else:
    print("Prediction skipped because the model could not be loaded.")

Grid defined for 2025 Chinese Grand Prix
Loaded trained model from joblogs/f1_winner_predictor_model_XGB.joblib
--- Predicting for: 2025 Chinese Grand Prix (shanghai) ---
Preparing future grid data (mapping names to IDs)...
Preparing grid for prediction...
Grid preparation complete.
Creating temporary DataFrame for feature calculation...
Combining with historical data...
Recalculating features on combined data...
Calculating features...
Full names not in races_df, attempting to merge from qualifying_df...
Names merged/filled in races_df.
Features calculated.
Isolating prediction rows...

Running prediction for 2025 Chinese Grand Prix...

Predicted Win Probabilities (2025 Chinese Grand Prix):
Predictions saved to: predictions_XGB/2025_R02_2025_Chinese_Grand_Prix_shanghai_predictions_XGB.csv


| Driver             | Grid | Team                           | Probability |
|--------------------|------|--------------------------------|-------------|
| Oscar Piastri      | 1    | McLaren Mercedes               |      33.52% |
| George Russell     | 2    | Mercedes                       |       6.77% |
| Max Verstappen     | 4    | Red Bull Racing Honda RBPT     |       3.64% |
| Lando Norris       | 3    | McLaren Mercedes               |       2.22% |
| Kimi Antonelli     | 8    | Mercedes                       |       0.26% |
| Charles Leclerc    | 6    | Ferrari                        |       0.18% |
| Alexander Albon    | 10   | Williams Mercedes              |       0.11% |
| Isack Hadjar       | 7    | Racing Bulls Honda RBPT        |       0.11% |
| Lewis Hamilton     | 5    | Ferrari                        |       0.09% |
| Yuki Tsunoda       | 9    | Racing Bulls Honda RBPT        |       0.08% |
| Carlos Sainz       | 15   | Williams Mercedes              |       0.05% |
| Jack Doohan        | 18   | Alpine Renault                 |       0.03% |
| Gabriel Bortoleto  | 19   | Kick Sauber Ferrari            |       0.02% |
| Liam Lawson        | 20   | Red Bull Racing Honda RBPT     |       0.02% |
| Oliver Bearman     | 17   | Haas Ferrari                   |       0.01% |
| Fernando Alonso    | 13   | Aston Martin Aramco Mercedes   |       0.01% |
| Lance Stroll       | 14   | Aston Martin Aramco Mercedes   |       0.01% |
| Pierre Gasly       | 16   | Alpine Renault                 |       0.01% |
| Nico Hulkenberg    | 12   | Kick Sauber Ferrari            |       0.01% |
| Esteban Ocon       | 11   | Haas Ferrari                   |       0.01% |


In [11]:
# Cell 9: Define Grid for 2025 Japanese Grand Prix

suzuka_2025_raw_grid = [
    # Red Bull Racing Honda RBPT
    {'driver': 'Max Verstappen',     'team': 'Red Bull Racing Honda RBPT',    'grid': 1},
    {'driver': 'Yuki Tsunoda',       'team': 'Red Bull Racing Honda RBPT',    'grid': 14},

    # McLaren Mercedes
    {'driver': 'Lando Norris',       'team': 'McLaren Mercedes',              'grid': 2},
    {'driver': 'Oscar Piastri',      'team': 'McLaren Mercedes',              'grid': 3},

    # Ferrari
    {'driver': 'Charles Leclerc',    'team': 'Ferrari',                       'grid': 4},
    {'driver': 'Lewis Hamilton',     'team': 'Ferrari',                       'grid': 8},

    # Mercedes
    {'driver': 'George Russell',     'team': 'Mercedes',                      'grid': 5},
    {'driver': 'Kimi Antonelli',     'team': 'Mercedes',                      'grid': 6},

    # Racing Bulls Honda RBPT
    {'driver': 'Isack Hadjar',       'team': 'Racing Bulls Honda RBPT',       'grid': 7},
    {'driver': 'Liam Lawson',        'team': 'Racing Bulls Honda RBPT',       'grid': 13},

    # Williams Mercedes
    {'driver': 'Alexander Albon',    'team': 'Williams Mercedes',             'grid': 9},
    {'driver': 'Carlos Sainz',       'team': 'Williams Mercedes',             'grid': 15},

    # Haas Ferrari
    {'driver': 'Oliver Bearman',     'team': 'Haas Ferrari',                  'grid': 10},
    {'driver': 'Esteban Ocon',       'team': 'Haas Ferrari',                  'grid': 18},

    # Alpine Renault
    {'driver': 'Pierre Gasly',       'team': 'Alpine Renault',                'grid': 11},
    {'driver': 'Jack Doohan',        'team': 'Alpine Renault',                'grid': 19},

    # Aston Martin Aramco Mercedes
    {'driver': 'Fernando Alonso',    'team': 'Aston Martin Aramco Mercedes',  'grid': 12},
    {'driver': 'Lance Stroll',       'team': 'Aston Martin Aramco Mercedes',  'grid': 20},

    # Kick Sauber Ferrari
    {'driver': 'Nico Hulkenberg',    'team': 'Kick Sauber Ferrari',           'grid': 16},
    {'driver': 'Gabriel Bortoleto',  'team': 'Kick Sauber Ferrari',           'grid': 17},
]


# Prepare the grid for prediction using the defined function
future_race_circuit_suzuka = "suzuka"
future_season_suzuka = 2025
future_round_suzuka = 3
race_description_suzuka = "2025 Japanese Grand Prix"

print(f"Grid defined for {race_description_suzuka}")

# Cell 10: Execute Prediction for 2025 Japanese Grand Prix

# --- Load the Saved Model ---
model_filename = "joblogs/f1_winner_predictor_model_XGB.joblib" # Changed filename for GBC model
loaded_model = None
try:
    loaded_model = joblib.load(model_filename)
    print(f"Loaded trained model from {model_filename}")
except FileNotFoundError:
    print(f"Error: Saved model file '{model_filename}' not found. Please train the model first (run Cell 5).")
    # Optionally, you could trigger training here if the model isn't found,
    # but for now, we'll assume it should exist.
except Exception as e:
    print(f"Error loading model: {e}")
# --------------------------

# --- Execute Prediction ---
# Check if model loaded successfully before proceeding
if loaded_model:
    predict_and_display_results(
        circuit_id=future_race_circuit_suzuka,
        future_season=future_season_suzuka,
        future_round=future_round_suzuka,
        raw_grid_list=suzuka_2025_raw_grid, # Defined in Cell 9
        model=loaded_model, # Use the loaded model
        base_races_df=races_df, # Original races_df from Cell 2 (with 2025 Bahrain data)
        base_qualifying_df=qualifying_df, # Original qualifying_df from Cell 2
        driver_name_to_id_hist_map=latest_driver_name_to_id_map, # From Cell 3
        constructor_name_to_id_hist_map=latest_constructor_name_to_id_map, # From Cell 3
        team_rebrand_map_current=TEAM_REBRAND_MAP, # From Cell 6
        model_features_list=features, # Defined in Cell 4
        race_description=race_description_suzuka, # Defined in Cell 9
        save_path="predictions_XGB" # Specify folder to save CSV
    )
else:
    print("Prediction skipped because the model could not be loaded.")

Grid defined for 2025 Japanese Grand Prix
Loaded trained model from joblogs/f1_winner_predictor_model_XGB.joblib
--- Predicting for: 2025 Japanese Grand Prix (suzuka) ---
Preparing future grid data (mapping names to IDs)...
Preparing grid for prediction...
Grid preparation complete.
Creating temporary DataFrame for feature calculation...
Combining with historical data...
Recalculating features on combined data...
Calculating features...
Full names not in races_df, attempting to merge from qualifying_df...
Names merged/filled in races_df.
Features calculated.
Isolating prediction rows...

Running prediction for 2025 Japanese Grand Prix...

Predicted Win Probabilities (2025 Japanese Grand Prix):
Predictions saved to: predictions_XGB/2025_R03_2025_Japanese_Grand_Prix_suzuka_predictions_XGB.csv


| Driver             | Grid | Team                           | Probability |
|--------------------|------|--------------------------------|-------------|
| Max Verstappen     | 1    | Red Bull Racing Honda RBPT     |      36.63% |
| Oscar Piastri      | 3    | McLaren Mercedes               |      15.03% |
| Lando Norris       | 2    | McLaren Mercedes               |       7.03% |
| Charles Leclerc    | 4    | Ferrari                        |       0.58% |
| Kimi Antonelli     | 6    | Mercedes                       |       0.44% |
| George Russell     | 5    | Mercedes                       |       0.42% |
| Alexander Albon    | 9    | Williams Mercedes              |       0.13% |
| Oliver Bearman     | 10   | Haas Ferrari                   |       0.11% |
| Lewis Hamilton     | 8    | Ferrari                        |       0.06% |
| Isack Hadjar       | 7    | Racing Bulls Honda RBPT        |       0.05% |
| Jack Doohan        | 19   | Alpine Renault                 |       0.03% |
| Gabriel Bortoleto  | 17   | Kick Sauber Ferrari            |       0.02% |
| Liam Lawson        | 13   | Racing Bulls Honda RBPT        |       0.02% |
| Esteban Ocon       | 18   | Haas Ferrari                   |       0.02% |
| Yuki Tsunoda       | 14   | Red Bull Racing Honda RBPT     |       0.01% |
| Lance Stroll       | 20   | Aston Martin Aramco Mercedes   |       0.01% |
| Fernando Alonso    | 12   | Aston Martin Aramco Mercedes   |       0.01% |
| Nico Hulkenberg    | 16   | Kick Sauber Ferrari            |       0.01% |
| Pierre Gasly       | 11   | Alpine Renault                 |       0.01% |
| Carlos Sainz       | 15   | Williams Mercedes              |       0.01% |


In [12]:
# Cell 9: Define Grid for 2025 Bahrain Grand Prix

bahrain_2025_raw_grid = [
    # McLaren Mercedes
    {'driver': 'Oscar Piastri',      'team': 'McLaren Mercedes',              'grid': 1},
    {'driver': 'Lando Norris',       'team': 'McLaren Mercedes',              'grid': 6},

    # Ferrari
    {'driver': 'Charles Leclerc',    'team': 'Ferrari',                       'grid': 2},
    {'driver': 'Lewis Hamilton',     'team': 'Ferrari',                       'grid': 9},

    # Mercedes
    {'driver': 'George Russell',     'team': 'Mercedes',                      'grid': 3},
    {'driver': 'Kimi Antonelli',     'team': 'Mercedes',                      'grid': 5},

    # Alpine Renault
    {'driver': 'Pierre Gasly',       'team': 'Alpine Renault',                'grid': 4},
    {'driver': 'Jack Doohan',        'team': 'Alpine Renault',                'grid': 11},

    # Red Bull Racing Honda RBPT
    {'driver': 'Max Verstappen',     'team': 'Red Bull Racing Honda RBPT',    'grid': 7},
    {'driver': 'Yuki Tsunoda',       'team': 'Red Bull Racing Honda RBPT',    'grid': 10},

    # Williams Mercedes
    {'driver': 'Carlos Sainz',       'team': 'Williams Mercedes',             'grid': 8},
    {'driver': 'Alexander Albon',    'team': 'Williams Mercedes',             'grid': 15},

    # Racing Bulls Honda RBPT
    {'driver': 'Isack Hadjar',       'team': 'Racing Bulls Honda RBPT',       'grid': 12},
    {'driver': 'Liam Lawson',        'team': 'Racing Bulls Honda RBPT',       'grid': 17},

    # Aston Martin Aramco Mercedes
    {'driver': 'Fernando Alonso',    'team': 'Aston Martin Aramco Mercedes',  'grid': 13},
    {'driver': 'Lance Stroll',       'team': 'Aston Martin Aramco Mercedes',  'grid': 19},

    # Haas Ferrari
    {'driver': 'Esteban Ocon',       'team': 'Haas Ferrari',                  'grid': 14},
    {'driver': 'Oliver Bearman',     'team': 'Haas Ferrari',                  'grid': 20},

    # Kick Sauber Ferrari
    {'driver': 'Nico Hulkenberg',    'team': 'Kick Sauber Ferrari',           'grid': 16},
    {'driver': 'Gabriel Bortoleto',  'team': 'Kick Sauber Ferrari',           'grid': 18},
]



# Prepare the grid for prediction using the defined function
future_race_circuit_bahrain = "bahrain"
future_season_bahrain= 2025
future_round_bahrain = 4
race_description_bahrain = "2025 Bahrain Grand Prix"

print(f"Grid defined for {race_description_bahrain}")

# Cell 10: Execute Prediction for 2025 Bahrain Grand Prix

# --- Load the Saved Model ---
model_filename = "joblogs/f1_winner_predictor_model_XGB.joblib" # Changed filename for GBC model
loaded_model = None
try:
    loaded_model = joblib.load(model_filename)
    print(f"Loaded trained model from {model_filename}")
except FileNotFoundError:
    print(f"Error: Saved model file '{model_filename}' not found. Please train the model first (run Cell 5).")
    # Optionally, you could trigger training here if the model isn't found,
    # but for now, we'll assume it should exist.
except Exception as e:
    print(f"Error loading model: {e}")
# --------------------------

# --- Execute Prediction ---
# Check if model loaded successfully before proceeding
if loaded_model:
    predict_and_display_results(
        circuit_id=future_race_circuit_bahrain,
        future_season=future_season_bahrain,
        future_round=future_round_bahrain,
        raw_grid_list=bahrain_2025_raw_grid, # Defined in Cell 9
        model=loaded_model, # Use the loaded model
        base_races_df=races_df, # Original races_df from Cell 2 (with 2025 Bahrain data)
        base_qualifying_df=qualifying_df, # Original qualifying_df from Cell 2
        driver_name_to_id_hist_map=latest_driver_name_to_id_map, # From Cell 3
        constructor_name_to_id_hist_map=latest_constructor_name_to_id_map, # From Cell 3
        team_rebrand_map_current=TEAM_REBRAND_MAP, # From Cell 6
        model_features_list=features, # Defined in Cell 4
        race_description=race_description_bahrain, # Defined in Cell 9
        save_path="predictions_XGB" # Specify folder to save CSV
    )
else:
    print("Prediction skipped because the model could not be loaded.")

Grid defined for 2025 Bahrain Grand Prix
Loaded trained model from joblogs/f1_winner_predictor_model_XGB.joblib
--- Predicting for: 2025 Bahrain Grand Prix (bahrain) ---
Preparing future grid data (mapping names to IDs)...
Preparing grid for prediction...
Grid preparation complete.
Creating temporary DataFrame for feature calculation...
Combining with historical data...
Recalculating features on combined data...
Calculating features...
Full names not in races_df, attempting to merge from qualifying_df...
Names merged/filled in races_df.
Features calculated.
Isolating prediction rows...

Running prediction for 2025 Bahrain Grand Prix...

Predicted Win Probabilities (2025 Bahrain Grand Prix):
Predictions saved to: predictions_XGB/2025_R04_2025_Bahrain_Grand_Prix_bahrain_predictions_XGB.csv


| Driver             | Grid | Team                           | Probability |
|--------------------|------|--------------------------------|-------------|
| Oscar Piastri      | 1    | McLaren Mercedes               |      36.48% |
| Charles Leclerc    | 2    | Ferrari                        |       8.83% |
| Max Verstappen     | 7    | Red Bull Racing Honda RBPT     |       1.03% |
| George Russell     | 3    | Mercedes                       |       0.59% |
| Lando Norris       | 6    | McLaren Mercedes               |       0.56% |
| Pierre Gasly       | 4    | Alpine Renault                 |       0.18% |
| Kimi Antonelli     | 5    | Mercedes                       |       0.15% |
| Lewis Hamilton     | 9    | Ferrari                        |       0.14% |
| Yuki Tsunoda       | 10   | Red Bull Racing Honda RBPT     |       0.10% |
| Carlos Sainz       | 8    | Williams Mercedes              |       0.06% |
| Gabriel Bortoleto  | 18   | Kick Sauber Ferrari            |       0.03% |
| Jack Doohan        | 11   | Alpine Renault                 |       0.02% |
| Liam Lawson        | 17   | Racing Bulls Honda RBPT        |       0.02% |
| Alexander Albon    | 15   | Williams Mercedes              |       0.02% |
| Esteban Ocon       | 14   | Haas Ferrari                   |       0.02% |
| Lance Stroll       | 19   | Aston Martin Aramco Mercedes   |       0.01% |
| Oliver Bearman     | 20   | Haas Ferrari                   |       0.01% |
| Fernando Alonso    | 13   | Aston Martin Aramco Mercedes   |       0.01% |
| Isack Hadjar       | 12   | Racing Bulls Honda RBPT        |       0.01% |
| Nico Hulkenberg    | 16   | Kick Sauber Ferrari            |       0.01% |


In [13]:
# Cell 9: Define Grid for 2025 Saudi Arabian Grand Prix

jeddah_2025_raw_grid = [
    # Red Bull Racing Honda RBPT
    {'driver': 'Max Verstappen',     'team': 'Red Bull Racing Honda RBPT',    'grid': 1},
    {'driver': 'Yuki Tsunoda',       'team': 'Red Bull Racing Honda RBPT',    'grid': 8},

    # McLaren Mercedes
    {'driver': 'Oscar Piastri',      'team': 'McLaren Mercedes',              'grid': 2},
    {'driver': 'Lando Norris',       'team': 'McLaren Mercedes',              'grid': 10},

    # Mercedes
    {'driver': 'George Russell',     'team': 'Mercedes',                      'grid': 3},
    {'driver': 'Kimi Antonelli',     'team': 'Mercedes',                      'grid': 5},

    # Ferrari
    {'driver': 'Charles Leclerc',    'team': 'Ferrari',                       'grid': 4},
    {'driver': 'Lewis Hamilton',     'team': 'Ferrari',                       'grid': 7},

    # Williams Mercedes
    {'driver': 'Carlos Sainz',       'team': 'Williams Mercedes',             'grid': 6},
    {'driver': 'Alexander Albon',    'team': 'Williams Mercedes',             'grid': 11},

    # Alpine Renault
    {'driver': 'Pierre Gasly',       'team': 'Alpine Renault',                'grid': 9},
    {'driver': 'Jack Doohan',        'team': 'Alpine Renault',                'grid': 17},

    # Racing Bulls Honda RBPT
    {'driver': 'Liam Lawson',        'team': 'Racing Bulls Honda RBPT',       'grid': 12},
    {'driver': 'Isack Hadjar',       'team': 'Racing Bulls Honda RBPT',       'grid': 14},

    # Aston Martin Aramco Mercedes
    {'driver': 'Fernando Alonso',    'team': 'Aston Martin Aramco Mercedes',  'grid': 13},
    {'driver': 'Lance Stroll',       'team': 'Aston Martin Aramco Mercedes',  'grid': 16},

    # Haas Ferrari
    {'driver': 'Oliver Bearman',     'team': 'Haas Ferrari',                  'grid': 15},
    {'driver': 'Esteban Ocon',       'team': 'Haas Ferrari',                  'grid': 19},

    # Kick Sauber Ferrari
    {'driver': 'Nico Hulkenberg',    'team': 'Kick Sauber Ferrari',           'grid': 18},
    {'driver': 'Gabriel Bortoleto',  'team': 'Kick Sauber Ferrari',           'grid': 20},
]




# Prepare the grid for prediction using the defined function
future_race_circuit_jeddah = "jeddah"
future_season_jeddah= 2025
future_round_jeddah = 5
race_description_jeddah = "2025 Saudi Arabian Grand Prix"

print(f"Grid defined for {race_description_jeddah}")

# Cell 10: Execute Prediction for 2025 Saudi Arabian Grand Prix

# --- Load the Saved Model ---
model_filename = "joblogs/f1_winner_predictor_model_XGB.joblib" # Changed filename for GBC model
loaded_model = None
try:
    loaded_model = joblib.load(model_filename)
    print(f"Loaded trained model from {model_filename}")
except FileNotFoundError:
    print(f"Error: Saved model file '{model_filename}' not found. Please train the model first (run Cell 5).")
    # Optionally, you could trigger training here if the model isn't found,
    # but for now, we'll assume it should exist.
except Exception as e:
    print(f"Error loading model: {e}")
# --------------------------

# --- Execute Prediction ---
# Check if model loaded successfully before proceeding
if loaded_model:
    predict_and_display_results(
        circuit_id=future_race_circuit_jeddah,
        future_season=future_season_jeddah,
        future_round=future_round_jeddah,
        raw_grid_list=jeddah_2025_raw_grid, # Defined in Cell 9
        model=loaded_model, # Use the loaded model
        base_races_df=races_df, # Original races_df from Cell 2 (with 2025 Bahrain data)
        base_qualifying_df=qualifying_df, # Original qualifying_df from Cell 2
        driver_name_to_id_hist_map=latest_driver_name_to_id_map, # From Cell 3
        constructor_name_to_id_hist_map=latest_constructor_name_to_id_map, # From Cell 3
        team_rebrand_map_current=TEAM_REBRAND_MAP, # From Cell 6
        model_features_list=features, # Defined in Cell 4
        race_description=race_description_jeddah, # Defined in Cell 9
        save_path="predictions_XGB" # Specify folder to save CSV
    )
else:
    print("Prediction skipped because the model could not be loaded.")

Grid defined for 2025 Saudi Arabian Grand Prix
Loaded trained model from joblogs/f1_winner_predictor_model_XGB.joblib
--- Predicting for: 2025 Saudi Arabian Grand Prix (jeddah) ---
Preparing future grid data (mapping names to IDs)...
Preparing grid for prediction...
Grid preparation complete.
Creating temporary DataFrame for feature calculation...
Combining with historical data...
Recalculating features on combined data...
Calculating features...
Full names not in races_df, attempting to merge from qualifying_df...
Names merged/filled in races_df.
Features calculated.
Isolating prediction rows...

Running prediction for 2025 Saudi Arabian Grand Prix...

Predicted Win Probabilities (2025 Saudi Arabian Grand Prix):
Predictions saved to: predictions_XGB/2025_R05_2025_Saudi_Arabian_Grand_Prix_jeddah_predictions_XGB.csv


| Driver             | Grid | Team                           | Probability |
|--------------------|------|--------------------------------|-------------|
| Oscar Piastri      | 2    | McLaren Mercedes               |      30.00% |
| Max Verstappen     | 1    | Red Bull Racing Honda RBPT     |      27.54% |
| Charles Leclerc    | 4    | Ferrari                        |       1.87% |
| George Russell     | 3    | Mercedes                       |       0.62% |
| Lando Norris       | 10   | McLaren Mercedes               |       0.38% |
| Pierre Gasly       | 9    | Alpine Renault                 |       0.11% |
| Yuki Tsunoda       | 8    | Red Bull Racing Honda RBPT     |       0.08% |
| Carlos Sainz       | 6    | Williams Mercedes              |       0.06% |
| Kimi Antonelli     | 5    | Mercedes                       |       0.05% |
| Jack Doohan        | 17   | Alpine Renault                 |       0.02% |
| Gabriel Bortoleto  | 20   | Kick Sauber Ferrari            |       0.02% |
| Esteban Ocon       | 19   | Haas Ferrari                   |       0.02% |
| Lewis Hamilton     | 7    | Ferrari                        |       0.02% |
| Nico Hulkenberg    | 18   | Kick Sauber Ferrari            |       0.01% |
| Fernando Alonso    | 13   | Aston Martin Aramco Mercedes   |       0.01% |
| Liam Lawson        | 12   | Racing Bulls Honda RBPT        |       0.01% |
| Oliver Bearman     | 15   | Haas Ferrari                   |       0.01% |
| Isack Hadjar       | 14   | Racing Bulls Honda RBPT        |       0.01% |
| Lance Stroll       | 16   | Aston Martin Aramco Mercedes   |       0.01% |
| Alexander Albon    | 11   | Williams Mercedes              |       0.01% |


In [14]:
# Cell 9: Define Grid for 2025 Miami Grand Prix

miami_2025_raw_grid = [
    # Red Bull Racing Honda RBPT
    {'driver': 'Max Verstappen',     'team': 'Red Bull Racing Honda RBPT',    'grid': 1},
    {'driver': 'Yuki Tsunoda',       'team': 'Red Bull Racing Honda RBPT',    'grid': 10},

    # McLaren Mercedes
    {'driver': 'Lando Norris',       'team': 'McLaren Mercedes',              'grid': 2},
    {'driver': 'Oscar Piastri',      'team': 'McLaren Mercedes',              'grid': 4},

    # Mercedes
    {'driver': 'Kimi Antonelli',     'team': 'Mercedes',                      'grid': 3},
    {'driver': 'George Russell',     'team': 'Mercedes',                      'grid': 5},

    # Ferrari
    {'driver': 'Charles Leclerc',    'team': 'Ferrari',                       'grid': 8},
    {'driver': 'Lewis Hamilton',     'team': 'Ferrari',                       'grid': 12},

    # Williams Mercedes
    {'driver': 'Carlos Sainz',       'team': 'Williams Mercedes',             'grid': 6},
    {'driver': 'Alexander Albon',    'team': 'Williams Mercedes',             'grid': 7},

    # Alpine Renault
    {'driver': 'Pierre Gasly',       'team': 'Alpine Renault',                'grid': 20},
    {'driver': 'Jack Doohan',        'team': 'Alpine Renault',                'grid': 14},

    # Racing Bulls Honda RBPT
    {'driver': 'Isack Hadjar',       'team': 'Racing Bulls Honda RBPT',       'grid': 11},
    {'driver': 'Liam Lawson',        'team': 'Racing Bulls Honda RBPT',       'grid': 15},

    # Aston Martin Aramco Mercedes
    {'driver': 'Fernando Alonso',    'team': 'Aston Martin Aramco Mercedes',  'grid': 17},
    {'driver': 'Lance Stroll',       'team': 'Aston Martin Aramco Mercedes',  'grid': 18},

    # Kick Sauber Ferrari
    {'driver': 'Gabriel Bortoleto',  'team': 'Kick Sauber Ferrari',           'grid': 13},
    {'driver': 'Nico Hulkenberg',    'team': 'Kick Sauber Ferrari',           'grid': 16},

    # Haas Ferrari
    {'driver': 'Esteban Ocon',       'team': 'Haas Ferrari',                  'grid': 9},
    {'driver': 'Oliver Bearman',     'team': 'Haas Ferrari',                  'grid': 19},
]





# Prepare the grid for prediction using the defined function
future_race_circuit_miami = "miami"
future_season_miami= 2025
future_round_miami = 6
race_description_miami = "2025 Miami Grand Prix"

print(f"Grid defined for {race_description_miami}")

# Cell 10: Execute Prediction for 2025 Miami Grand Prix

# --- Load the Saved Model ---
model_filename = "joblogs/f1_winner_predictor_model_XGB.joblib" # Changed filename for GBC model
loaded_model = None
try:
    loaded_model = joblib.load(model_filename)
    print(f"Loaded trained model from {model_filename}")
except FileNotFoundError:
    print(f"Error: Saved model file '{model_filename}' not found. Please train the model first (run Cell 5).")
    # Optionally, you could trigger training here if the model isn't found,
    # but for now, we'll assume it should exist.
except Exception as e:
    print(f"Error loading model: {e}")
# --------------------------

# --- Execute Prediction ---
# Check if model loaded successfully before proceeding
if loaded_model:
    predict_and_display_results(
        circuit_id=future_race_circuit_miami,
        future_season=future_season_miami,
        future_round=future_round_miami,
        raw_grid_list=miami_2025_raw_grid, # Defined in Cell 9
        model=loaded_model, # Use the loaded model
        base_races_df=races_df, # Original races_df from Cell 2 (with 2025 Bahrain data)
        base_qualifying_df=qualifying_df, # Original qualifying_df from Cell 2
        driver_name_to_id_hist_map=latest_driver_name_to_id_map, # From Cell 3
        constructor_name_to_id_hist_map=latest_constructor_name_to_id_map, # From Cell 3
        team_rebrand_map_current=TEAM_REBRAND_MAP, # From Cell 6
        model_features_list=features, # Defined in Cell 4
        race_description=race_description_miami, # Defined in Cell 9
        save_path="predictions_XGB" # Specify folder to save CSV
    )
else:
    print("Prediction skipped because the model could not be loaded.")

Grid defined for 2025 Miami Grand Prix
Loaded trained model from joblogs/f1_winner_predictor_model_XGB.joblib
--- Predicting for: 2025 Miami Grand Prix (miami) ---
Preparing future grid data (mapping names to IDs)...
Preparing grid for prediction...
Grid preparation complete.
Creating temporary DataFrame for feature calculation...
Combining with historical data...
Recalculating features on combined data...
Calculating features...
Full names not in races_df, attempting to merge from qualifying_df...
Names merged/filled in races_df.
Features calculated.
Isolating prediction rows...

Running prediction for 2025 Miami Grand Prix...

Predicted Win Probabilities (2025 Miami Grand Prix):
Predictions saved to: predictions_XGB/2025_R06_2025_Miami_Grand_Prix_miami_predictions_XGB.csv


| Driver             | Grid | Team                           | Probability |
|--------------------|------|--------------------------------|-------------|
| Max Verstappen     | 1    | Red Bull Racing Honda RBPT     |      32.20% |
| Lando Norris       | 2    | McLaren Mercedes               |       5.49% |
| Oscar Piastri      | 4    | McLaren Mercedes               |       2.50% |
| Kimi Antonelli     | 3    | Mercedes                       |       0.89% |
| Charles Leclerc    | 8    | Ferrari                        |       0.44% |
| George Russell     | 5    | Mercedes                       |       0.28% |
| Esteban Ocon       | 9    | Haas Ferrari                   |       0.15% |
| Yuki Tsunoda       | 10   | Red Bull Racing Honda RBPT     |       0.11% |
| Carlos Sainz       | 6    | Williams Mercedes              |       0.08% |
| Alexander Albon    | 7    | Williams Mercedes              |       0.04% |
| Jack Doohan        | 14   | Alpine Renault                 |       0.03% |
| Gabriel Bortoleto  | 13   | Kick Sauber Ferrari            |       0.02% |
| Nico Hulkenberg    | 16   | Kick Sauber Ferrari            |       0.02% |
| Pierre Gasly       | 20   | Alpine Renault                 |       0.02% |
| Lance Stroll       | 18   | Aston Martin Aramco Mercedes   |       0.02% |
| Liam Lawson        | 15   | Racing Bulls Honda RBPT        |       0.02% |
| Isack Hadjar       | 11   | Racing Bulls Honda RBPT        |       0.02% |
| Oliver Bearman     | 19   | Haas Ferrari                   |       0.01% |
| Fernando Alonso    | 17   | Aston Martin Aramco Mercedes   |       0.01% |
| Lewis Hamilton     | 12   | Ferrari                        |       0.01% |


In [15]:
# Cell 9: Define Grid for 2025 Emilia Romagna Grand Prix

imola_2025_raw_grid = [
    # McLaren Mercedes
    {'driver': 'Oscar Piastri',      'team': 'McLaren Mercedes',              'grid': 1},
    {'driver': 'Lando Norris',       'team': 'McLaren Mercedes',              'grid': 4},

    # Red Bull Racing Honda RBPT
    {'driver': 'Max Verstappen',     'team': 'Red Bull Racing Honda RBPT',    'grid': 2},
    {'driver': 'Yuki Tsunoda',       'team': 'Red Bull Racing Honda RBPT',    'grid': 20},

    # Mercedes
    {'driver': 'George Russell',     'team': 'Mercedes',                      'grid': 3},
    {'driver': 'Kimi Antonelli',     'team': 'Mercedes',                      'grid': 13},

    # Aston Martin Aramco Mercedes
    {'driver': 'Fernando Alonso',    'team': 'Aston Martin Aramco Mercedes',  'grid': 5},
    {'driver': 'Lance Stroll',       'team': 'Aston Martin Aramco Mercedes',  'grid': 8},

    # Williams Mercedes
    {'driver': 'Carlos Sainz',       'team': 'Williams Mercedes',             'grid': 6},
    {'driver': 'Alexander Albon',    'team': 'Williams Mercedes',             'grid': 7},

    # Racing Bulls Honda RBPT
    {'driver': 'Isack Hadjar',       'team': 'Racing Bulls Honda RBPT',       'grid': 9},
    {'driver': 'Liam Lawson',        'team': 'Racing Bulls Honda RBPT',       'grid': 15},

    # Alpine Renault
    {'driver': 'Pierre Gasly',       'team': 'Alpine Renault',                'grid': 10},
    {'driver': 'Franco Colapinto',   'team': 'Alpine Renault',                'grid': 16},  # Time not listed

    # Ferrari
    {'driver': 'Charles Leclerc',    'team': 'Ferrari',                       'grid': 11},
    {'driver': 'Lewis Hamilton',     'team': 'Ferrari',                       'grid': 12},

    # Kick Sauber Ferrari
    {'driver': 'Gabriel Bortoleto',  'team': 'Kick Sauber Ferrari',           'grid': 14},
    {'driver': 'Nico Hulkenberg',    'team': 'Kick Sauber Ferrari',           'grid': 17},

    # Haas Ferrari
    {'driver': 'Esteban Ocon',       'team': 'Haas Ferrari',                  'grid': 18},
    {'driver': 'Oliver Bearman',     'team': 'Haas Ferrari',                  'grid': 19},
]


# Prepare the grid for prediction using the defined function
future_race_circuit_imola = "imola"
future_season_imola= 2025
future_round_imola = 7
race_description_imola = "2025 Emilia Romagna Grand Prix"

print(f"Grid defined for {race_description_imola}")

# Cell 10: Execute Prediction for 2025 Emilia Romagna Grand Prix

# --- Load the Saved Model ---
model_filename = "joblogs/f1_winner_predictor_model_XGB.joblib" # Changed filename for GBC model
loaded_model = None
try:
    loaded_model = joblib.load(model_filename)
    print(f"Loaded trained model from {model_filename}")
except FileNotFoundError:
    print(f"Error: Saved model file '{model_filename}' not found. Please train the model first (run Cell 5).")
    # Optionally, you could trigger training here if the model isn't found,
    # but for now, we'll assume it should exist.
except Exception as e:
    print(f"Error loading model: {e}")
# --------------------------

# --- Execute Prediction ---
# Check if model loaded successfully before proceeding
if loaded_model:
    predict_and_display_results(
        circuit_id=future_race_circuit_imola,
        future_season=future_season_imola,
        future_round=future_round_imola,
        raw_grid_list=imola_2025_raw_grid, # Defined in Cell 9
        model=loaded_model, # Use the loaded model
        base_races_df=races_df, # Original races_df from Cell 2 (with 2025 Bahrain data)
        base_qualifying_df=qualifying_df, # Original qualifying_df from Cell 2
        driver_name_to_id_hist_map=latest_driver_name_to_id_map, # From Cell 3
        constructor_name_to_id_hist_map=latest_constructor_name_to_id_map, # From Cell 3
        team_rebrand_map_current=TEAM_REBRAND_MAP, # From Cell 6
        model_features_list=features, # Defined in Cell 4
        race_description=race_description_imola, # Defined in Cell 9
        save_path="predictions_XGB" # Specify folder to save CSV
    )
else:
    print("Prediction skipped because the model could not be loaded.")

Grid defined for 2025 Emilia Romagna Grand Prix
Loaded trained model from joblogs/f1_winner_predictor_model_XGB.joblib
--- Predicting for: 2025 Emilia Romagna Grand Prix (imola) ---
Preparing future grid data (mapping names to IDs)...
Preparing grid for prediction...
Grid preparation complete.
Creating temporary DataFrame for feature calculation...
Combining with historical data...
Recalculating features on combined data...
Calculating features...
Full names not in races_df, attempting to merge from qualifying_df...
Names merged/filled in races_df.
Features calculated.
Isolating prediction rows...

Running prediction for 2025 Emilia Romagna Grand Prix...

Predicted Win Probabilities (2025 Emilia Romagna Grand Prix):
Predictions saved to: predictions_XGB/2025_R07_2025_Emilia_Romagna_Grand_Prix_imola_predictions_XGB.csv


| Driver             | Grid | Team                           | Probability |
|--------------------|------|--------------------------------|-------------|
| Oscar Piastri      | 1    | McLaren Mercedes               |      32.65% |
| Max Verstappen     | 2    | Red Bull Racing Honda RBPT     |      16.99% |
| Lando Norris       | 4    | McLaren Mercedes               |       1.94% |
| George Russell     | 3    | Mercedes                       |       1.88% |
| Lance Stroll       | 8    | Aston Martin Aramco Mercedes   |       0.18% |
| Pierre Gasly       | 10   | Alpine Renault                 |       0.14% |
| Fernando Alonso    | 5    | Aston Martin Aramco Mercedes   |       0.11% |
| Isack Hadjar       | 9    | Racing Bulls Honda RBPT        |       0.11% |
| Alexander Albon    | 7    | Williams Mercedes              |       0.08% |
| Carlos Sainz       | 6    | Williams Mercedes              |       0.06% |
| Charles Leclerc    | 11   | Ferrari                        |       0.04% |
| Franco Colapinto   | 16   | Alpine Renault                 |       0.03% |
| Esteban Ocon       | 18   | Haas Ferrari                   |       0.03% |
| Nico Hulkenberg    | 17   | Kick Sauber Ferrari            |       0.03% |
| Gabriel Bortoleto  | 14   | Kick Sauber Ferrari            |       0.03% |
| Liam Lawson        | 15   | Racing Bulls Honda RBPT        |       0.02% |
| Yuki Tsunoda       | 20   | Red Bull Racing Honda RBPT     |       0.02% |
| Oliver Bearman     | 19   | Haas Ferrari                   |       0.01% |
| Kimi Antonelli     | 13   | Mercedes                       |       0.01% |
| Lewis Hamilton     | 12   | Ferrari                        |       0.01% |


In [16]:
# Cell 9: Define Grid for 2025 Monaco Grand Prix

monaco_2025_raw_grid = [
    # McLaren Mercedes
    {'driver': 'Lando Norris',       'team': 'McLaren Mercedes',              'grid': 1},
    {'driver': 'Oscar Piastri',      'team': 'McLaren Mercedes',              'grid': 3},

    # Ferrari
    {'driver': 'Charles Leclerc',    'team': 'Ferrari',                       'grid': 2},
    {'driver': 'Lewis Hamilton',     'team': 'Ferrari',                       'grid': 7},

    # Red Bull Racing Honda RBPT
    {'driver': 'Max Verstappen',     'team': 'Red Bull Racing Honda RBPT',    'grid': 4},
    {'driver': 'Yuki Tsunoda',       'team': 'Red Bull Racing Honda RBPT',    'grid': 12},
    {'driver': 'Isack Hadjar',       'team': 'Racing Bulls Honda RBPT',       'grid': 5},
    {'driver': 'Liam Lawson',        'team': 'Racing Bulls Honda RBPT',       'grid': 9},

    # Mercedes
    {'driver': 'George Russell',     'team': 'Mercedes',                      'grid': 14},
    {'driver': 'Kimi Antonelli',     'team': 'Mercedes',                      'grid': 15},

    # Aston Martin Aramco Mercedes
    {'driver': 'Fernando Alonso',    'team': 'Aston Martin Aramco Mercedes',  'grid': 6},
    {'driver': 'Lance Stroll',       'team': 'Aston Martin Aramco Mercedes',  'grid': 19},

    # Haas Ferrari
    {'driver': 'Esteban Ocon',       'team': 'Haas Ferrari',                  'grid': 8},
    {'driver': 'Oliver Bearman',     'team': 'Haas Ferrari',                  'grid': 20},

    # Williams Mercedes
    {'driver': 'Alexander Albon',    'team': 'Williams Mercedes',             'grid': 10},
    {'driver': 'Carlos Sainz',       'team': 'Williams Mercedes',             'grid': 11},

    # Kick Sauber Ferrari
    {'driver': 'Nico Hulkenberg',    'team': 'Kick Sauber Ferrari',           'grid': 13},
    {'driver': 'Gabriel Bortoleto',  'team': 'Kick Sauber Ferrari',           'grid': 16},

    # Alpine Renault
    {'driver': 'Pierre Gasly',       'team': 'Alpine Renault',                'grid': 17},
    {'driver': 'Franco Colapinto',   'team': 'Alpine Renault',                'grid': 18},
]



# Prepare the grid for prediction using the defined function
future_race_circuit_monaco = "monaco"
future_season_monaco = 2025
future_round_monaco = 8
race_description_monaco = "2025 Monaco Grand Prix"

print(f"Grid defined for {race_description_monaco}")

# Cell 10: Execute Prediction for 2025 Monaco Grand Prix

# --- Load the Saved Model ---
model_filename = "joblogs/f1_winner_predictor_model_XGB.joblib" # Changed filename for GBC model
loaded_model = None
try:
    loaded_model = joblib.load(model_filename)
    print(f"Loaded trained model from {model_filename}")
except FileNotFoundError:
    print(f"Error: Saved model file '{model_filename}' not found. Please train the model first (run Cell 5).")
    # Optionally, you could trigger training here if the model isn't found,
    # but for now, we'll assume it should exist.
except Exception as e:
    print(f"Error loading model: {e}")
# --------------------------

# --- Execute Prediction ---
# Check if model loaded successfully before proceeding
if loaded_model:
    predict_and_display_results(
        circuit_id=future_race_circuit_monaco,
        future_season=future_season_monaco,
        future_round=future_round_monaco,
        raw_grid_list=monaco_2025_raw_grid, # Defined in Cell 9
        model=loaded_model, # Use the loaded model
        base_races_df=races_df, # Original races_df from Cell 2 (with 2025 Bahrain data)
        base_qualifying_df=qualifying_df, # Original qualifying_df from Cell 2
        driver_name_to_id_hist_map=latest_driver_name_to_id_map, # From Cell 3
        constructor_name_to_id_hist_map=latest_constructor_name_to_id_map, # From Cell 3
        team_rebrand_map_current=TEAM_REBRAND_MAP, # From Cell 6
        model_features_list=features, # Defined in Cell 4
        race_description=race_description_monaco, # Defined in Cell 9
        save_path="predictions_XGB" # Specify folder to save CSV
    )
else:
    print("Prediction skipped because the model could not be loaded.")

Grid defined for 2025 Monaco Grand Prix
Loaded trained model from joblogs/f1_winner_predictor_model_XGB.joblib
--- Predicting for: 2025 Monaco Grand Prix (monaco) ---
Preparing future grid data (mapping names to IDs)...
Preparing grid for prediction...
Grid preparation complete.
Creating temporary DataFrame for feature calculation...
Combining with historical data...
Recalculating features on combined data...
Calculating features...
Full names not in races_df, attempting to merge from qualifying_df...
Names merged/filled in races_df.
Features calculated.
Isolating prediction rows...

Running prediction for 2025 Monaco Grand Prix...

Predicted Win Probabilities (2025 Monaco Grand Prix):
Predictions saved to: predictions_XGB/2025_R08_2025_Monaco_Grand_Prix_monaco_predictions_XGB.csv


| Driver             | Grid | Team                           | Probability |
|--------------------|------|--------------------------------|-------------|
| Lando Norris       | 1    | McLaren Mercedes               |      16.72% |
| Oscar Piastri      | 3    | McLaren Mercedes               |       9.72% |
| Charles Leclerc    | 2    | Ferrari                        |       8.74% |
| Max Verstappen     | 4    | Red Bull Racing Honda RBPT     |       7.52% |
| Lewis Hamilton     | 7    | Ferrari                        |       1.11% |
| Isack Hadjar       | 5    | Racing Bulls Honda RBPT        |       0.31% |
| Alexander Albon    | 10   | Williams Mercedes              |       0.14% |
| Fernando Alonso    | 6    | Aston Martin Aramco Mercedes   |       0.11% |
| Liam Lawson        | 9    | Racing Bulls Honda RBPT        |       0.10% |
| Esteban Ocon       | 8    | Haas Ferrari                   |       0.08% |
| George Russell     | 14   | Mercedes                       |       0.06% |
| Franco Colapinto   | 18   | Alpine Renault                 |       0.04% |
| Lance Stroll       | 19   | Aston Martin Aramco Mercedes   |       0.03% |
| Gabriel Bortoleto  | 16   | Kick Sauber Ferrari            |       0.03% |
| Nico Hulkenberg    | 13   | Kick Sauber Ferrari            |       0.03% |
| Pierre Gasly       | 17   | Alpine Renault                 |       0.03% |
| Oliver Bearman     | 20   | Haas Ferrari                   |       0.02% |
| Yuki Tsunoda       | 12   | Red Bull Racing Honda RBPT     |       0.02% |
| Kimi Antonelli     | 15   | Mercedes                       |       0.02% |
| Carlos Sainz       | 11   | Williams Mercedes              |       0.02% |


In [17]:
# Cell 9: Define Grid for 2025 Spanish Grand Prix

catalunya_2025_raw_grid = [
    # McLaren Mercedes
    {'driver': 'Lando Norris',       'team': 'McLaren Mercedes',              'grid': 2},
    {'driver': 'Oscar Piastri',      'team': 'McLaren Mercedes',              'grid': 1},

    # Ferrari
    {'driver': 'Charles Leclerc',    'team': 'Ferrari',                       'grid': 7},
    {'driver': 'Lewis Hamilton',     'team': 'Ferrari',                       'grid': 5},

    # Red Bull Racing Honda RBPT
    {'driver': 'Max Verstappen',     'team': 'Red Bull Racing Honda RBPT',    'grid': 3},
    {'driver': 'Yuki Tsunoda',       'team': 'Red Bull Racing Honda RBPT',    'grid': 20},
    {'driver': 'Isack Hadjar',       'team': 'Racing Bulls Honda RBPT',       'grid': 9},
    {'driver': 'Liam Lawson',        'team': 'Racing Bulls Honda RBPT',       'grid': 13},

    # Mercedes
    {'driver': 'George Russell',     'team': 'Mercedes',                      'grid': 4},
    {'driver': 'Kimi Antonelli',     'team': 'Mercedes',                      'grid': 6},

    # Aston Martin Aramco Mercedes
    {'driver': 'Fernando Alonso',    'team': 'Aston Martin Aramco Mercedes',  'grid': 10},
    {'driver': 'Lance Stroll',       'team': 'Aston Martin Aramco Mercedes',  'grid': 14},

    # Haas Ferrari
    {'driver': 'Esteban Ocon',       'team': 'Haas Ferrari',                  'grid': 17},
    {'driver': 'Oliver Bearman',     'team': 'Haas Ferrari',                  'grid': 15},

    # Williams Mercedes
    {'driver': 'Alexander Albon',    'team': 'Williams Mercedes',             'grid': 11},
    {'driver': 'Carlos Sainz',       'team': 'Williams Mercedes',             'grid': 18},

    # Kick Sauber Ferrari
    {'driver': 'Nico Hulkenberg',    'team': 'Kick Sauber Ferrari',           'grid': 16},
    {'driver': 'Gabriel Bortoleto',  'team': 'Kick Sauber Ferrari',           'grid': 12},

    # Alpine Renault
    {'driver': 'Pierre Gasly',       'team': 'Alpine Renault',                'grid': 8},
    {'driver': 'Franco Colapinto',   'team': 'Alpine Renault',                'grid': 19},
]



# Prepare the grid for prediction using the defined function
future_race_circuit_catalunya = "catalunya"
future_season_catalunya = 2025
future_round_catalunya = 9
race_description_catalunya = "2025 Spanish Grand Prix"

print(f"Grid defined for {race_description_catalunya}")

# Cell 10: Execute Prediction for 2025 Spanish Grand Prix

# --- Load the Saved Model ---
model_filename = "joblogs/f1_winner_predictor_model_XGB.joblib" # Changed filename for GBC model
loaded_model = None
try:
    loaded_model = joblib.load(model_filename)
    print(f"Loaded trained model from {model_filename}")
except FileNotFoundError:
    print(f"Error: Saved model file '{model_filename}' not found. Please train the model first (run Cell 5).")
    # Optionally, you could trigger training here if the model isn't found,
    # but for now, we'll assume it should exist.
except Exception as e:
    print(f"Error loading model: {e}")
# --------------------------

# --- Execute Prediction ---
# Check if model loaded successfully before proceeding
if loaded_model:
    predict_and_display_results(
        circuit_id=future_race_circuit_catalunya,
        future_season=future_season_catalunya,
        future_round=future_round_catalunya,
        raw_grid_list=catalunya_2025_raw_grid, # Defined in Cell 9
        model=loaded_model, # Use the loaded model
        base_races_df=races_df, # Original races_df from Cell 2 (with 2025 Bahrain data)
        base_qualifying_df=qualifying_df, # Original qualifying_df from Cell 2
        driver_name_to_id_hist_map=latest_driver_name_to_id_map, # From Cell 3
        constructor_name_to_id_hist_map=latest_constructor_name_to_id_map, # From Cell 3
        team_rebrand_map_current=TEAM_REBRAND_MAP, # From Cell 6
        model_features_list=features, # Defined in Cell 4
        race_description=race_description_catalunya, # Defined in Cell 9
        save_path="predictions_XGB" # Specify folder to save CSV
    )
else:
    print("Prediction skipped because the model could not be loaded.")

Grid defined for 2025 Spanish Grand Prix
Loaded trained model from joblogs/f1_winner_predictor_model_XGB.joblib
--- Predicting for: 2025 Spanish Grand Prix (catalunya) ---
Preparing future grid data (mapping names to IDs)...
Preparing grid for prediction...
Grid preparation complete.
Creating temporary DataFrame for feature calculation...
Combining with historical data...
Recalculating features on combined data...
Calculating features...
Full names not in races_df, attempting to merge from qualifying_df...
Names merged/filled in races_df.
Features calculated.
Isolating prediction rows...

Running prediction for 2025 Spanish Grand Prix...

Predicted Win Probabilities (2025 Spanish Grand Prix):
Predictions saved to: predictions_XGB/2025_R09_2025_Spanish_Grand_Prix_catalunya_predictions_XGB.csv


| Driver             | Grid | Team                           | Probability |
|--------------------|------|--------------------------------|-------------|
| Oscar Piastri      | 1    | McLaren Mercedes               |      31.11% |
| Max Verstappen     | 3    | Red Bull Racing Honda RBPT     |       7.80% |
| George Russell     | 4    | Mercedes                       |       3.73% |
| Lando Norris       | 2    | McLaren Mercedes               |       0.88% |
| Lewis Hamilton     | 5    | Ferrari                        |       0.20% |
| Pierre Gasly       | 8    | Alpine Renault                 |       0.17% |
| Fernando Alonso    | 10   | Aston Martin Aramco Mercedes   |       0.12% |
| Kimi Antonelli     | 6    | Mercedes                       |       0.10% |
| Charles Leclerc    | 7    | Ferrari                        |       0.10% |
| Isack Hadjar       | 9    | Racing Bulls Honda RBPT        |       0.06% |
| Franco Colapinto   | 19   | Alpine Renault                 |       0.04% |
| Oliver Bearman     | 15   | Haas Ferrari                   |       0.03% |
| Gabriel Bortoleto  | 12   | Kick Sauber Ferrari            |       0.03% |
| Lance Stroll       | 14   | Aston Martin Aramco Mercedes   |       0.03% |
| Nico Hulkenberg    | 16   | Kick Sauber Ferrari            |       0.03% |
| Carlos Sainz       | 18   | Williams Mercedes              |       0.02% |
| Yuki Tsunoda       | 20   | Red Bull Racing Honda RBPT     |       0.02% |
| Liam Lawson        | 13   | Racing Bulls Honda RBPT        |       0.02% |
| Esteban Ocon       | 17   | Haas Ferrari                   |       0.01% |
| Alexander Albon    | 11   | Williams Mercedes              |       0.01% |


In [18]:
# Cell 9: Define Grid for 2025 Canadian Grand Prix

villeneuve_2025_raw_grid = [
    # McLaren Mercedes
    {'driver': 'Lando Norris',       'team': 'McLaren Mercedes',              'grid': 7},
    {'driver': 'Oscar Piastri',      'team': 'McLaren Mercedes',              'grid': 3},

    # Ferrari
    {'driver': 'Charles Leclerc',    'team': 'Ferrari',                       'grid': 8},
    {'driver': 'Lewis Hamilton',     'team': 'Ferrari',                       'grid': 5},

    # Red Bull Racing Honda RBPT
    {'driver': 'Max Verstappen',     'team': 'Red Bull Racing Honda RBPT',    'grid': 2},
    {'driver': 'Yuki Tsunoda',       'team': 'Red Bull Racing Honda RBPT',    'grid': 20},
    {'driver': 'Isack Hadjar',       'team': 'Racing Bulls Honda RBPT',       'grid': 12},
    {'driver': 'Liam Lawson',        'team': 'Racing Bulls Honda RBPT',       'grid': 18},

    # Mercedes
    {'driver': 'George Russell',     'team': 'Mercedes',                      'grid': 1},
    {'driver': 'Kimi Antonelli',     'team': 'Mercedes',                      'grid': 4},

    # Aston Martin Aramco Mercedes
    {'driver': 'Fernando Alonso',    'team': 'Aston Martin Aramco Mercedes',  'grid': 6},
    {'driver': 'Lance Stroll',       'team': 'Aston Martin Aramco Mercedes',  'grid': 17},

    # Haas Ferrari
    {'driver': 'Esteban Ocon',       'team': 'Haas Ferrari',                  'grid': 14},
    {'driver': 'Oliver Bearman',     'team': 'Haas Ferrari',                  'grid': 13},

    # Williams Mercedes
    {'driver': 'Alexander Albon',    'team': 'Williams Mercedes',             'grid': 9},
    {'driver': 'Carlos Sainz',       'team': 'Williams Mercedes',             'grid': 16},

    # Kick Sauber Ferrari
    {'driver': 'Nico Hulkenberg',    'team': 'Kick Sauber Ferrari',           'grid': 11},
    {'driver': 'Gabriel Bortoleto',  'team': 'Kick Sauber Ferrari',           'grid': 15},

    # Alpine Renault
    {'driver': 'Pierre Gasly',       'team': 'Alpine Renault',                'grid': 19},
    {'driver': 'Franco Colapinto',   'team': 'Alpine Renault',                'grid': 10},
]



# Prepare the grid for prediction using the defined function
future_race_circuit_villeneuve = "villeneuve"
future_season_villeneuve = 2025
future_round_villeneuve = 10
race_description_villeneuve = "2025 Canadian Grand Prix"

print(f"Grid defined for {race_description_villeneuve}")

# Cell 10: Execute Prediction for 2025 Canadian Grand Prix

# --- Load the Saved Model ---
model_filename = "joblogs/f1_winner_predictor_model_XGB.joblib" # Changed filename for GBC model
loaded_model = None
try:
    loaded_model = joblib.load(model_filename)
    print(f"Loaded trained model from {model_filename}")
except FileNotFoundError:
    print(f"Error: Saved model file '{model_filename}' not found. Please train the model first (run Cell 5).")
    # Optionally, you could trigger training here if the model isn't found,
    # but for now, we'll assume it should exist.
except Exception as e:
    print(f"Error loading model: {e}")
# --------------------------

# --- Execute Prediction ---
# Check if model loaded successfully before proceeding
if loaded_model:
    predict_and_display_results(
        circuit_id=future_race_circuit_villeneuve,
        future_season=future_season_villeneuve,
        future_round=future_round_villeneuve,
        raw_grid_list=villeneuve_2025_raw_grid, # Defined in Cell 9
        model=loaded_model, # Use the loaded model
        base_races_df=races_df, # Original races_df from Cell 2 (with 2025 Bahrain data)
        base_qualifying_df=qualifying_df, # Original qualifying_df from Cell 2
        driver_name_to_id_hist_map=latest_driver_name_to_id_map, # From Cell 3
        constructor_name_to_id_hist_map=latest_constructor_name_to_id_map, # From Cell 3
        team_rebrand_map_current=TEAM_REBRAND_MAP, # From Cell 6
        model_features_list=features, # Defined in Cell 4
        race_description=race_description_villeneuve, # Defined in Cell 9
        save_path="predictions_XGB" # Specify folder to save CSV
    )
else:
    print("Prediction skipped because the model could not be loaded.")

Grid defined for 2025 Canadian Grand Prix
Loaded trained model from joblogs/f1_winner_predictor_model_XGB.joblib
--- Predicting for: 2025 Canadian Grand Prix (villeneuve) ---
Preparing future grid data (mapping names to IDs)...
Preparing grid for prediction...
Grid preparation complete.
Creating temporary DataFrame for feature calculation...
Combining with historical data...
Recalculating features on combined data...
Calculating features...
Full names not in races_df, attempting to merge from qualifying_df...
Names merged/filled in races_df.
Features calculated.
Isolating prediction rows...

Running prediction for 2025 Canadian Grand Prix...

Predicted Win Probabilities (2025 Canadian Grand Prix):
Predictions saved to: predictions_XGB/2025_R10_2025_Canadian_Grand_Prix_villeneuve_predictions_XGB.csv


| Driver             | Grid | Team                           | Probability |
|--------------------|------|--------------------------------|-------------|
| George Russell     | 1    | Mercedes                       |      28.81% |
| Max Verstappen     | 2    | Red Bull Racing Honda RBPT     |       7.26% |
| Oscar Piastri      | 3    | McLaren Mercedes               |       5.37% |
| Lewis Hamilton     | 5    | Ferrari                        |       0.90% |
| Lando Norris       | 7    | McLaren Mercedes               |       0.43% |
| Charles Leclerc    | 8    | Ferrari                        |       0.37% |
| Franco Colapinto   | 10   | Alpine Renault                 |       0.20% |
| Fernando Alonso    | 6    | Aston Martin Aramco Mercedes   |       0.09% |
| Kimi Antonelli     | 4    | Mercedes                       |       0.06% |
| Alexander Albon    | 9    | Williams Mercedes              |       0.05% |
| Lance Stroll       | 17   | Aston Martin Aramco Mercedes   |       0.03% |
| Oliver Bearman     | 13   | Haas Ferrari                   |       0.02% |
| Gabriel Bortoleto  | 15   | Kick Sauber Ferrari            |       0.02% |
| Carlos Sainz       | 16   | Williams Mercedes              |       0.02% |
| Pierre Gasly       | 19   | Alpine Renault                 |       0.02% |
| Yuki Tsunoda       | 20   | Red Bull Racing Honda RBPT     |       0.02% |
| Liam Lawson        | 18   | Racing Bulls Honda RBPT        |       0.02% |
| Esteban Ocon       | 14   | Haas Ferrari                   |       0.01% |
| Nico Hulkenberg    | 11   | Kick Sauber Ferrari            |       0.01% |
| Isack Hadjar       | 12   | Racing Bulls Honda RBPT        |       0.01% |


In [None]:
# Cell 9: Define Grid for 2025 Austrian Grand Prix

red_bull_ring_2025_raw_grid = [
    # McLaren Mercedes
    {'driver': 'Lando Norris',       'team': 'McLaren Mercedes',              'grid': 1},
    {'driver': 'Oscar Piastri',      'team': 'McLaren Mercedes',              'grid': 3},

    # Ferrari
    {'driver': 'Charles Leclerc',    'team': 'Ferrari',                       'grid': 2},
    {'driver': 'Lewis Hamilton',     'team': 'Ferrari',                       'grid': 4},

    # Red Bull Racing Honda RBPT
    {'driver': 'Max Verstappen',     'team': 'Red Bull Racing Honda RBPT',    'grid': 7},
    {'driver': 'Yuki Tsunoda',       'team': 'Red Bull Racing Honda RBPT',    'grid': 18},
    {'driver': 'Isack Hadjar',       'team': 'Racing Bulls Honda RBPT',       'grid': 13},
    {'driver': 'Liam Lawson',        'team': 'Racing Bulls Honda RBPT',       'grid': 6},

    # Mercedes
    {'driver': 'George Russell',     'team': 'Mercedes',                      'grid': 5},
    {'driver': 'Kimi Antonelli',     'team': 'Mercedes',                      'grid': 9},

    # Aston Martin Aramco Mercedes
    {'driver': 'Fernando Alonso',    'team': 'Aston Martin Aramco Mercedes',  'grid': 11},
    {'driver': 'Lance Stroll',       'team': 'Aston Martin Aramco Mercedes',  'grid': 16},

    # Haas Ferrari
    {'driver': 'Esteban Ocon',       'team': 'Haas Ferrari',                  'grid': 17},
    {'driver': 'Oliver Bearman',     'team': 'Haas Ferrari',                  'grid': 15},

    # Williams Mercedes
    {'driver': 'Alexander Albon',    'team': 'Williams Mercedes',             'grid': 12},
    {'driver': 'Carlos Sainz',       'team': 'Williams Mercedes',             'grid': 19},

    # Kick Sauber Ferrari
    {'driver': 'Nico Hulkenberg',    'team': 'Kick Sauber Ferrari',           'grid': 20},
    {'driver': 'Gabriel Bortoleto',  'team': 'Kick Sauber Ferrari',           'grid': 8},

    # Alpine Renault
    {'driver': 'Pierre Gasly',       'team': 'Alpine Renault',                'grid': 10},
    {'driver': 'Franco Colapinto',   'team': 'Alpine Renault',                'grid': 14},
]



# Prepare the grid for prediction using the defined function
future_race_circuit_red_bull_ring = "red_bull_ring"
future_season_red_bull_ring = 2025
future_round_red_bull_ring = 11
race_description_red_bull_ring = "2025 Austrian Grand Prix"

print(f"Grid defined for {race_description_red_bull_ring}")

# Cell 10: Execute Prediction for 2025 Austrian Grand Prix

# --- Load the Saved Model ---
model_filename = "joblogs/f1_winner_predictor_model_XGB.joblib" # Changed filename for XGB model
loaded_model = None
try:
    loaded_model = joblib.load(model_filename)
    print(f"Loaded trained model from {model_filename}")
except FileNotFoundError:
    print(f"Error: Saved model file '{model_filename}' not found. Please train the model first (run Cell 5).")
    # Optionally, you could trigger training here if the model isn't found,
    # but for now, we'll assume it should exist.
except Exception as e:
    print(f"Error loading model: {e}")
# --------------------------

# --- Execute Prediction ---
# Check if model loaded successfully before proceeding
if loaded_model:
    predict_and_display_results(
        circuit_id=future_race_circuit_red_bull_ring,
        future_season=future_season_red_bull_ring,
        future_round=future_round_red_bull_ring,
        raw_grid_list=red_bull_ring_2025_raw_grid, # Defined in Cell 9
        model=loaded_model, # Use the loaded model
        base_races_df=races_df, # Original races_df from Cell 2 (with 2025 Bahrain data)
        base_qualifying_df=qualifying_df, # Original qualifying_df from Cell 2
        driver_name_to_id_hist_map=latest_driver_name_to_id_map, # From Cell 3
        constructor_name_to_id_hist_map=latest_constructor_name_to_id_map, # From Cell 3
        team_rebrand_map_current=TEAM_REBRAND_MAP, # From Cell 6
        model_features_list=features, # Defined in Cell 4
        race_description=race_description_red_bull_ring, # Defined in Cell 9
        save_path="predictions_XGB" # Specify folder to save CSV
    )
else:
    print("Prediction skipped because the model could not be loaded.")