# F1 Dynasty Engine v7.0: Future-Proof & Dynamic Scaling

**Engine Version:** 7.0 (The "Dynasty" Release)
**Core Philosophy:** "Code once, run forever."

**New Capabilities:**
1.  **Dynamic Grid Scaling:** Automatically handles 20, 22, 24, or 26 car grids. (Future-proof for new teams like Andretti/Cadillac).
2.  **Robust Encoding:** Handles "Unknown" drivers/teams (Rookies in future seasons) without crashing, using generic capability baselines.
3.  **Continuous Ingestion:** Automatically detects the current year and fetches all available data up to the present day.
4.  **Weather Injection:** Allows manual weather overrides (Dry/Wet) to simulate forecast scenarios.

---

## 1. System Initialization

In [None]:
!pip install fastf1 lightgbm scikit-learn pandas numpy scipy tabulate

In [None]:
import fastf1 as ff1
import pandas as pd
import numpy as np
import lightgbm as lgb
from sklearn.base import BaseEstimator, TransformerMixin
import os
import logging
import warnings
from datetime import datetime

# Notebook-wide display settings: hide library warnings and show every column
# when a DataFrame is printed.
warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', None)

# --- CACHE SETUP ---
# Directory (relative to the working directory) where FastF1 stores downloads.
CACHE_DIR = 'f1_cache_dynasty'

try:
    # exist_ok=True replaces the exists()/makedirs() pair, which could fail if
    # the directory appeared between the check and the creation. Creating the
    # directory inside the try keeps the whole cache setup best-effort: any
    # failure is reported below instead of aborting the notebook.
    os.makedirs(CACHE_DIR, exist_ok=True)
    ff1.Cache.enable_cache(CACHE_DIR)
    # Only surface FastF1 errors; its info/warning output is very chatty.
    logging.getLogger('fastf1').setLevel(logging.ERROR)
    print("✅ Dynasty Engine v7.0 Online.")
except Exception as e:
    print(f"⚠️ Cache Warning: {e}")

## 2. Robust Encoding (The "New Driver" Handler)
Standard encoders crash on new data. This custom encoder maps unknown entities (e.g., "Kimi Antonelli" in 2025) to a generic "Unknown" bucket so the model can still predict based on their Car and Grid position.

In [None]:
class RobustEncoder(BaseEstimator, TransformerMixin):
    """Integer label encoder that tolerates labels it has never seen.

    Labels observed during ``fit`` are numbered 0, 1, 2, ... in order of first
    appearance. Any label not seen during ``fit`` is encoded as
    ``unknown_token`` (-1) instead of raising.
    """

    def __init__(self):
        # Code returned for labels that were not present when fitting.
        self.unknown_token = -1
        # label -> integer code, populated by fit().
        self.mapping = {}

    def fit(self, y):
        """Learn the label -> code table from ``y`` and return ``self``."""
        labels = pd.Series(y).unique()
        self.mapping = dict(zip(labels, range(len(labels))))
        return self

    def transform(self, y):
        """Return an array with the code of every label in ``y``."""

        def encode(label):
            # Unseen labels fall back to the unknown token.
            return self.mapping.get(label, self.unknown_token)

        return pd.Series(y).map(encode).values

    def fit_transform(self, y):
        """Fit on ``y``, then return the encoded version of ``y``."""
        self.fit(y)
        return self.transform(y)

## 3. Dynamic Circuit DNA & Elo
The core logic is unchanged, but it has been adapted for continuous updates.

In [None]:
# TRACK DNA (Static Knowledge Base)
# Hand-maintained circuit profiles. 'Type' is a coarse track category and
# 'Overtaking' a 1-10 score (higher = easier to overtake).
TRACK_DNA = {
    'Bahrain': {'Type': 'Balanced', 'Overtaking': 8},
    'Saudi': {'Type': 'Street_Fast', 'Overtaking': 7},
    'Australia': {'Type': 'Street_Fast', 'Overtaking': 6},
    'Japan': {'Type': 'Technical', 'Overtaking': 4},
    'China': {'Type': 'Balanced', 'Overtaking': 7},
    'Miami': {'Type': 'Street_Fast', 'Overtaking': 6},
    'Emilia': {'Type': 'Technical', 'Overtaking': 3},
    'Monaco': {'Type': 'Street_Slow', 'Overtaking': 1},
    'Canada': {'Type': 'Street_Fast', 'Overtaking': 7},
    'Spain': {'Type': 'Technical', 'Overtaking': 5},
    'Austria': {'Type': 'Power', 'Overtaking': 8},
    'Britain': {'Type': 'High_Speed', 'Overtaking': 7},
    'Hungary': {'Type': 'Technical', 'Overtaking': 3},
    'Belgium': {'Type': 'High_Speed', 'Overtaking': 9},
    'Netherlands': {'Type': 'Technical', 'Overtaking': 4},
    'Italy': {'Type': 'Power', 'Overtaking': 8},
    'Azerbaijan': {'Type': 'Street_Fast', 'Overtaking': 8},
    'Singapore': {'Type': 'Street_Slow', 'Overtaking': 2},
    'Austin': {'Type': 'Balanced', 'Overtaking': 7},
    'Mexico': {'Type': 'High_Altitude', 'Overtaking': 6},
    'Brazil': {'Type': 'Balanced', 'Overtaking': 9},
    'Las Vegas': {'Type': 'Street_Fast', 'Overtaking': 8},
    'Qatar': {'Type': 'High_Speed', 'Overtaking': 6},
    'Abu Dhabi': {'Type': 'Balanced', 'Overtaking': 5},
}

# Event titles usually use an adjective or a city ("British Grand Prix",
# "United States Grand Prix") that does not contain the TRACK_DNA key as a
# substring. Each alias maps such a fragment to the TRACK_DNA key it belongs to.
_TRACK_ALIASES = {
    'Chinese': 'China',
    'Canadian': 'Canada',
    'Spanish': 'Spain',
    'Styrian': 'Austria',
    'British': 'Britain',
    'Hungarian': 'Hungary',
    'Belgian': 'Belgium',
    'Dutch': 'Netherlands',
    'Italian': 'Italy',
    'United States': 'Austin',
    'Mexican': 'Mexico',
    'S\u00e3o Paulo': 'Brazil',
    'Sao Paulo': 'Brazil',
    'Brazilian': 'Brazil',
}


def get_track_dna(circuit_name):
    """Return the circuit profile for an event or circuit name.

    Args:
        circuit_name: Free-text name such as ``'Bahrain Grand Prix'``.

    Returns:
        A dict with keys ``'Type'`` and ``'Overtaking'``. The first TRACK_DNA
        key contained in ``circuit_name`` wins; if none matches, the alias
        table is tried; otherwise (or when ``circuit_name`` is not a string)
        a neutral ``{'Type': 'Balanced', 'Overtaking': 5}`` is returned.
    """
    default = {'Type': 'Balanced', 'Overtaking': 5}
    if not isinstance(circuit_name, str):
        return default

    # Direct keys first so names that already matched keep their result.
    for key in TRACK_DNA:
        if key in circuit_name:
            return TRACK_DNA[key]

    for alias, key in _TRACK_ALIASES.items():
        if alias in circuit_name:
            return TRACK_DNA[key]

    return default

# ELO ENGINE
class EloTracker:
    """Running Elo ratings for drivers and teams.

    Every race is treated as a round-robin: each pair of drivers is one
    "game" won by whoever finished ahead. Teams are rated from the same
    pairings, using each driver's team.

    Args:
        base: Rating given to an entity that has never been rated.
        k_driver: K-factor applied to each driver pairing.
        k_team: K-factor applied to each team pairing.
    """

    def __init__(self, base=1500, k_driver=32, k_team=24):
        self.driver_ratings = {}
        self.team_ratings = {}
        self.base = base
        self.k_driver = k_driver
        self.k_team = k_team

    def get_rating(self, entity, is_team=False):
        """Return the current rating of ``entity`` (``base`` if unrated)."""
        target = self.team_ratings if is_team else self.driver_ratings
        return target.get(entity, self.base)

    def update(self, df):
        """Apply one race result to the ratings.

        Args:
            df: DataFrame with ``'Driver'``, ``'Team'`` and ``'Position'``
                columns, one row per driver. If a driver appears more than
                once, the first row is used.

        All deltas are computed against the pre-race ratings and applied
        together. Entities absent from ``df`` keep their current rating.
        """
        if df is None or len(df) == 0:
            return

        # One pass over the frame instead of four DataFrame scans per pair.
        team_of, pos_of = {}, {}
        for drv, team, pos in zip(df['Driver'], df['Team'], df['Position']):
            if drv not in team_of:
                team_of[drv] = team
                pos_of[drv] = pos

        drivers = list(team_of)
        curr_d = {d: self.get_rating(d) for d in drivers}
        curr_t = {t: self.get_rating(t, is_team=True) for t in team_of.values()}
        new_d, new_t = dict(curr_d), dict(curr_t)

        # Pairwise update
        for i, dA in enumerate(drivers):
            tA, posA = team_of[dA], pos_of[dA]
            for dB in drivers[i + 1:]:
                tB, posB = team_of[dB], pos_of[dB]

                # 1 = A finished ahead, 0 = B ahead, 0.5 = tie / not comparable.
                score = 1.0 if posA < posB else (0.0 if posA > posB else 0.5)

                EA = 1 / (1 + 10 ** ((curr_d[dB] - curr_d[dA]) / 400))
                delta_d = self.k_driver * (score - EA)
                new_d[dA] += delta_d
                new_d[dB] -= delta_d

                ETA = 1 / (1 + 10 ** ((curr_t[tB] - curr_t[tA]) / 400))
                delta_t = self.k_team * (score - ETA)
                new_t[tA] += delta_t
                new_t[tB] -= delta_t

        # Merge rather than replace: replacing the dicts would reset every
        # driver/team that did not take part in this race back to `base`.
        self.driver_ratings.update(new_d)
        self.team_ratings.update(new_t)

## 4. Continuous Data Ingestion Loop

This function does not stop at a hard-coded year. It loops from 2021 through the year reported by the system clock and keeps only events whose date has already passed, so running it in 2026 automatically trains on 2025 data.

In [None]:
def build_engine():
    """Download race results, engineer features and train the ranking model.

    Results are collected for every completed race from 2021 up to the
    current date. Seasons or races that cannot be loaded are skipped with a
    logged warning.

    Returns:
        A 5-tuple ``(model, tracker, df, residuals, encoders)``:

        * ``model`` - fitted ``lgb.LGBMRanker``.
        * ``tracker`` - ``EloTracker`` updated with every collected race.
        * ``df`` - the feature DataFrame (one row per driver per race).
        * ``residuals`` - ``np.ndarray`` of (actual position - predicted
          rank) on the held-out races; ``[0.0]`` when nothing was held out.
        * ``encoders`` - ``(driver, team, track_type)`` ``RobustEncoder``s.

        All five are ``None`` when no results could be collected.
    """
    log = logging.getLogger(__name__)
    print("⚙️ Building Dynasty Engine...")
    data = []
    now = datetime.now()
    current_year = now.year

    # DYNAMIC LOOP: From 2021 to Current Year
    for year in range(2021, current_year + 1):
        try:
            schedule = ff1.get_event_schedule(year)
            if schedule.empty:
                continue
            if schedule['EventDate'].dt.tz is not None:
                schedule['EventDate'] = schedule['EventDate'].dt.tz_localize(None)

            # Only fetch completed races
            completed = schedule[schedule['EventDate'] < now]

            for _, event in completed.iterrows():
                # Skips events whose fifth session is not a race (e.g. testing).
                if event['Session5'] != 'Race':
                    continue
                try:
                    session = ff1.get_session(year, event['RoundNumber'], 'R')
                    session.load(laps=False, telemetry=False, weather=False, messages=False)
                    if session.results.empty:
                        continue

                    dna = get_track_dna(event['EventName'])
                    for _, row in session.results.iterrows():
                        data.append({
                            'Year': year, 'Round': event['RoundNumber'], 'Circuit': event['EventName'],
                            'Track_Type': dna['Type'], 'Overtaking_Fac': dna['Overtaking'],
                            'Driver': row['Abbreviation'], 'Team': row['TeamName'],
                            'Grid': row['GridPosition'], 'Position': row['Position'], 'Status': row['Status']
                        })
                # `except Exception` keeps ingestion best-effort but, unlike a
                # bare `except`, still lets Ctrl-C / SystemExit through.
                except Exception as exc:
                    log.warning("Skipping %s round %s: %s", year, event['RoundNumber'], exc)
                    continue
        except Exception as exc:
            log.warning("Skipping season %s: %s", year, exc)
            continue

    df = pd.DataFrame(data)
    if df.empty:
        return None, None, None, None, None

    # FEATURE ENGINEERING
    race_keys = ['Year', 'Round']
    df = df.sort_values(race_keys)
    # Entries per race. Used instead of a hard-coded 20 so grids of any size
    # are handled; for a 20-car race the values are identical to before.
    field_size = df.groupby(race_keys)['Driver'].transform('size')
    df['Position'] = pd.to_numeric(df['Position'], errors='coerce').fillna(field_size)
    tracker = EloTracker()

    # Record each entrant's rating BEFORE the race, then feed the result in,
    # so the Elo features never contain the outcome they are used to predict.
    d_elos, t_elos = [], []
    for _, race in df.groupby(race_keys):
        for _, row in race.iterrows():
            d_elos.append(tracker.get_rating(row['Driver']))
            t_elos.append(tracker.get_rating(row['Team'], is_team=True))
        tracker.update(race)

    df['Driver_Elo'] = d_elos
    df['Team_Elo'] = t_elos
    # shift(1) excludes the current row from every rolling/expanding window.
    df['Form'] = df.groupby('Driver')['Position'].transform(lambda x: x.shift(1).rolling(5, min_periods=1).mean())
    df['Consistency'] = df.groupby('Driver')['Position'].transform(lambda x: x.shift(1).rolling(5, min_periods=1).std()).fillna(3.0)
    df['Type_Affinity'] = df.groupby(['Driver', 'Track_Type'])['Position'].transform(lambda x: x.shift(1).expanding().mean())
    # NOTE(review): grouped by team, so the previous row is usually the
    # team-mate's result from the SAME race - confirm this is intended.
    df['Reliability'] = df.groupby('Team')['Status'].transform(lambda x: x.shift(1).isin(['Finished', '+1 Lap']).rolling(10).mean()).fillna(0.8)
    # No history yet: use 15, the value the prediction step assigns to drivers
    # without history. The blanket fill below would otherwise turn "unknown"
    # into an average position of 0, i.e. better than a win.
    df[['Form', 'Type_Affinity']] = df[['Form', 'Type_Affinity']].fillna(15.0)
    df.fillna(0, inplace=True)

    # ROBUST TRAINING (Handling Unknowns)
    le_d, le_t, le_tt = RobustEncoder(), RobustEncoder(), RobustEncoder()
    df['Driver_ID'] = le_d.fit_transform(df['Driver'])
    df['Team_ID'] = le_t.fit_transform(df['Team'])
    df['Type_ID'] = le_tt.fit_transform(df['Track_Type'])

    FEATS = ['Grid', 'Driver_Elo', 'Team_Elo', 'Form', 'Consistency', 'Type_Affinity', 'Overtaking_Fac', 'Reliability', 'Driver_ID', 'Team_ID', 'Type_ID']

    # Dynamic split: hold out the last 5 races, but always keep at least one
    # race for training. Split on race index, not year (safe early in a season).
    df['Race_Index'] = df.groupby(race_keys).ngroup()
    cutoff_idx = max(df['Race_Index'].nunique() - 5, 1)
    train_mask = df['Race_Index'] < cutoff_idx
    val_mask = ~train_mask

    train = df[train_mask]
    X_tr = train[FEATS]
    # Relevance label: winner = field size, last place = 1. Equals the former
    # `21 - Position` on a 20-car grid but never goes negative on larger
    # grids; lambdarank expects non-negative integer labels.
    train_field = train.groupby(race_keys)['Driver'].transform('size')
    y_tr = (train_field + 1 - train['Position']).clip(lower=0).round().astype(int)
    g_tr = train.groupby(race_keys).size().to_numpy()

    model = lgb.LGBMRanker(objective='lambdarank', metric='ndcg', n_estimators=600, learning_rate=0.03, random_state=42)
    model.fit(X_tr, y_tr, group=g_tr)

    # Extract Residuals for Hybrid Sim
    residuals = []
    val = df[val_mask]
    if not val.empty:
        preds = model.predict(val[FEATS])
        curr = 0
        # groupby yields races in sorted order, matching the row order of
        # `val`, so consecutive slices of `preds` line up with each race.
        for _, grp in val.groupby(race_keys):
            n = len(grp)
            p = preds[curr:curr + n]
            curr += n
            # Double argsort turns scores into ranks (1 = highest score).
            ranks = (-p).argsort().argsort() + 1
            residuals.extend(grp['Position'].values - ranks)
    if not residuals:
        # Nothing held out (only one race available): fall back to a
        # zero-noise pool so downstream sampling still has something to draw.
        residuals = [0.0]

    print(f"✅ Engine Ready. Trained on {len(df)} historical entries.")
    return model, tracker, df, np.array(residuals), (le_d, le_t, le_tt)

# Build the engine once when this cell runs. build_engine() returns five
# values; all of them are None when no race results could be collected.
model, tracker, train_df, residuals, encoders = build_engine()

## 5. Dynamic Grid & Weather Simulator

This prediction logic is "Grid Agnostic." It counts how many drivers are in the input and scales accordingly.

In [None]:
def _first_upcoming_event(year, now):
    """Return (year, event name, event date) of the first race event in
    `year` dated on or after `now`, or None if there is none."""
    # include_testing=False keeps pre-season testing from being reported as
    # the "next race".
    schedule = ff1.get_event_schedule(year, include_testing=False)
    if schedule.empty:
        return None
    if schedule['EventDate'].dt.tz is not None:
        schedule['EventDate'] = schedule['EventDate'].dt.tz_localize(None)

    # Events without a date (NaT) compare False and are therefore excluded.
    upcoming = schedule[schedule['EventDate'] >= now]
    if upcoming.empty:
        return None
    event = upcoming.iloc[0]
    return year, event['EventName'], event['EventDate']


def get_next_race():
    """Find the next scheduled race.

    Looks in the current year's schedule first and, if that season is over,
    in next year's schedule.

    Returns:
        ``(year, event_name, event_date)`` for the next race, or
        ``(None, None, None)`` when next year's schedule is unavailable or
        has no dated events. A failure to load the current year's schedule
        is not caught and propagates to the caller.
    """
    now = datetime.now()

    # Try current year first
    found = _first_upcoming_event(now.year, now)
    if found is not None:
        return found

    # Else try next year (e.g., it's Dec 2025, look for Mar 2026)
    try:
        found = _first_upcoming_event(now.year + 1, now)
    except Exception:
        # Next year's schedule may simply not be published yet.
        found = None
    return found if found is not None else (None, None, None)

# Status values counted as a reliable finish (same list as the training
# 'Reliability' feature).
_FINISHED_STATUSES = ['Finished', '+1 Lap']


def _driver_profile(drv, tm, track_type):
    """Return (form, consistency, reliability, affinity) for the next race,
    computed from the module-level `train_df` history."""
    # Reliability is a team feature: share of the team's last 10 entries that
    # finished. With fewer than 10 entries use 0.8, as the training feature does.
    team_status = train_df.loc[train_df['Team'] == tm, 'Status'].tail(10)
    rel = team_status.isin(_FINISHED_STATUSES).mean() if len(team_status) == 10 else 0.8

    past = train_df[train_df['Driver'] == drv]
    if past.empty:
        # Generic Rookie Profile
        return 15, 4.0, rel, 15

    # Use the last five RESULTS, including the most recent race. The stored
    # 'Form' / 'Consistency' columns describe the state before that race.
    recent = past['Position'].tail(5)
    form = recent.mean()
    cons = recent.std()
    if pd.isna(cons):  # a single race has no spread; same fill as training
        cons = 3.0

    # Affinity must refer to the UPCOMING track type, not to whatever type
    # the driver's last race happened to be.
    on_type = past.loc[past['Track_Type'] == track_type, 'Position']
    aff = on_type.mean() if not on_type.empty else 15
    return form, cons, rel, aff


def run_dynasty_prediction(weather_forecast='Dry', n_sims=10000):
    """Simulate the next race and print win/podium/points probabilities.

    Relies on the module-level ``model``, ``tracker``, ``train_df``,
    ``residuals`` and ``encoders`` produced by ``build_engine()``; does
    nothing when ``model`` is ``None``.

    Args:
        weather_forecast: ``'Wet'`` (case-insensitive) scales the position
            noise and the DNF probability by 1.5; any other value is treated
            as dry.
        n_sims: Number of Monte-Carlo simulations.

    Returns:
        None. The result table is printed.
    """
    if model is None:
        return

    # 1. AUTO-DETECT
    year, race_name, race_date = get_next_race()
    if not year:
        print("❌ No future races found in schedule.")
        return

    print(f"\n🌍 DYNASTY PREDICTION PROTOCOL: {year} {race_name}")
    print(f"📅 Date: {race_date.strftime('%Y-%m-%d')} | 🌤️ Weather: {weather_forecast}")

    dna = get_track_dna(race_name)
    is_wet = str(weather_forecast).strip().lower() == 'wet'

    # 2. GRID ACQUISITION
    # Use the qualifying classification when it exists; otherwise project a
    # grid from the entrants of the most recent race in the training data.
    grid = None
    try:
        session = ff1.get_session(year, race_name, 'Q')
        session.load(laps=False, telemetry=False, weather=False, messages=False)
        results = session.results
        if not results.empty:
            grid = results[['Abbreviation', 'TeamName', 'GridPosition']].copy()
            # FastF1 documents GridPosition as populated only for race/sprint
            # sessions, so for qualifying fall back to the classification
            # ('Position'); anything still missing starts from the back.
            start = pd.to_numeric(grid['GridPosition'], errors='coerce')
            start = start.fillna(pd.to_numeric(results['Position'], errors='coerce'))
            grid['GridPosition'] = start.fillna(len(grid))
            print("✅ Using Official Grid.")
    except Exception:
        grid = None

    if grid is None:
        print("⚠️ Future Race. Projecting Grid based on current Elo Rankings.")
        # Entrants of the latest race = last known active line-up. Taking
        # every driver of the season would add replaced/substitute drivers.
        latest_year = train_df['Year'].max()
        latest_round = train_df.loc[train_df['Year'] == latest_year, 'Round'].max()
        active = train_df[(train_df['Year'] == latest_year) & (train_df['Round'] == latest_round)]
        active = active.drop_duplicates('Driver')

        grid = pd.DataFrame({
            'Abbreviation': active['Driver'].values,
            'TeamName': active['Team'].values,
        })
        # Projection: High Elo = Better Grid. Ratings come from the tracker
        # (post-last-race), not from the pre-race values stored in train_df.
        grid['Proj_Score'] = [
            0.6 * tracker.get_rating(d) + 0.4 * tracker.get_rating(t, True)
            for d, t in zip(grid['Abbreviation'], grid['TeamName'])
        ]
        grid = grid.sort_values('Proj_Score', ascending=False).reset_index(drop=True)
        grid['GridPosition'] = range(1, len(grid) + 1)

    # 3. DYNAMIC SCALING
    N_DRIVERS = len(grid)
    print(f"🏎️ Grid Size: {N_DRIVERS} Cars detected.")

    # 4. PREPARE FEATURES
    le_d, le_t, le_tt = encoders
    FEATS = ['Grid', 'Driver_Elo', 'Team_Elo', 'Form', 'Consistency', 'Type_Affinity', 'Overtaking_Fac', 'Reliability', 'Driver_ID', 'Team_ID', 'Type_ID']
    type_id = le_tt.transform([dna['Type']])[0]
    rows = []

    for _, row in grid.iterrows():
        drv = row['Abbreviation']
        tm = row['TeamName']

        # Elo Lookup (unknown names get the tracker's base rating)
        d_elo = tracker.get_rating(drv)
        t_elo = tracker.get_rating(tm, True)

        form, cons, rel, aff = _driver_profile(drv, tm, dna['Type'])

        rows.append([row['GridPosition'], d_elo, t_elo, form, cons, aff, dna['Overtaking'], rel,
                     le_d.transform([drv])[0], le_t.transform([tm])[0], type_id, drv])

    p_df = pd.DataFrame(rows, columns=FEATS + ['Driver'])

    # 5. SIMULATION
    # Base ranking: highest model score = rank 1.
    p_df['Base_Score'] = model.predict(p_df[FEATS])
    p_df = p_df.sort_values('Base_Score', ascending=False).reset_index(drop=True)
    base_ranks = p_df.index.values + 1

    # Chaos Injection: sample position errors from the validation residuals.
    pool = np.asarray(residuals, dtype=float)
    if pool.size == 0:
        pool = np.zeros(1)  # no residuals available -> deterministic ranking
    errors = np.random.choice(pool, size=(N_DRIVERS, n_sims))

    # Modifiers
    weather_mod = 1.5 if is_wet else 1.0  # wet: 50% more variance
    cons_mod = np.clip(p_df['Consistency'].values / 2.5, 0.5, 1.5)
    chaos_mod = (0.5 + (dna['Overtaking'] / 10.0)) * weather_mod

    final_errors = errors * cons_mod[:, np.newaxis] * chaos_mod
    sim_ranks = base_ranks[:, np.newaxis] + final_errors

    # DNF Logic
    dnf_probs = 1.0 - p_df['Reliability'].values
    if is_wet:
        dnf_probs = dnf_probs * 1.5
    dnf_probs = np.clip(dnf_probs, 0.0, 1.0)

    # 999 marks a retirement; it sorts behind every real rank.
    sim_ranks[np.random.random((N_DRIVERS, n_sims)) < dnf_probs[:, np.newaxis]] = 999

    # Tally: order[r, s] is the driver finishing r-th in simulation s.
    order = np.argsort(sim_ranks, axis=0)
    finished = np.sort(sim_ranks, axis=0) < 900
    matrix = np.zeros((N_DRIVERS, N_DRIVERS))
    for r in range(N_DRIVERS):
        matrix[:, r] = np.bincount(order[r][finished[r]], minlength=N_DRIVERS)

    # 6. REPORT
    probs = (matrix / n_sims) * 100
    positions = np.arange(1, N_DRIVERS + 1)
    output = []
    for i, row in p_df.iterrows():
        p = probs[i]
        finish_pct = p.sum()
        output.append({
            'Driver': row['Driver'],
            'Grid': int(row['Grid']),
            'Win %': p[0],
            'Podium %': np.sum(p[:3]),
            'Points %': np.sum(p[:10]),
            # Average over simulations the driver actually finished. Dividing
            # by 100 would drag the average towards 0 for frequent retirees.
            'Avg Pos': np.sum(p * positions) / finish_pct if finish_pct > 0 else np.nan
        })

    print("\n🏆 PREDICTED OUTCOME:")
    res_df = pd.DataFrame(output).sort_values('Win %', ascending=False)
    # Format locally instead of changing pandas' global float_format option.
    print(res_df.to_string(index=False, float_format='{:,.1f}'.format))

# --- RUN WITH OPTIONAL WEATHER ---
# Default scenario: dry race with the default number of simulations.
run_dynasty_prediction(weather_forecast='Dry')
# Alternative scenario: uncomment to simulate a wet race.
# run_dynasty_prediction(weather_forecast='Wet')