<a href="https://colab.research.google.com/github/alb495/bettingModelAttempt/blob/F1/F1Predict.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Set Up
1. Let's look at the 2025 season standings (the most recent data)
2. Let's see what the columns are actually named to avoid the KeyError
3. Create the Team Power Rankings

Based on current Ergast structure, it's often 'constructorNames' or 'constructorIds'
We will use the first constructor listed for each driver


In [1]:
import fastf1
from fastf1.ergast import Ergast
import pandas as pd

ergast = Ergast()

# 1.
standings = ergast.get_driver_standings(season=2025)
df_2025 = standings.content[0]

# 2.
print("Actual Columns in Data:", df_2025.columns.tolist())

# 3.
if 'constructorNames' in df_2025.columns:
    # This converts the list of constructors to a single string
    df_2025['team'] = df_2025['constructorNames'].apply(lambda x: x[0] if isinstance(x, list) else x)

    team_strength = df_2025.groupby('team')['points'].sum().sort_values(ascending=False)
    print("\n--- 2025 Team Power Rankings ---")
    print(team_strength)
else:
    print("\nColumn 'constructorNames' not found. Check the column list above!")



Actual Columns in Data: ['position', 'positionText', 'points', 'wins', 'driverId', 'driverNumber', 'driverCode', 'driverUrl', 'givenName', 'familyName', 'dateOfBirth', 'driverNationality', 'constructorIds', 'constructorUrls', 'constructorNames', 'constructorNationalities']

--- 2025 Team Power Rankings ---
team
McLaren           833.0
Mercedes          469.0
Red Bull          459.0
Ferrari           398.0
Williams          137.0
Aston Martin       89.0
RB F1 Team         84.0
Haas F1 Team       79.0
Sauber             70.0
Alpine F1 Team     22.0
Name: points, dtype: float64


# The 2026 Model

Fetching Data
1. Get all races for the season
2. We want a list of every driver's result in every race
  - a. Adding a 'round' number to help us weight by time
3. Combine into one big DataFrame

In [2]:
import fastf1.ergast
import pandas as pd
import numpy as np

ergast = fastf1.ergast.Ergast()
season = 2025

# 1.
races = ergast.get_race_results(season=season)

# 2.
all_results = []
for race in races.content:
    # 2a.
    race['round'] = race['number']
    all_results.append(race)

# 3.
df_season = pd.concat(all_results)

Weighting the Model

1. Define how "aggressive" the weight is.
0.9 means each previous race is worth 90% of the one after it.
2. Calculate the weight for each race
3. Calculate "Weighted Points"
4. Group by driver to see who finished the year strongest


In [3]:
# 1.
alpha = 0.9
max_round = df_season['round'].max()

# 2.
df_season['weight'] = alpha ** (max_round - df_season['round'])

# 3.
df_season['weighted_points'] = df_season['points'] * df_season['weight']

# 4.
weighted_standings = df_season.groupby(['givenName', 'familyName'])['weighted_points'].sum().sort_values(ascending=False)

print("--- Weighted Driver Form (End of 2025) ---")
print(weighted_standings.head(10))

--- Weighted Driver Form (End of 2025) ---
givenName    familyName
Oscar        Piastri       14.348907
Oliver       Bearman        4.000000
George       Russell        2.392993
Carlos       Sainz          0.034337
Esteban      Ocon           0.027389
Alexander    Albon          0.018864
Nico         Hülkenberg     0.010782
Lewis        Hamilton       0.010775
Andrea Kimi  Antonelli      0.007400
Lance        Stroll         0.006962
Name: weighted_points, dtype: float64


#2026 Opening Power Ratings
1. Get Data
2. Feature Engineering
  - a. Weighting
  - b. Podium Streak (Last 5 Races)
  - c. Constructor Bonus (Top 3 Teams)
3. Generate 2026 Ranking
4. Ouput

In [8]:
import pandas as pd
import requests
import matplotlib.pyplot as plt
# 1.
def get_f1_data(year=2025):
    url = f"http://api.jolpi.ca/ergast/f1/{year}/results.json?limit=1000"
    data = requests.get(url).json()
    races = data['MRData']['RaceTable']['Races']
    results = []
    for race in races:
        for res in race['Results']:
            results.append({
                'round': int(race['round']),
                'driver': res['Driver']['familyName'],
                'constructor': res['Constructor']['name'],
                'points': float(res['points']),
                'pos': int(res['position'])
            })
    return pd.DataFrame(results)

df = get_f1_data(2025)

# 2.
# A.
alpha = 0.92
max_r = df['round'].max()
df['weight'] = alpha ** (max_r - df['round'])
df['weighted_pts'] = df['points'] * df['weight']

# B.
df['is_podium'] = (df['pos'] <= 3).astype(int)
streaks = df.groupby('driver').apply(lambda x: x.sort_values('round').tail(5)['is_podium'].sum())

# C.
top_3 = df.groupby('constructor')['points'].sum().nlargest(3).index.tolist()

# 3.
model = df.groupby('driver').agg({'weighted_pts': 'sum', 'constructor': 'last'})
model['streak'] = streaks
model['bonus'] = model['constructor'].apply(lambda x: 1.15 if x in top_3 else 1.0)
model['2026_score'] = (model['weighted_pts'] + (model['streak'] * 5)) * model['bonus']

# 4.
prediction = model.sort_values('2026_score', ascending=False)
print("--- 2026 F1 PREDICTED STANDINGS ---")
print(prediction[['constructor', '2026_score']].head(20))

--- 2026 F1 PREDICTED STANDINGS ---
               constructor  2026_score
driver                                
Piastri            McLaren  116.835384
Norris             McLaren  106.905619
Verstappen        Red Bull   96.323229
Russell           Mercedes   83.317747
Leclerc            Ferrari   44.062372
Antonelli         Mercedes   34.037032
Hamilton           Ferrari   20.994793
Albon             Williams   15.528858
Ocon          Haas F1 Team   11.466880
Stroll        Aston Martin    7.288520
Gasly       Alpine F1 Team    5.520000
Bearman       Haas F1 Team    4.881152
Sainz             Williams    4.778688
Hadjar          RB F1 Team    4.385600
Hülkenberg          Sauber    4.298358
Tsunoda           Red Bull    2.116000
Alonso        Aston Martin    0.000000
Lawson          RB F1 Team    0.000000
Doohan      Alpine F1 Team    0.000000
Bortoleto           Sauber    0.000000


  streaks = df.groupby('driver').apply(lambda x: x.sort_values('round').tail(5)['is_podium'].sum())
