In [1]:
import pandas as pd
import time
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from api import JolpicaF1API

In [2]:
api = JolpicaF1API()

In [3]:
test = api.getRaceResults(2024, 2)

In [4]:
def get_race_data(season: int) -> pd.DataFrame:
    """Get race data for a season including round numbers"""
    data = api.getRaceInfo(season=str(season))
    return pd.DataFrame(data['MRData']['RaceTable']['Races'])

def get_qualifying_data(season: int, round_num: int) -> pd.DataFrame:
    """Get qualifying results for a specific race"""
    data = api.getQualifyingResults(season=str(season), round=int(round_num))
    return pd.DataFrame(data['MRData']['RaceTable']['Races'][0]['QualifyingResults'])

def get_race_results(season: int, round_num: int) -> pd.DataFrame:
    """Get race results for a specific race"""
    data = api.getRaceResults(season=str(season), round=int(round_num))
    return pd.DataFrame(data['MRData']['RaceTable']['Races'][0]['Results'])

In [23]:
recent_years = list(range(2012, 2024))
british_gp_data = []

In [24]:
for year in recent_years:
    season_races = get_race_data(year)
    british_gp = season_races[season_races['raceName'].str.contains("British Grand Prix")]
    
    if not british_gp.empty:
        round_num = british_gp.iloc[0]['round']
        
        # Get qualifying and results
        qualifying = get_qualifying_data(year, round_num)
        results = get_race_results(year, round_num)
        
        # Add metadata
        qualifying['season'] = year
        qualifying['round'] = round_num
        results['season'] = year
        results['round'] = round_num
        
        british_gp_data.append({
            'year': year,
            'round': round_num,
            'qualifying': qualifying,
            'results': results
        })
    time.sleep(0.5) 

In [25]:
qualifying_dfs = []
results_dfs = []

for race in british_gp_data:
    q = race['qualifying'].copy()
    q['driverId'] = q['Driver'].apply(lambda x: x['driverId'])
    q['position'] = q['position'].astype(int)
    qualifying_dfs.append(q[['season', 'round', 'driverId', 'position']])
    
    r = race['results'].copy()
    r['driverId'] = r['Driver'].apply(lambda x: x['driverId'])
    r['positionOrder'] = r['position'].fillna(0).astype(int)
    r['positionOrder'] = r['positionOrder'].replace(0, r['positionOrder'].max() + 1)
    results_dfs.append(r[['season', 'round', 'driverId', 'positionOrder']])

qualifying_historical = pd.concat(qualifying_dfs)
results_historical = pd.concat(results_dfs)

In [26]:
# Calculate average positions
avg_qualifying = qualifying_historical.groupby('driverId')['position'].mean().reset_index()
avg_race = results_historical.groupby('driverId')['positionOrder'].mean().reset_index()

# Create target variable (winners)
winners = results_historical[results_historical['positionOrder'] == 1]['driverId'].unique()

# Merge features
features = avg_qualifying.merge(avg_race, on='driverId')
features['winner'] = features['driverId'].isin(winners).astype(int)
features = features.rename(columns={'position': 'qualifying_position', 'positionOrder': 'race_position'})

In [27]:
# Model training
X = features[['qualifying_position', 'race_position']]
y = features['winner']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=21)

model = RandomForestClassifier(n_estimators=200, random_state=21)
model.fit(X_train, y_train)

# Evaluation
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

Accuracy: 0.9230769230769231
Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.92      0.96        13
           1       0.00      0.00      0.00         0

    accuracy                           0.92        13
   macro avg       0.50      0.46      0.48        13
weighted avg       1.00      0.92      0.96        13



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [28]:
# Get 2024 data for prediction
current_races = get_race_data(2024)

# Find the British GP round
british_gp_2024 = current_races[current_races['raceName'].str.contains("British Grand Prix")]
british_gp_round = None

if not british_gp_2024.empty:
    british_gp_round = british_gp_2024.iloc[0]['round']

In [29]:
# Determine who's actually participating
try:
    british_gp_qualifying = get_qualifying_data(2024, british_gp_round)
    british_gp_drivers = set(british_gp_qualifying['Driver'].apply(lambda x: x['driverId']))
    print(f"Found {len(british_gp_drivers)} drivers participating in 2024 British GP")
except:
    # Fallback if the GP hasn't occurred yet
    print("GP qualifying data not available yet, using all active drivers")
    british_gp_drivers = None

Found 20 drivers participating in 2024 British GP


In [30]:
# Get current season data for all races so far
qualifying_2024 = []
results_2024 = []

for _, race in current_races.iterrows():
    try:
        q = get_qualifying_data(2024, race['round'])
        q['driverId'] = q['Driver'].apply(lambda x: x['driverId'])
        qualifying_2024.append(q[['driverId', 'position']])
        
        r = get_race_results(2024, race['round'])
        r['driverId'] = r['Driver'].apply(lambda x: x['driverId'])
        results_2024.append(r[['driverId', 'position']])
        time.sleep(0.5)
    except Exception as e:
        print(f"Could not get data for round {race['round']}: {e}")

In [31]:
current_qual = (
    pd.concat(qualifying_2024)
    .assign(position=lambda x: x['position'].fillna(0).astype(int))
    .groupby('driverId')['position']
    .mean()
    .reset_index()
)

current_results = (
    pd.concat(results_2024)
    .assign(position=lambda x: x['position'].fillna(0).astype(int))
    .groupby('driverId')['position']
    .mean()
    .reset_index()
)

In [32]:
# Handle any remaining invalid values before merging
max_position = max(current_qual['position'].max(), current_results['position'].max()) or 20
current_qual['position'] = current_qual['position'].replace(0, max_position + 1)
current_results['position'] = current_results['position'].replace(0, max_position + 1)

In [33]:
current_features = current_qual.merge(
    current_results,
    on='driverId',
    suffixes=('_qual', '_race')
).rename(columns={
    'position_qual': 'qualifying_position',
    'position_race': 'race_position'
})

In [34]:
# Filter only drivers participating in the British GP
if british_gp_drivers:
    current_features = current_features[current_features['driverId'].isin(british_gp_drivers)]
    print(f"Filtered to {len(current_features)} participating drivers")

current_features = current_features.fillna(max_position + 1)
current_features = current_features[(current_features['qualifying_position'] > 0) & 
                                    (current_features['race_position'] > 0)]

# Make predictions for the filtered list of drivers
current_features['predicted_winner'] = model.predict(
    current_features[['qualifying_position', 'race_position']].astype(float))

print("2024 British GP Predictions:")
print(current_features[['driverId', 'qualifying_position', 'race_position', 'predicted_winner']])

Filtered to 20 participating drivers
2024 British GP Predictions:
           driverId  qualifying_position  race_position  predicted_winner
0             albon            12.500000      14.625000                 0
1            alonso             9.666667      10.208333                 0
3            bottas            15.291667      15.875000                 0
6             gasly            13.166667      12.916667                 0
7          hamilton             8.541667       7.000000                 0
8        hulkenberg            11.791667      11.666667                 0
9   kevin_magnussen            14.318182      13.363636                 0
11          leclerc             5.333333       4.583333                 0
12   max_verstappen             2.916667       3.666667                 0
13           norris             3.583333       4.333333                 0
14             ocon            14.478261      13.739130                 0
15            perez             9.333333      