<a href="https://colab.research.google.com/github/abhijadhav14/F1-Predictions/blob/main/F1_Predictions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install fastf1

Collecting fastf1
  Downloading fastf1-3.5.3-py3-none-any.whl.metadata (4.6 kB)
Collecting rapidfuzz (from fastf1)
  Downloading rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting requests-cache>=1.0.0 (from fastf1)
  Downloading requests_cache-1.2.1-py3-none-any.whl.metadata (9.9 kB)
Collecting timple>=0.1.6 (from fastf1)
  Downloading timple-0.1.8-py3-none-any.whl.metadata (2.0 kB)
Collecting websockets<14,>=10.3 (from fastf1)
  Downloading websockets-13.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.8 kB)
Collecting cattrs>=22.2 (from requests-cache>=1.0.0->fastf1)
  Downloading cattrs-24.1.3-py3-none-any.whl.metadata (8.4 kB)
Collecting url-normalize>=1.4 (from requests-cache>=1.0.0->fastf1)
  Downloading url_normalize-2.2.0-py3-none-any.whl.metadata (4.9 kB)
Downloading fastf1-3.5.3-py3-none-any.whl (151 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m 

In [3]:
import fastf1
import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer

In [6]:
import os
import fastf1

# Directory handed to FastF1 as its on-disk cache location.
cache_dir = 'cache'

# exist_ok=True replaces the separate existence check, so the cell can be
# re-run safely and there is no gap between checking and creating.
os.makedirs(cache_dir, exist_ok=True)

fastf1.Cache.enable_cache(cache_dir)

In [8]:
def fetch_f1_data(year, round_number):
    """Fetch the qualifying results of one event via FastF1.

    Args:
        year: Season year passed to ``fastf1.get_session``.
        round_number: Round number passed to ``fastf1.get_session``.

    Returns:
        A DataFrame with the columns DriverNumber, Driver, TeamName, Q1, Q2,
        Q3 and the derived Q1_sec, Q2_sec, Q3_sec (segment times in seconds,
        NaN where the source value is missing), or None when anything went
        wrong while fetching or processing the session.
    """
    # Bound up front so the error handler can tell "session never created"
    # apart from "session created but processing failed".
    quali = None
    try:
        quali = fastf1.get_session(year, round_number, 'Q')
        quali.load()

        # Explicit copy: the column assignments below must act on an
        # independent frame, never on the session's own results table.
        results = (
            quali.results[['DriverNumber', 'FullName', 'TeamName', 'Q1', 'Q2', 'Q3']]
            .rename(columns={'FullName': 'Driver'})
            .copy()
        )

        # Vectorised conversion; missing times (NaT) become NaN.
        for col in ['Q1', 'Q2', 'Q3']:
            results[col + '_sec'] = pd.to_timedelta(results[col]).dt.total_seconds()

        print("\nQualifying Results Structure:")
        print(results.head())

        return results
    except Exception as e:
        print(f"Error fetching data: {e}")
        # Best-effort diagnostic only: the session object may not exist, or
        # its results may not be readable, so this must never raise itself.
        try:
            columns = quali.results.columns.tolist() if quali is not None else None
        except Exception:
            columns = None
        if columns is not None:
            print("DataFrame columns available:", columns)
        return None

In [9]:
def convert_time_to_seconds(time_str):
    """Convert a lap time to seconds.

    Accepted inputs:
        * Timedelta-like objects (anything exposing ``total_seconds()``),
          which is what the Q1/Q2/Q3 columns built by ``fetch_f1_data`` hold.
        * Strings of the form ``"ss.sss"``, ``"m:ss.sss"`` or ``"h:mm:ss.sss"``.
        * Plain numbers, interpreted as seconds.

    Args:
        time_str: The value to convert (the name is kept for compatibility;
            the value does not have to be a string).

    Returns:
        The time in seconds as a float, or None when the value is missing
        or cannot be parsed (a warning is printed in the latter case).
    """
    if pd.isna(time_str):
        return None

    # Timedelta values would otherwise fail the string handling below and be
    # silently turned into None.
    if hasattr(time_str, 'total_seconds'):
        return float(time_str.total_seconds())

    try:
        if isinstance(time_str, str):
            parts = time_str.strip().split(':')
            if len(parts) > 3:
                raise ValueError("too many ':'-separated fields")
            # Fold the fields left to right: each earlier field is worth
            # 60x the next one (hours -> minutes -> seconds).
            total = 0.0
            for part in parts:
                total = total * 60 + float(part)
            return total

        return float(time_str)
    except (ValueError, TypeError) as e:
        print(f"Warning: Could not convert time: {time_str}, Error: {e}")
        return None

In [10]:
def clean_data(df):
    """Add Q1_sec/Q2_sec/Q3_sec columns and drop incomplete rows.

    The conversion is done on a copy, so the DataFrame passed in by the
    caller is left exactly as it was (no columns are added to it).

    Args:
        df: DataFrame with at least the columns Driver, Q1, Q2 and Q3.

    Returns:
        A new DataFrame containing the original columns plus the three
        ``*_sec`` columns, restricted to rows with no missing value in any
        column.
    """
    print("\nBefore cleaning:")
    print(df[['Driver', 'Q1', 'Q2', 'Q3']].head())

    cleaned = df.copy()
    for segment in ('Q1', 'Q2', 'Q3'):
        cleaned[f'{segment}_sec'] = cleaned[segment].apply(convert_time_to_seconds)

    print("\nAfter cleaning:")
    print(cleaned[['Driver', 'Q1_sec', 'Q2_sec', 'Q3_sec']].head())

    return cleaned.dropna()

In [11]:
def visualize_data(df):
    """Show a box plot of the Q1, Q2 and Q3 lap-time columns (in seconds)."""
    segment_columns = ['Q1_sec', 'Q2_sec', 'Q3_sec']

    # boxplot hands back the axes it drew on; label them directly.
    axes = sns.boxplot(data=df[segment_columns])
    axes.set_title('Qualifying Lap Times (seconds)')
    axes.set_ylabel('Lap Time (seconds)')

    plt.show()

In [12]:
def train_and_evaluate(df):
    """Fit a linear model predicting Q3_sec from Q1_sec and Q2_sec and report it.

    The model is fitted on an 80% split of the rows that have all three
    times. Predictions are then printed, ranked by predicted time, for every
    row that has both Q1_sec and Q2_sec; rows without an actual Q3 time show
    "N/A". Finally the mean absolute error and R^2 on the held-out 20% are
    printed.

    Args:
        df: DataFrame with the columns Driver, TeamName, Q1_sec, Q2_sec and
            Q3_sec.

    Returns:
        None. All results are printed.
    """
    feature_cols = ['Q1_sec', 'Q2_sec']
    target_col = 'Q3_sec'

    # Only complete rows can be used for fitting and scoring; NaN values
    # would make the regression fail.
    trainable = df.dropna(subset=feature_cols + [target_col])
    X = trainable[feature_cols]
    y = trainable[target_col]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    model = LinearRegression()
    model.fit(X_train, y_train)

    # Predict for every row with usable features, including drivers that
    # never set a Q3 time.
    predictable = df.dropna(subset=feature_cols)
    results_df = predictable[['Driver', 'TeamName', 'Q1_sec', 'Q2_sec', 'Q3_sec']].copy()
    results_df['Predicted_Q3'] = model.predict(predictable[feature_cols])
    results_df['Difference'] = results_df['Predicted_Q3'] - results_df['Q3_sec']

    results_df = results_df.sort_values('Predicted_Q3')

    print("\nPredicted Q3 Rankings:")
    print("=" * 70)
    print(f"{'Position':<10}{'Driver':<15}{'Team':<20}{'Predicted Time':<15}{'Actual Time':<15}")
    print("-" * 70)

    # enumerate gives the rank directly; looking the position up through the
    # index label fails when labels repeat (e.g. several races concatenated).
    for position, (_, row) in enumerate(results_df.iterrows(), start=1):
        pred_time = f"{row['Predicted_Q3']:.3f}"
        actual_time = f"{row['Q3_sec']:.3f}" if not pd.isna(row['Q3_sec']) else "N/A"
        print(f"{position:<10}{row['Driver']:<15}{row['TeamName']:<20}{pred_time:<15}{actual_time:<15}")

    y_pred = model.predict(X_test)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    print("\nModel Performance Metrics:")
    print(f'Mean Absolute Error: {mae:.2f} seconds')
    print(f'R^2 Score: {r2:.2f}')

In [13]:
def fetch_recent_data():
    """Fetch qualifying data for the sessions used to train the model.

    Requests rounds 1-4 of 2025 and round 4 of 2024 through
    ``fetch_f1_data``. Sessions that could not be fetched (None) or that
    came back without any rows are skipped, so every returned frame holds
    actual results.

    Returns:
        A list of DataFrames, one per usable session, each extended with
        ``Year`` and ``Round`` columns. The list is empty when nothing could
        be fetched.
    """
    all_data = []

    current_year = 2025
    for round_num in range(1, 5):  # First 4 races of 2025
        print(f"Fetching data for {current_year} round {round_num}...")
        df = fetch_f1_data(current_year, round_num)
        # A session that is not available yet can come back as an empty
        # frame rather than None; it carries no information, so skip it.
        if df is not None and not df.empty:
            # assign() returns a new frame instead of writing into the
            # fetched one.
            all_data.append(df.assign(Year=current_year, Round=round_num))

    print("Fetching 2024 Japanese GP data...")
    japan_2024 = fetch_f1_data(2024, 4)
    if japan_2024 is not None and not japan_2024.empty:
        all_data.append(japan_2024.assign(Year=2024, Round=4))

    return all_data

In [14]:
def apply_performance_factors(predictions_df, seed=None):
    """Apply 2025-specific performance factors.

    For every row the prediction is
    ``base_time * team_factor * driver_factor`` plus a uniform random
    variation in [-0.1, 0.1) seconds. Teams missing from the table use the
    factor 1.005, drivers missing from the table use 1.002.

    Args:
        predictions_df: DataFrame with ``Driver`` and ``Team`` columns. It is
            modified in place: a ``Predicted_Q3`` column is added or
            overwritten.
        seed: Optional seed for the random variation. When given, the noise
            comes from a dedicated generator and the result is reproducible;
            when None, NumPy's global random state is used as before.

    Returns:
        The same DataFrame object that was passed in.
    """
    base_time = 89.5  # in seconds

    # Multiplicative factors; the comments give the approximate delta at the
    # 89.5 s base time.
    team_factors = {
        'Red Bull Racing': 0.997,    # ~ -0.27s
        'Ferrari': 0.998,            # ~ -0.18s
        'McLaren': 0.999,            # ~ -0.09s
        'Mercedes': 0.999,           # ~ -0.09s
        'Aston Martin': 1.001,       # ~ +0.09s
        'RB': 1.002,                 # ~ +0.18s
        'Williams': 1.003,           # ~ +0.27s
        'Haas F1 Team': 1.004,       # ~ +0.36s
        'Kick Sauber': 1.004,        # ~ +0.36s (Audi development)
        'Alpine': 1.005,             # ~ +0.45s
    }

    driver_factors = {
        'Max Verstappen': 0.998,     # ~ -0.18s (exceptional)
        'Charles Leclerc': 0.999,    # ~ -0.09s (very strong qualifier)
        'Carlos Sainz': 0.999,       # ~ -0.09s (very consistent)
        'Lando Norris': 0.999,       # ~ -0.09s (McLaren leader)
        'Oscar Piastri': 1.000,      # Base time (strong)
        'Sergio Perez': 1.000,       # Base time
        'Lewis Hamilton': 1.000,     # Base time
        'George Russell': 1.000,     # Base time
        'Fernando Alonso': 1.000,    # Base time
        'Lance Stroll': 1.001,       # ~ +0.09s
        'Alex Albon': 1.001,         # ~ +0.09s
        # Same driver under the full name used by predict_japanese_gp;
        # without this entry he silently received the unknown-driver default.
        'Alexander Albon': 1.001,    # ~ +0.09s
        'Daniel Ricciardo': 1.001,   # ~ +0.09s
        'Yuki Tsunoda': 1.002,       # ~ +0.18s
        'Valtteri Bottas': 1.002,    # ~ +0.18s
        'Zhou Guanyu': 1.003,        # ~ +0.27s
        'Kevin Magnussen': 1.003,    # ~ +0.27s
        'Nico Hulkenberg': 1.003,    # ~ +0.27s
        'Logan Sargeant': 1.004,     # ~ +0.36s
        'Pierre Gasly': 1.004,       # ~ +0.36s
        'Esteban Ocon': 1.004,       # ~ +0.36s
    }

    # Column-wise lookup; unknown names fall back to the defaults.
    team_factor = predictions_df['Team'].map(team_factors).fillna(1.005).astype(float)
    driver_factor = predictions_df['Driver'].map(driver_factors).fillna(1.002).astype(float)
    base_prediction = base_time * team_factor * driver_factor

    n_rows = len(predictions_df)
    if seed is None:
        random_variation = np.random.uniform(-0.1, 0.1, size=n_rows)
    else:
        random_variation = np.random.default_rng(seed).uniform(-0.1, 0.1, size=n_rows)

    # Positional assignment (plain arrays) so that repeated index labels do
    # not make several rows share one value.
    predictions_df['Predicted_Q3'] = base_prediction.to_numpy() + random_variation

    return predictions_df

In [15]:
def predict_japanese_gp(model, latest_data):
    """Predict Q3 times for Japanese GP 2025.

    Builds a driver/team table from the hard-coded line-up below, lets
    ``apply_performance_factors`` compute a ``Predicted_Q3`` for each entry,
    prints the table ranked by predicted time and returns it.

    Args:
        model: Accepted for interface compatibility; not used by the current
            implementation.
        latest_data: Accepted for interface compatibility; not used by the
            current implementation.

    Returns:
        DataFrame with the columns Driver, Team and Predicted_Q3, sorted by
        Predicted_Q3 ascending (fastest first).
    """
    # NOTE(review): the predictions come only from the static factors; the
    # fitted model and the fetched data passed in are ignored.
    driver_teams = {
        'Max Verstappen': 'Red Bull Racing',
        'Sergio Perez': 'Red Bull Racing',
        'Charles Leclerc': 'Ferrari',
        'Carlos Sainz': 'Ferrari',
        'Lewis Hamilton': 'Mercedes',
        'George Russell': 'Mercedes',
        'Lando Norris': 'McLaren',
        'Oscar Piastri': 'McLaren',
        'Fernando Alonso': 'Aston Martin',
        'Lance Stroll': 'Aston Martin',
        'Daniel Ricciardo': 'RB',
        'Yuki Tsunoda': 'RB',
        'Alexander Albon': 'Williams',
        'Logan Sargeant': 'Williams',
        'Valtteri Bottas': 'Kick Sauber',
        'Zhou Guanyu': 'Kick Sauber',
        'Kevin Magnussen': 'Haas F1 Team',
        'Nico Hulkenberg': 'Haas F1 Team',
        'Pierre Gasly': 'Alpine',
        'Esteban Ocon': 'Alpine'
    }

    results_df = pd.DataFrame(list(driver_teams.items()), columns=['Driver', 'Team'])

    results_df = apply_performance_factors(results_df)

    results_df = results_df.sort_values('Predicted_Q3')

    print("\nJapanese GP 2025 Qualifying Predictions:")
    print("=" * 100)
    print(f"{'Position':<10}{'Driver':<20}{'Team':<25}{'Predicted Q3':<15}")
    print("-" * 100)

    # The frame is already sorted, so the running count is the position.
    for position, (_, row) in enumerate(results_df.iterrows(), start=1):
        print(f"{position:<10}"
              f"{row['Driver']:<20}"
              f"{row['Team']:<25}"
              f"{row['Predicted_Q3']:.3f}s")

    # Returned so callers can inspect or reuse the predictions instead of
    # only seeing them printed.
    return results_df


if __name__ == "__main__":
    # Driver: fetch the sessions, fit Q3 ~ Q1 + Q2 on complete rows, print
    # the Japanese GP prediction table and report held-out model metrics.
    print("Initializing enhanced F1 prediction model...")

    all_data = fetch_recent_data()

    # Sessions without any rows add nothing to the training set.
    session_frames = [frame for frame in (all_data or []) if not frame.empty]

    # Smallest number of complete rows for which an 80/20 split still leaves
    # at least two test rows (R^2 is undefined on a single sample).
    min_rows = 10

    if session_frames:
        combined_df = pd.concat(session_frames, ignore_index=True)

        # Keep only drivers with an observed time in all three segments.
        # Filling a missing Q3 (the target) with a median taken across
        # different circuits would fabricate training labels.
        valid_data = combined_df.dropna(subset=['Q1_sec', 'Q2_sec', 'Q3_sec'])

        if len(valid_data) < min_rows:
            print("Not enough complete qualifying results to train the model")
        else:
            X = valid_data[['Q1_sec', 'Q2_sec']]
            y = valid_data['Q3_sec']

            # Hold out part of the data so the metrics below are not
            # computed on the rows the model was fitted to.
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=0.2, random_state=42
            )

            model = LinearRegression()
            model.fit(X_train, y_train)

            predict_japanese_gp(model, valid_data)

            y_pred = model.predict(X_test)
            mae = mean_absolute_error(y_test, y_pred)
            r2 = r2_score(y_test, y_pred)

            print("\nModel Performance Metrics:")
            print(f'Mean Absolute Error: {mae:.2f} seconds')
            print(f'R^2 Score: {r2:.2f}')
    else:
        print("Failed to fetch F1 data")

Initializing enhanced F1 prediction model...
Fetching data for 2025 round 1...


core           INFO 	Loading data for Australian Grand Prix - Qualifying [v3.5.3]
INFO:fastf1.fastf1.core:Loading data for Australian Grand Prix - Qualifying [v3.5.3]
req            INFO 	No cached data found for session_info. Loading data...
INFO:fastf1.fastf1.req:No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
INFO:fastf1.api:Fetching session info data...
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for driver_info. Loading data...
INFO:fastf1.fastf1.req:No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
INFO:fastf1.api:Fetching driver list...
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
DEBUG:fastf1.ergast:Failed to parse timestamp '' in Ergastresponse.
req            INFO 	No cached data found for session_status_


Qualifying Results Structure:
   DriverNumber          Driver         TeamName                     Q1  \
4             4    Lando Norris          McLaren 0 days 00:01:15.912000   
81           81   Oscar Piastri          McLaren 0 days 00:01:16.062000   
1             1  Max Verstappen  Red Bull Racing 0 days 00:01:16.018000   
63           63  George Russell         Mercedes 0 days 00:01:15.971000   
22           22    Yuki Tsunoda     Racing Bulls 0 days 00:01:16.225000   

                       Q2                     Q3  Q1_sec  Q2_sec  Q3_sec  
4  0 days 00:01:15.415000 0 days 00:01:15.096000  75.912  75.415  75.096  
81 0 days 00:01:15.468000 0 days 00:01:15.180000  76.062  75.468  75.180  
1  0 days 00:01:15.565000 0 days 00:01:15.481000  76.018  75.565  75.481  
63 0 days 00:01:15.798000 0 days 00:01:15.546000  75.971  75.798  75.546  
22 0 days 00:01:16.009000 0 days 00:01:15.670000  76.225  76.009  75.670  
Fetching data for 2025 round 2...


req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for driver_info. Loading data...
INFO:fastf1.fastf1.req:No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
INFO:fastf1.api:Fetching driver list...
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for session_status_data. Loading data...
INFO:fastf1.fastf1.req:No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
INFO:fastf1.api:Fetching session status data...
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for track_status_data. Loading data...
INFO:fastf1.fastf1.req:No cached data found for track_status_data. Loading data...
_api  


Qualifying Results Structure:
   DriverNumber          Driver         TeamName                     Q1  \
81           81   Oscar Piastri          McLaren 0 days 00:01:31.591000   
63           63  George Russell         Mercedes 0 days 00:01:31.295000   
4             4    Lando Norris          McLaren 0 days 00:01:30.983000   
1             1  Max Verstappen  Red Bull Racing 0 days 00:01:31.424000   
44           44  Lewis Hamilton          Ferrari 0 days 00:01:31.690000   

                       Q2                     Q3  Q1_sec  Q2_sec  Q3_sec  
81 0 days 00:01:31.200000 0 days 00:01:30.641000  91.591  91.200  90.641  
63 0 days 00:01:31.307000 0 days 00:01:30.723000  91.295  91.307  90.723  
4  0 days 00:01:30.787000 0 days 00:01:30.793000  90.983  90.787  90.793  
1  0 days 00:01:31.142000 0 days 00:01:30.817000  91.424  91.142  90.817  
44 0 days 00:01:31.501000 0 days 00:01:30.927000  91.690  91.501  90.927  
Fetching data for 2025 round 3...


req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for session_status_data. Loading data...
INFO:fastf1.fastf1.req:No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
INFO:fastf1.api:Fetching session status data...
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for track_status_data. Loading data...
INFO:fastf1.fastf1.req:No cached data found for track_status_data. Loading data...
_api           INFO 	Fetching track status data...
INFO:fastf1.api:Fetching track status data...
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for _extended_timing_data. Loading data...
INFO:fastf1.fastf1.req:No cached data found for _extended_tim


Qualifying Results Structure:
   DriverNumber           Driver         TeamName                     Q1  \
1             1   Max Verstappen  Red Bull Racing 0 days 00:01:27.943000   
4             4     Lando Norris          McLaren 0 days 00:01:27.845000   
81           81    Oscar Piastri          McLaren 0 days 00:01:27.687000   
16           16  Charles Leclerc          Ferrari 0 days 00:01:27.920000   
63           63   George Russell         Mercedes 0 days 00:01:27.843000   

                       Q2                     Q3  Q1_sec  Q2_sec  Q3_sec  
1  0 days 00:01:27.502000 0 days 00:01:26.983000  87.943  87.502  86.983  
4  0 days 00:01:27.146000 0 days 00:01:26.995000  87.845  87.146  86.995  
81 0 days 00:01:27.507000 0 days 00:01:27.027000  87.687  87.507  87.027  
16 0 days 00:01:27.555000 0 days 00:01:27.299000  87.920  87.555  87.299  
63 0 days 00:01:27.400000 0 days 00:01:27.318000  87.843  87.400  87.318  
Fetching data for 2025 round 4...


DEBUG:fastf1.api:Falling back to livetiming mirror (https://livetiming-mirror.fastf1.dev)
DEBUG:fastf1.fastf1.core:Traceback for failure in session info data
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/fastf1/logger.py", line 151, in __wrapped
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/fastf1/core.py", line 1470, in _load_session_info
    self._session_info = api.session_info(self.api_path,
                         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/fastf1/req.py", line 479, in _cached_api_request
    data = func(api_path, **func_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/fastf1/_api.py", line 1687, in session_info
    raise SessionNotAvailableError(
fastf1._api.SessionNotAvailableError: No data for this session! If this session only finished recently, please try again in a few mi


Qualifying Results Structure:
Empty DataFrame
Columns: [DriverNumber, Driver, TeamName, Q1, Q2, Q3, Q1_sec, Q2_sec, Q3_sec]
Index: []
Fetching 2024 Japanese GP data...


req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for driver_info. Loading data...
INFO:fastf1.fastf1.req:No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
INFO:fastf1.api:Fetching driver list...
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for session_status_data. Loading data...
INFO:fastf1.fastf1.req:No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
INFO:fastf1.api:Fetching session status data...
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for track_status_data. Loading data...
INFO:fastf1.fastf1.req:No cached data found for track_status_data. Loading data...
_api  


Qualifying Results Structure:
   DriverNumber           Driver         TeamName                     Q1  \
1             1   Max Verstappen  Red Bull Racing 0 days 00:01:28.866000   
11           11     Sergio Perez  Red Bull Racing 0 days 00:01:29.303000   
4             4     Lando Norris          McLaren 0 days 00:01:29.536000   
55           55     Carlos Sainz          Ferrari 0 days 00:01:29.513000   
14           14  Fernando Alonso     Aston Martin 0 days 00:01:29.254000   

                       Q2                     Q3  Q1_sec  Q2_sec  Q3_sec  
1  0 days 00:01:28.740000 0 days 00:01:28.197000  88.866  88.740  88.197  
11 0 days 00:01:28.752000 0 days 00:01:28.263000  89.303  88.752  88.263  
4  0 days 00:01:28.940000 0 days 00:01:28.489000  89.536  88.940  88.489  
55 0 days 00:01:29.099000 0 days 00:01:28.682000  89.513  89.099  88.682  
14 0 days 00:01:29.082000 0 days 00:01:28.686000  89.254  89.082  88.686  

Japanese GP 2025 Qualifying Predictions:
Position  Driver    