In [None]:
!pip install --upgrade fastf1

In [41]:
#Import functions
import fastf1 as ff1
import pandas as pd
import requests
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer

In [27]:
# Directory handed to FastF1's cache so repeated runs can reuse downloaded data
cache_dir = 'f1_cache'
# exist_ok=True keeps this cell idempotent on re-run and avoids the check-then-create race
os.makedirs(cache_dir, exist_ok=True)
ff1.Cache.enable_cache(cache_dir)

In [42]:
def fetch_f1_data(year, round_number, cache_dir):
  """Load one qualifying session via FastF1 and return its results with times in seconds.

  Args:
    year: Season passed to ``ff1.get_session``.
    round_number: Round passed to ``ff1.get_session``.
    cache_dir: Not used inside this function; kept so existing callers that
      pass it keep working.

  Returns:
    A DataFrame with the columns DriverNumber, Driver, TeamName, Q1, Q2, Q3
    plus Q1_sec, Q2_sec, Q3_sec (float seconds, NaN where no time is set),
    or None if fetching or processing raised an exception.
  """
  # Bound before the try so the error handler can never hit an unbound name
  # when get_session() itself is what failed.
  quali = None
  try:
    quali = ff1.get_session(year, round_number, 'Q')
    quali.load()

    # .copy() gives us our own frame, so adding the *_sec columns below does
    # not trigger pandas' SettingWithCopyWarning.
    results = quali.results[['DriverNumber', 'FullName', 'TeamName', 'Q1', 'Q2', 'Q3']].copy()
    results = results.rename(columns={'FullName': 'Driver'})

    # Vectorised conversion; missing times (NaT) become NaN.
    for col in ['Q1', 'Q2', 'Q3']:
      results[col + '_sec'] = pd.to_timedelta(results[col]).dt.total_seconds()

    print("\n Qualifying Reesults Structure:")
    print(results.head())

    return results
  except Exception as e:
    print(f"Error fetching data: {e}")
    # The column listing is a debugging aid only; it must never mask the
    # original error, so any failure to read it is ignored.
    try:
      available_columns = quali.results.columns.tolist()
    except Exception:
      available_columns = None
    if available_columns is not None:
      print("DataFrame columns available:", available_columns)
    return None

In [29]:
def convert_time_to_seconds(time_str):
  """Convert a lap time to float seconds.

  Accepted inputs:
    * colon-separated strings such as 'M:SS.mmm' or 'H:MM:SS.mmm'
    * plain numeric strings or numbers (already in seconds)
    * timedelta-like objects, i.e. anything exposing ``total_seconds()``

  Returns:
    The time in seconds as a float, or None when the value is missing
    (``pd.isna``) or cannot be parsed (a message is printed in that case).
  """
  if pd.isna(time_str):
    return None

  # Timedelta values would otherwise fail the "':' in ..." test below with a
  # TypeError and be silently turned into None.
  if hasattr(time_str, 'total_seconds'):
    return time_str.total_seconds()

  try:
    if isinstance(time_str, str) and ':' in time_str:
      # Fold left in base 60 so both M:SS and H:MM:SS are handled.
      total = 0.0
      for part in time_str.split(':'):
        total = total * 60 + float(part)
      return total
    return float(time_str)
  except (ValueError, TypeError) as e:
    print(f"Error converting time: {time_str}, Error: {e}")
    return None

In [30]:
def clean_data(df):
  """Add Q1_sec/Q2_sec/Q3_sec columns and drop incomplete rows.

  The input frame is left untouched; all work happens on a copy so the cell
  can be re-run and the caller's data is not silently modified.

  Args:
    df: DataFrame with at least the columns Driver, Q1, Q2 and Q3.

  Returns:
    A new DataFrame with the three *_sec columns added (via
    convert_time_to_seconds) and every row containing any missing value
    removed.
  """
  print('\n Before cleaning:')
  print(df[['Driver', 'Q1', 'Q2', 'Q3']].head())

  cleaned = df.copy()
  for session in ('Q1', 'Q2', 'Q3'):
    cleaned[session + '_sec'] = cleaned[session].apply(convert_time_to_seconds)

  print('\n After cleaning:')
  print(cleaned[['Driver', 'Q1_sec', 'Q2_sec', 'Q3_sec']].head())

  # dropna() without a subset: a missing value in ANY column removes the row.
  return cleaned.dropna()

In [31]:
def visualizze_data(df):
  """Display a box plot of the Q1_sec, Q2_sec and Q3_sec columns of ``df``."""
  lap_time_columns = ['Q1_sec', 'Q2_sec', 'Q3_sec']
  # seaborn returns the Axes it drew on, so title and label are set on it directly
  ax = sns.boxplot(data=df[lap_time_columns])
  ax.set_title('Qualifying Lap Times (seconds)')
  ax.set_ylabel('Lap Time (seconds)')
  plt.show()

In [32]:
def train_and_evaluate(df):
    """Fit a linear model for Q3_sec from Q1_sec and Q2_sec and print the results.

    Prints a ranking table ordered by predicted Q3 time, followed by the mean
    absolute error and R^2 measured on a 20% hold-out split.

    Args:
        df: DataFrame with the columns Driver, TeamName, Q1_sec, Q2_sec, Q3_sec.
            NOTE(review): nothing here filters or imputes missing values, so
            the frame presumably has to be NaN-free in the feature and target
            columns (e.g. the output of clean_data) - confirm with callers.

    Returns:
        None; all results are printed.
    """
    X = df[['Q1_sec', 'Q2_sec']]
    y = df['Q3_sec']

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    model = LinearRegression()
    model.fit(X_train, y_train)

    # The table covers every row of df, including the rows used for training.
    results_df = df[['Driver', 'TeamName', 'Q1_sec', 'Q2_sec', 'Q3_sec']].copy()
    results_df['Predicted_Q3'] = model.predict(X)
    results_df['Difference'] = results_df['Predicted_Q3'] - results_df['Q3_sec']
    results_df = results_df.sort_values('Predicted_Q3')

    print("\nPredicted Q3 Rankings:")
    print("=" * 70)
    print(f"{'Position':<10}{'Driver':<15}{'Team':<20}{'Predicted Time':<15}{'Actual Time':<15}")
    print("-" * 70)

    # Position comes from enumerate rather than index.get_loc(): get_loc
    # returns a slice or mask when index labels repeat (e.g. results from
    # several races concatenated), which cannot be formatted as a number.
    for position, (_, row) in enumerate(results_df.iterrows(), start=1):
        pred_time = f"{row['Predicted_Q3']:.3f}"
        actual_time = f"{row['Q3_sec']:.3f}" if not pd.isna(row['Q3_sec']) else "N/A"
        print(f"{position:<10}{row['Driver']:<15}{row['TeamName']:<20}{pred_time:<15}{actual_time:<15}")

    # Metrics use only the held-out rows.
    y_pred = model.predict(X_test)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    print("\nModel Performance Metrics:")
    print(f'Mean Absolute Error: {mae:.2f} seconds')
    print(f'R^2 Score: {r2:.2f}')


In [38]:
def fetch_recent_data(cache_dir='f1_cache'):
    """Fetch qualifying data for 2025 rounds 1-4 and the 2024 Japanese GP (round 4).

    Args:
        cache_dir: Directory used for the FastF1 cache; created if missing.

    Returns:
        A list of DataFrames, one per session that actually returned rows,
        each with added 'Year' and 'Round' columns. Sessions for which
        fetch_f1_data returned None or an empty frame are skipped.
    """
    # Keep the path ourselves: the value of enable_cache() is not a directory
    # path, so it must not be forwarded as cache_dir.
    os.makedirs(cache_dir, exist_ok=True)
    ff1.Cache.enable_cache(cache_dir)

    current_year = 2025
    # (year, round, progress message) for every session to fetch, in order.
    sessions = [
        (current_year, round_num, f"Fetching data for {current_year} round {round_num}...")
        for round_num in range(1, 5)  # First 4 races of 2025
    ]
    sessions.append((2024, 4, "Fetching 2024 Japanese GP data..."))

    all_data = []
    for year, round_num, message in sessions:
        print(message)
        df = fetch_f1_data(year, round_num, cache_dir)
        # Sessions without data can come back as an empty frame; they add nothing.
        if df is None or df.empty:
            continue
        df['Year'] = year
        df['Round'] = round_num
        all_data.append(df)

    return all_data

In [39]:
def apply_performance_factors(predictions_df, rng=None):
    """Fill 'Predicted_Q3' from a fixed base time scaled by team and driver factors.

    Each prediction is ``base_time * team_factor * driver_factor`` plus uniform
    noise in [-0.1, 0.1) seconds. Teams missing from the table get 1.005 and
    drivers missing from the table get 1.002.

    Args:
        predictions_df: DataFrame with 'Team' and 'Driver' columns. It is
            modified in place (the 'Predicted_Q3' column is written).
        rng: Optional object with a ``uniform(low, high, size=...)`` method,
            e.g. ``np.random.default_rng(seed)``, for reproducible noise.
            Defaults to NumPy's global random state.

    Returns:
        The same ``predictions_df`` object, with 'Predicted_Q3' set.
    """
    base_time = 89.5  # in seconds

    # Multipliers on base_time; 0.001 is roughly 0.09s at this base.
    team_factors = {
        'Red Bull Racing': 0.997,  # ~ -0.27s
        'Ferrari': 0.998,          # ~ -0.18s
        'McLaren': 0.999,          # ~ -0.09s
        'Mercedes': 0.999,         # ~ -0.09s
        'Aston Martin': 1.001,     # ~ +0.09s
        'RB': 1.002,               # ~ +0.18s
        'Racing Bulls': 1.002,     # same team; this is the name seen in the FastF1 output
        'Williams': 1.003,         # ~ +0.27s
        'Haas F1 Team': 1.004,     # ~ +0.36s
        'Kick Sauber': 1.004,      # ~ +0.36s
        'Alpine': 1.005,           # ~ +0.45s
    }

    driver_factors = {
        'Max Verstappen': 0.998,
        'Charles Leclerc': 0.999,
        'Carlos Sainz': 0.999,
        'Lando Norris': 0.999,
        'Oscar Piastri': 1.000,
        'Sergio Perez': 1.000,
        'Lewis Hamilton': 1.000,
        'George Russell': 1.000,
        'Fernando Alonso': 1.000,
        'Lance Stroll': 1.001,
        'Alex Albon': 1.001,
        # Alias: callers use the full first name, which never matched 'Alex Albon'.
        'Alexander Albon': 1.001,
        'Daniel Ricciardo': 1.001,
        'Yuki Tsunoda': 1.002,
        'Valtteri Bottas': 1.002,
        'Zhou Guanyu': 1.003,
        'Kevin Magnussen': 1.003,
        'Nico Hulkenberg': 1.003,
        'Logan Sargeant': 1.004,
        'Pierre Gasly': 1.004,
        'Esteban Ocon': 1.004,
    }

    # Plain arrays keep the arithmetic positional, independent of index labels.
    team_factor = predictions_df['Team'].map(team_factors).fillna(1.005).to_numpy(dtype=float)
    driver_factor = predictions_df['Driver'].map(driver_factors).fillna(1.002).to_numpy(dtype=float)

    noise_source = np.random if rng is None else rng
    random_variation = noise_source.uniform(-0.1, 0.1, size=len(predictions_df))

    predictions_df['Predicted_Q3'] = base_time * team_factor * driver_factor + random_variation
    return predictions_df

In [44]:
def predict_japanese_gp(model, latest_data):
    """Print a predicted Q3 ranking for the 2025 Japanese GP.

    The ranking is built from a hard-coded driver/team line-up that is passed
    through apply_performance_factors and sorted by 'Predicted_Q3'.

    Args:
        model: Accepted for interface compatibility; not used in this function.
        latest_data: Accepted for interface compatibility; not used in this
            function.

    Returns:
        None; the table is printed.
    """
    driver_teams = {
        'Lando Norris': 'McLaren',
        'Oscar Piastri': 'McLaren',

        'Max Verstappen': 'Red Bull Racing',
        'Yuki Tsunoda': 'Red Bull Racing',

        'Charles Leclerc': 'Ferrari',
        'Lewis Hamilton': 'Ferrari',

        'Carlos Sainz': 'Williams',
        'Alexander Albon': 'Williams',

        'George Russell': 'Mercedes',
        'Kimi Antonelli': 'Mercedes',

        'Fernando Alonso': 'Aston Martin',
        'Lance Stroll': 'Aston Martin',

        # Bortoleto is Hulkenberg's team-mate at Kick Sauber; he was listed
        # under Alpine, which gave Alpine three drivers and Kick Sauber one.
        'Gabriel Bortoleto': 'Kick Sauber',
        'Nico Hulkenberg': 'Kick Sauber',

        'Esteban Ocon': 'Haas F1 Team',
        'Oliver Bearman': 'Haas F1 Team',

        'Pierre Gasly': 'Alpine',
        'Jack Doohan': 'Alpine',

        'Isack Hadjar': 'RB',
        'Liam Lawson': 'RB',
    }

    results_df = pd.DataFrame(list(driver_teams.items()), columns=['Driver', 'Team'])
    results_df = apply_performance_factors(results_df)
    results_df = results_df.sort_values('Predicted_Q3')

    print("\nJapanese GP 2025 Qualifying Predictions:")
    print("=" * 100)
    print(f"{'Position':<10}{'Driver':<20}{'Team':<25}{'Predicted Q3':<15}")
    print("-" * 100)

    # enumerate gives the position directly instead of an index lookup per row.
    for position, (_, row) in enumerate(results_df.iterrows(), start=1):
        print(f"{position:<10}"
              f"{row['Driver']:<20}"
              f"{row['Team']:<25}"
              f"{row['Predicted_Q3']:.3f}s")


# Entry point: fetch data, fit the Q1/Q2 -> Q3 regression, print the Japanese GP
# prediction table and the fit metrics.
if __name__ == "__main__":
    print("Initializing enhanced F1 prediction model...")

    all_data = fetch_recent_data()

    if all_data:
        combined_df = pd.concat(all_data, ignore_index=True)
        # Train only on rows with a real Q3 time. Imputing the target would
        # invent labels for drivers who never took part in Q3.
        valid_data = combined_df.dropna(subset=['Q3_sec'])
    else:
        valid_data = pd.DataFrame()

    if not valid_data.empty:
        X = valid_data[['Q1_sec', 'Q2_sec']]
        y_clean = valid_data['Q3_sec'].reset_index(drop=True)

        # Only the features are imputed (median), in case a Q1/Q2 time is missing.
        imputer = SimpleImputer(strategy='median')
        X_clean = pd.DataFrame(imputer.fit_transform(X), columns=X.columns)

        model = LinearRegression()
        model.fit(X_clean, y_clean)

        predict_japanese_gp(model, valid_data)

        # These metrics are computed on the same rows the model was fitted on
        # (in-sample), so they are an optimistic estimate.
        y_pred = model.predict(X_clean)
        mae = mean_absolute_error(y_clean, y_pred)
        r2 = r2_score(y_clean, y_pred)

        print("\nModel Performance Metrics:")
        print(f'Mean Absolute Error: {mae:.2f} seconds')
        print(f'R^2 Score: {r2:.2f}')
    else:
        print("Failed to fetch F1 data")

core           INFO 	Loading data for Australian Grand Prix - Qualifying [v3.5.3]
INFO:fastf1.fastf1.core:Loading data for Australian Grand Prix - Qualifying [v3.5.3]
req            INFO 	Using cached data for session_info
INFO:fastf1.fastf1.req:Using cached data for session_info
req            INFO 	Using cached data for driver_info
INFO:fastf1.fastf1.req:Using cached data for driver_info
DEBUG:fastf1.ergast:Failed to parse timestamp '' in Ergastresponse.


Initializing enhanced F1 prediction model...
Fetching data for 2025 round 1...


req            INFO 	Using cached data for session_status_data
INFO:fastf1.fastf1.req:Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
INFO:fastf1.fastf1.req:Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
INFO:fastf1.fastf1.req:Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
INFO:fastf1.fastf1.req:Using cached data for timing_app_data
core           INFO 	Processing timing data...
INFO:fastf1.fastf1.core:Processing timing data...
req            INFO 	Using cached data for car_data
INFO:fastf1.fastf1.req:Using cached data for car_data
req            INFO 	Using cached data for position_data
INFO:fastf1.fastf1.req:Using cached data for position_data
req            INFO 	Using cached data for weather_data
INFO:fastf1.fastf1.req:Using cached data for weather_data
req            INFO 	Using cached data for race_control_message


 Qualifying Reesults Structure:
   DriverNumber          Driver         TeamName                     Q1  \
4             4    Lando Norris          McLaren 0 days 00:01:15.912000   
81           81   Oscar Piastri          McLaren 0 days 00:01:16.062000   
1             1  Max Verstappen  Red Bull Racing 0 days 00:01:16.018000   
63           63  George Russell         Mercedes 0 days 00:01:15.971000   
22           22    Yuki Tsunoda     Racing Bulls 0 days 00:01:16.225000   

                       Q2                     Q3  Q1_sec  Q2_sec  Q3_sec  
4  0 days 00:01:15.415000 0 days 00:01:15.096000  75.912  75.415  75.096  
81 0 days 00:01:15.468000 0 days 00:01:15.180000  76.062  75.468  75.180  
1  0 days 00:01:15.565000 0 days 00:01:15.481000  76.018  75.565  75.481  
63 0 days 00:01:15.798000 0 days 00:01:15.546000  75.971  75.798  75.546  
22 0 days 00:01:16.009000 0 days 00:01:15.670000  76.225  76.009  75.670  
Fetching data for 2025 round 2...


req            INFO 	Using cached data for car_data
INFO:fastf1.fastf1.req:Using cached data for car_data
req            INFO 	Using cached data for position_data
INFO:fastf1.fastf1.req:Using cached data for position_data
req            INFO 	Using cached data for weather_data
INFO:fastf1.fastf1.req:Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
INFO:fastf1.fastf1.req:Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['81', '63', '4', '1', '44', '16', '6', '12', '22', '23', '31', '27', '14', '18', '55', '10', '87', '7', '5', '30']
INFO:fastf1.fastf1.core:Finished loading data for 20 drivers: ['81', '63', '4', '1', '44', '16', '6', '12', '22', '23', '31', '27', '14', '18', '55', '10', '87', '7', '5', '30']
core           INFO 	Loading data for Japanese Grand Prix - Qualifying [v3.5.3]
INFO:fastf1.fastf1.core:Loading data for Japanese Grand Prix - Qualifying [v3.5.3]
req            I


 Qualifying Reesults Structure:
   DriverNumber          Driver         TeamName                     Q1  \
81           81   Oscar Piastri          McLaren 0 days 00:01:31.591000   
63           63  George Russell         Mercedes 0 days 00:01:31.295000   
4             4    Lando Norris          McLaren 0 days 00:01:30.983000   
1             1  Max Verstappen  Red Bull Racing 0 days 00:01:31.424000   
44           44  Lewis Hamilton          Ferrari 0 days 00:01:31.690000   

                       Q2                     Q3  Q1_sec  Q2_sec  Q3_sec  
81 0 days 00:01:31.200000 0 days 00:01:30.641000  91.591  91.200  90.641  
63 0 days 00:01:31.307000 0 days 00:01:30.723000  91.295  91.307  90.723  
4  0 days 00:01:30.787000 0 days 00:01:30.793000  90.983  90.787  90.793  
1  0 days 00:01:31.142000 0 days 00:01:30.817000  91.424  91.142  90.817  
44 0 days 00:01:31.501000 0 days 00:01:30.927000  91.690  91.501  90.927  
Fetching data for 2025 round 3...


DEBUG:fastf1.api:Falling back to livetiming mirror (https://livetiming-mirror.fastf1.dev)
DEBUG:fastf1.fastf1.core:Traceback for failure in session info data
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/fastf1/logger.py", line 151, in __wrapped
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/fastf1/core.py", line 1470, in _load_session_info
    self._session_info = api.session_info(self.api_path,
                         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/fastf1/req.py", line 479, in _cached_api_request
    data = func(api_path, **func_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/fastf1/_api.py", line 1687, in session_info
    raise SessionNotAvailableError(
fastf1._api.SessionNotAvailableError: No data for this session! If this session only finished recently, please try again in a few mi


 Qualifying Reesults Structure:
Empty DataFrame
Columns: [DriverNumber, Driver, TeamName, Q1, Q2, Q3, Q1_sec, Q2_sec, Q3_sec]
Index: []
Fetching data for 2025 round 4...


DEBUG:fastf1.api:Falling back to livetiming mirror (https://livetiming-mirror.fastf1.dev)
DEBUG:fastf1.fastf1.core:Traceback for failure in session info data
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/fastf1/logger.py", line 151, in __wrapped
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/fastf1/core.py", line 1470, in _load_session_info
    self._session_info = api.session_info(self.api_path,
                         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/fastf1/req.py", line 479, in _cached_api_request
    data = func(api_path, **func_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/fastf1/_api.py", line 1687, in session_info
    raise SessionNotAvailableError(
fastf1._api.SessionNotAvailableError: No data for this session! If this session only finished recently, please try again in a few mi


 Qualifying Reesults Structure:
Empty DataFrame
Columns: [DriverNumber, Driver, TeamName, Q1, Q2, Q3, Q1_sec, Q2_sec, Q3_sec]
Index: []
Fetching 2024 Japanese GP data...


req            INFO 	Using cached data for car_data
INFO:fastf1.fastf1.req:Using cached data for car_data
req            INFO 	Using cached data for position_data
INFO:fastf1.fastf1.req:Using cached data for position_data
req            INFO 	Using cached data for weather_data
INFO:fastf1.fastf1.req:Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
INFO:fastf1.fastf1.req:Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '4', '55', '14', '81', '44', '16', '63', '22', '3', '27', '77', '23', '31', '18', '10', '20', '2', '24']
INFO:fastf1.fastf1.core:Finished loading data for 20 drivers: ['1', '11', '4', '55', '14', '81', '44', '16', '63', '22', '3', '27', '77', '23', '31', '18', '10', '20', '2', '24']



 Qualifying Reesults Structure:
   DriverNumber           Driver         TeamName                     Q1  \
1             1   Max Verstappen  Red Bull Racing 0 days 00:01:28.866000   
11           11     Sergio Perez  Red Bull Racing 0 days 00:01:29.303000   
4             4     Lando Norris          McLaren 0 days 00:01:29.536000   
55           55     Carlos Sainz          Ferrari 0 days 00:01:29.513000   
14           14  Fernando Alonso     Aston Martin 0 days 00:01:29.254000   

                       Q2                     Q3  Q1_sec  Q2_sec  Q3_sec  
1  0 days 00:01:28.740000 0 days 00:01:28.197000  88.866  88.740  88.197  
11 0 days 00:01:28.752000 0 days 00:01:28.263000  89.303  88.752  88.263  
4  0 days 00:01:28.940000 0 days 00:01:28.489000  89.536  88.940  88.489  
55 0 days 00:01:29.099000 0 days 00:01:28.682000  89.513  89.099  88.682  
14 0 days 00:01:29.082000 0 days 00:01:28.686000  89.254  89.082  88.686  

Japanese GP 2025 Qualifying Predictions:
Position  Driver  