In [1]:
import time
from collections import defaultdict
from pathlib import Path

import fastf1
import numpy as np
import pandas as pd

No need to worry about caching: FastF1 enables it by default.

## Features to Use 

#### For now, only trying to predict the race finishing order

1. Qualifying Position - Qualifying to Race Position Delta
    - weight this feature by how closely qualifying order has historically matched race order at each track
    - this is a direct indicator of car and driver performance
2. Track Performance
    - look at position changes during a race to get an idea of how the order changes within the race
3. TODO: third feature still to be decided

### Training and Performance

- Train on data from well before the target races, but weight recent performances (car and driver) more heavily.
- Look at historical car performance at the track (only the last 3 years, 2022–2025).
- Look at historical driver performance at the track (only the last 3 years, 2022–2025).
    - Weight finishes from the last 6 months more heavily.

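- Train on races up to and including the 2025 Bahrain GP.
- Test on all 2025 races run to date.
- Note: the loading cell below currently only pulls the 2014–2024 seasons, so 2025 data still has to be added.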

In [2]:
# Show what FastF1 reports about its cache.
# NOTE(review): the recorded output is (None, None); presumably the default
# cache is only set up once the first request is made — confirm.
fastf1.Cache.get_cache_info()

(None, None)

In [3]:
# Raise FastF1's log verbosity to DEBUG. The recorded output of the loading
# cell shows that this level also prints tracebacks for failures FastF1
# handles internally, so expect long output.
fastf1.set_log_level('DEBUG')

No need to pull lap data for now either; only load the results.

In [4]:
# Download the race results of every Grand Prix in the 2014-2024 seasons and
# group the per-race result frames by event name.
#
# Produces:
#   years    -- list of the seasons that were requested
#   raceData -- defaultdict mapping event name -> list of result frames, one per
#               season in which an event of that name appears in the schedule,
#               appended in season order

FIRST_FASTF1_SEASON = 2018  # seasons before this are requested from the 'ergast' backend
UNUSED_COLUMNS = ['BroadcastName', 'TeamColor', 'HeadshotUrl', 'CountryCode']

years = list(range(2014, 2025))
raceData = defaultdict(list)
for year in years:
    # The schedule and the sessions of one season always use the same backend,
    # so pick it once per season.
    backend = 'ergast' if year < FIRST_FASTF1_SEASON else 'fastf1'
    currYearSchedule = fastf1.get_event_schedule(year, include_testing=False, backend=backend)
    for event in currYearSchedule['EventName']:
        raceResults = fastf1.get_session(year, event, 'R', backend=backend)
        # Only the results table is used; skip laps, telemetry, weather and messages.
        raceResults.load(laps=False, telemetry=False, weather=False, messages=False)
        # errors='ignore' keeps the cell running if a backend omits one of these columns.
        df_race = raceResults.results.drop(columns=UNUSED_COLUMNS, errors='ignore')
        # Positive values mean the driver finished ahead of their grid slot.
        df_race['PositionChange'] = df_race['GridPosition'] - df_race['Position']
        df_race['Year'] = year
        df_race['Event'] = event
        # A scalar column assignment stores no values on a zero-row frame, so
        # also keep season and event as frame metadata; that way they stay
        # recoverable when a session comes back empty.
        df_race.attrs['Year'] = year
        df_race.attrs['Event'] = event
        raceData[event].append(df_race)

core           INFO 	Loading data for Australian Grand Prix - Race [v3.5.3]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
_api          DEBUG 	Falling back to livetiming mirror (https://livetiming-mirror.fastf1.dev)
logger        DEBUG 	Traceback for failure in session info data
Traceback (most recent call last):
  File "/Users/abhiattaluri/miniforge3/envs/f1env/lib/python3.10/site-packages/fastf1/logger.py", line 151, in __wrapped
    return func(*args, **kwargs)
  File "/Users/abhiattaluri/miniforge3/envs/f1env/lib/python3.10/site-packages/fastf1/core.py", line 1470, in _load_session_info
    self._session_info = api.session_info(self.api_path,
  File "/Users/abhiattaluri/miniforge3/envs/f1env/lib/python3.10/site-packages/fastf1/req.py", line 479, in _cached_api_request
    data = func(api_path, **func_kwargs)
  File "/Users/abhiattaluri/miniforge3/envs/f1env/lib/python3.10/site-packages/fastf1/_api.py",

In [5]:
# Number of distinct event names collected, followed by the names themselves.
print(len(raceData))
raceData.keys()

38


dict_keys(['Australian Grand Prix', 'Malaysian Grand Prix', 'Bahrain Grand Prix', 'Chinese Grand Prix', 'Spanish Grand Prix', 'Monaco Grand Prix', 'Canadian Grand Prix', 'Austrian Grand Prix', 'British Grand Prix', 'German Grand Prix', 'Hungarian Grand Prix', 'Belgian Grand Prix', 'Italian Grand Prix', 'Singapore Grand Prix', 'Japanese Grand Prix', 'Russian Grand Prix', 'United States Grand Prix', 'Brazilian Grand Prix', 'Abu Dhabi Grand Prix', 'Mexican Grand Prix', 'European Grand Prix', 'Azerbaijan Grand Prix', 'French Grand Prix', 'Styrian Grand Prix', '70th Anniversary Grand Prix', 'Tuscan Grand Prix', 'Eifel Grand Prix', 'Portuguese Grand Prix', 'Emilia Romagna Grand Prix', 'Turkish Grand Prix', 'Sakhir Grand Prix', 'Dutch Grand Prix', 'Mexico City Grand Prix', 'São Paulo Grand Prix', 'Qatar Grand Prix', 'Saudi Arabian Grand Prix', 'Miami Grand Prix', 'Las Vegas Grand Prix'])

In [6]:
# One line per event: how many result frames were collected for it.
for frames in raceData.values():
    print(len(frames))

9
4
11
7
11
10
9
11
11
4
11
11
11
9
9
8
10
6
11
5
1
7
4
2
1
1
1
2
4
2
1
4
4
4
3
4
3
2


In [19]:
# Inspect the frame stored at index 4 for the Abu Dhabi Grand Prix; in the
# recorded output this is the 2018 race.
raceData['Abu Dhabi Grand Prix'][4]

Unnamed: 0,DriverNumber,Abbreviation,DriverId,TeamName,TeamId,FirstName,LastName,FullName,Position,ClassifiedPosition,GridPosition,Q1,Q2,Q3,Time,Status,Points,PositionChange,Year,Event
44,44,HAM,hamilton,Mercedes,mercedes,Lewis,Hamilton,Lewis Hamilton,1.0,1,1.0,NaT,NaT,NaT,0 days 01:39:40.382000,Finished,25.0,0.0,2018,Abu Dhabi Grand Prix
5,5,VET,vettel,Ferrari,ferrari,Sebastian,Vettel,Sebastian Vettel,2.0,2,3.0,NaT,NaT,NaT,0 days 00:00:02.581000,Finished,18.0,1.0,2018,Abu Dhabi Grand Prix
33,33,VER,max_verstappen,Red Bull Racing,red_bull,Max,Verstappen,Max Verstappen,3.0,3,6.0,NaT,NaT,NaT,0 days 00:00:12.706000,Finished,15.0,3.0,2018,Abu Dhabi Grand Prix
3,3,RIC,ricciardo,Red Bull Racing,red_bull,Daniel,Ricciardo,Daniel Ricciardo,4.0,4,5.0,NaT,NaT,NaT,0 days 00:00:15.379000,Finished,12.0,1.0,2018,Abu Dhabi Grand Prix
77,77,BOT,bottas,Mercedes,mercedes,Valtteri,Bottas,Valtteri Bottas,5.0,5,2.0,NaT,NaT,NaT,0 days 00:00:47.957000,Finished,10.0,-3.0,2018,Abu Dhabi Grand Prix
55,55,SAI,sainz,Renault,renault,Carlos,Sainz,Carlos Sainz,6.0,6,11.0,NaT,NaT,NaT,0 days 00:01:12.548000,Finished,8.0,5.0,2018,Abu Dhabi Grand Prix
16,16,LEC,leclerc,Sauber,sauber,Charles,Leclerc,Charles Leclerc,7.0,7,8.0,NaT,NaT,NaT,0 days 00:01:30.789000,Finished,6.0,1.0,2018,Abu Dhabi Grand Prix
11,11,PER,perez,Racing Point,force_india,Sergio,Perez,Sergio Perez,8.0,8,14.0,NaT,NaT,NaT,0 days 00:01:31.275000,Finished,4.0,6.0,2018,Abu Dhabi Grand Prix
8,8,GRO,grosjean,Haas F1 Team,haas,Romain,Grosjean,Romain Grosjean,9.0,9,7.0,NaT,NaT,NaT,NaT,+1 Lap,2.0,-2.0,2018,Abu Dhabi Grand Prix
20,20,MAG,kevin_magnussen,Haas F1 Team,haas,Kevin,Magnussen,Kevin Magnussen,10.0,10,13.0,NaT,NaT,NaT,NaT,+1 Lap,1.0,3.0,2018,Abu Dhabi Grand Prix


In [7]:
# Among the events that have 11 stored frames, check whether the frame at
# index 10 contains any rows, and tally the outcome.
genCount = fullCount = emptyCount = 0
for key, frames in raceData.items():
    if len(frames) != 11:
        continue
    genCount += 1
    if len(frames[10]) == 0:
        emptyCount += 1
        print('empty', key)
    else:
        fullCount += 1
        print('full', key)

# Events with 11 frames, then how many of those have a populated / empty last frame.
for tally in (genCount, fullCount, emptyCount):
    print(tally)

full Bahrain Grand Prix
empty Spanish Grand Prix
empty Austrian Grand Prix
empty British Grand Prix
empty Hungarian Grand Prix
empty Belgian Grand Prix
empty Italian Grand Prix
empty Abu Dhabi Grand Prix
8
1
7


In [12]:
# Inspect the first frame stored for the Miami Grand Prix. In the recorded
# output (2022) the driver and team names are filled in, but the result
# columns (Position, GridPosition, Status, Points) are blank.
raceData['Miami Grand Prix'][0]

Unnamed: 0,DriverNumber,Abbreviation,DriverId,TeamName,TeamId,FirstName,LastName,FullName,Position,ClassifiedPosition,GridPosition,Q1,Q2,Q3,Time,Status,Points,PositionChange,Year,Event
16,16,LEC,,Ferrari,,Charles,Leclerc,Charles Leclerc,,,,NaT,NaT,NaT,NaT,,,,2022,Miami Grand Prix
55,55,SAI,,Ferrari,,Carlos,Sainz,Carlos Sainz,,,,NaT,NaT,NaT,NaT,,,,2022,Miami Grand Prix
1,1,VER,,Red Bull Racing,,Max,Verstappen,Max Verstappen,,,,NaT,NaT,NaT,NaT,,,,2022,Miami Grand Prix
11,11,PER,,Red Bull Racing,,Sergio,Perez,Sergio Perez,,,,NaT,NaT,NaT,NaT,,,,2022,Miami Grand Prix
77,77,BOT,,Alfa Romeo,,Valtteri,Bottas,Valtteri Bottas,,,,NaT,NaT,NaT,NaT,,,,2022,Miami Grand Prix
44,44,HAM,,Mercedes,,Lewis,Hamilton,Lewis Hamilton,,,,NaT,NaT,NaT,NaT,,,,2022,Miami Grand Prix
10,10,GAS,,AlphaTauri,,Pierre,Gasly,Pierre Gasly,,,,NaT,NaT,NaT,NaT,,,,2022,Miami Grand Prix
4,4,NOR,,McLaren,,Lando,Norris,Lando Norris,,,,NaT,NaT,NaT,NaT,,,,2022,Miami Grand Prix
22,22,TSU,,AlphaTauri,,Yuki,Tsunoda,Yuki Tsunoda,,,,NaT,NaT,NaT,NaT,,,,2022,Miami Grand Prix
18,18,STR,,Aston Martin,,Lance,Stroll,Lance Stroll,,,,NaT,NaT,NaT,NaT,,,,2022,Miami Grand Prix


In [20]:
# Inspect the frame stored at index 10 for the Spanish Grand Prix; the recorded
# output is header-only, i.e. this frame has no rows.
raceData['Spanish Grand Prix'][10]

Unnamed: 0,DriverNumber,Abbreviation,DriverId,TeamName,TeamId,FirstName,LastName,FullName,Position,ClassifiedPosition,GridPosition,Q1,Q2,Q3,Time,Status,Points,PositionChange,Year,Event


In [22]:
# Write every non-empty results frame to data/<event>-<year>.csv and collect
# the "<event>-<year>" labels of the frames that have no rows in `empty`.
DATA_DIR = Path('data')
# to_csv does not create missing directories, so make sure the target exists.
DATA_DIR.mkdir(parents=True, exist_ok=True)

empty = []
for key, frames in raceData.items():
    for idx, df in enumerate(frames):
        if df.shape[0] > 0:
            year = df['Year'].iloc[0]
            df.to_csv(DATA_DIR / f'{key}-{year}.csv', index=False)
        else:
            # A zero-row frame has no values in its 'Year' column. Prefer a
            # season recorded in the frame's metadata when there is one.
            # The positional fallback is only exact for events that have a
            # frame for every season from 2014 on; for any other event (e.g.
            # one first held in 2022) it yields the wrong year.
            year = df.attrs.get('Year', 2014 + idx)
            race = f'{key}-{year}'
            empty.append(race)