# Setup

In [1]:
import os
import pandas as pd
import numpy as np

from matplotlib import pyplot as plt

In [2]:
# FastF1 setup

import fastf1
import fastf1.plotting

# FastF1 expects the cache directory to exist already, so create it up front;
# otherwise this cell fails on a fresh checkout ("Restart & Run All").
FASTF1_CACHE_DIR = '.fastf1/cache'
os.makedirs(FASTF1_CACHE_DIR, exist_ok=True)

fastf1.Cache.enable_cache(FASTF1_CACHE_DIR)
fastf1.plotting.setup_mpl()

In [3]:
# Kaggle dataset setup

# Directory (relative to the notebook) that holds the Kaggle CSV tables.
DATASET_PATH = '.kaggle/dataset'

# Every table the notebook knows about, in the order used by kaggle_has_key.
_KAGGLE_TABLE_NAMES = (
    'circuits',
    'constructor_results',
    'constructor_standings',
    'constructors',
    'driver_standings',
    'drivers',
    'lap_times',
    'pit_stops',
    'qualifying',
    'races',
    'results',
    'seasons',
    'sprint_results',
    'status',
)

# Tables that are loaded WITHOUT using their first column as the index.
# NOTE(review): 'results' is listed here while 'sprint_results' is not --
# confirm that this asymmetry is intentional.
_KAGGLE_TABLES_WITHOUT_KEY = frozenset({
    'lap_times',
    'pit_stops',
    'results',
    'seasons',
})

# table name -> True when the first CSV column is used as the DataFrame index.
kaggle_has_key = {
    table_name: table_name not in _KAGGLE_TABLES_WITHOUT_KEY
    for table_name in _KAGGLE_TABLE_NAMES
}



# Only CSV files are dataset tables. Anything else that happens to live in the
# directory (OS metadata files, editor checkpoints, ...) is ignored so that it
# is neither reported as a new table nor handed to the CSV parser.
kaggle_tables = [
    entry
    for entry in os.listdir(DATASET_PATH)
    if os.path.splitext(entry)[1].lower() == '.csv'
]
kaggle_data = {}
kaggle_dataframes = {}

# Fail fast when the dataset contains a table with no entry in kaggle_has_key:
# the loader needs to know whether the first column is the table's key.
unknown_tables = (
    {os.path.splitext(entry)[0] for entry in kaggle_tables} - set(kaggle_has_key)
)
if unknown_tables:
    raise ValueError(
        f"New table(s): {unknown_tables}"
    )

# Load every table twice over: as a DataFrame (kaggle_dataframes) and as a
# nested dict {row_id: {column: value}} (kaggle_data).
print("Parsing tables...")
for t in kaggle_tables:
  t_path = os.path.join(DATASET_PATH, t)
  t_name, _ = os.path.splitext(t)
  print(f"\t[{t_name}]")

  t_has_key = kaggle_has_key[t_name]

  # Read the header (plus one row) first to count the named columns, then load
  # only that many columns by position.
  # NOTE(review): presumably this drops unnamed trailing fields -- confirm.
  t_df_cols = pd.read_csv(t_path, nrows=1)
  t_dataframe = pd.read_csv(
    t_path,
    header    = 0,
    index_col = 0 if t_has_key else None,
    usecols   = list(range(len(t_df_cols.columns)))
  )

  t_vals = t_dataframe.to_numpy().tolist()
  t_cols = t_dataframe.columns.to_numpy().tolist()
  print(f"\t\t{len(t_cols)} columns")
  print(f"\t\t{len(t_vals)} rows")

  # Row ids come from the index column when the table has a key, otherwise
  # rows are numbered 1..N in file order.
  if t_has_key:
    t_row_ids = t_dataframe.index.to_numpy().tolist()
  else:
    t_row_ids = range(1, len(t_vals) + 1)

  # `row_id` (rather than `id`) keeps the builtin id() usable in later cells.
  kaggle_data[t_name] = {
    row_id: dict(zip(t_cols, row))
    for row_id, row in zip(t_row_ids, t_vals)
  }
  kaggle_dataframes[t_name] = t_dataframe


Parsing tables...
	[circuits]
		8 columns
		76 rows
	[constructors]
		4 columns
		211 rows
	[constructor_results]
		4 columns
		12170 rows
	[constructor_standings]
		6 columns
		12931 rows
	[drivers]
		8 columns
		855 rows
	[driver_standings]
		6 columns
		33882 rows
	[lap_times]
		6 columns
		538121 rows
	[pit_stops]
		7 columns
		9634 rows
	[qualifying]
		8 columns
		9575 rows
	[races]
		17 columns
		1079 rows
	[results]
		18 columns
		25840 rows
	[seasons]
		2 columns
		73 rows
	[sprint_results]
		15 columns
		120 rows
	[status]
		1 columns
		139 rows


# Tests

In [4]:
# Determine the extent of the Kaggle dataset: the highest round number seen in
# any season and the first/last season present in the `races` table.

races_table = kaggle_dataframes['races']

round_max = races_table['round'].max()
years = races_table['year'].unique()
year_min = years.min()
year_max = years.max()

In [13]:
# Spot check: does FastF1 report F1 API support for round 22 of the 2022 season?
event_2022_r22 = fastf1.get_event(2022, 22)
event_2022_r22['F1ApiSupport']

True

In [15]:
def _has_f1_api_support(year, round_number):
    """Return True if FastF1 reports F1 API support for the given event.

    Parameters:
        year: season to look up.
        round_number: round within that season.

    Returns:
        bool: truthiness of the event's 'F1ApiSupport' field; False when the
        lookup fails (the scan is best-effort and simply moves on).
    """
    try:
        return bool(fastf1.get_event(year, round_number)['F1ApiSupport'])
    except Exception:
        # Deliberately broad, but unlike a bare `except:` this lets
        # KeyboardInterrupt / SystemExit through so the scan can be stopped.
        return False


# Earliest season whose first round has F1 API support.
fast_year_start = next(
    (
        year
        for year in range(year_min, year_max + 1)
        if _has_f1_api_support(year, 1)
    ),
    None,
)
if fast_year_start is None:
    raise LookupError(
        f"No season between {year_min} and {year_max} has F1 API support"
    )

# Latest supported (season, round): walk seasons backwards and, within each
# season, rounds backwards from the largest round number in the dataset.
# `round_number` (rather than `round`) keeps the builtin round() usable later.
fast_year_end, fast_round_end = next(
    (
        (year, round_number)
        for year in range(year_max, fast_year_start - 1, -1)
        for round_number in range(round_max, 0, -1)
        if _has_f1_api_support(year, round_number)
    ),
    (None, None),
)
if fast_year_end is None:
    raise LookupError(
        f"No event between {fast_year_start} and {year_max} has F1 API support"
    )

print(fast_year_start, 1)
print(fast_year_end, fast_round_end)

2018 1
2022 22
