In [49]:
import os
import logging
import fastf1
import pandas as pd

from time import sleep
from pathlib import Path


# Configuring Logging
logging.basicConfig(
    encoding='utf-8',
    level=logging.DEBUG,
    datefmt='%m/%d/%Y %I:%M:%S %p'
)

CACHE_PATH = Path('/workspaces/formula-one-analytics/data/.cache/')

# fastF1 data cache config
if not CACHE_PATH.exists():
    logging.info('CACHE_PATH does not exist creating...')
    CACHE_PATH.mkdir(parents=True, exist_ok=True)

fastf1.Cache.enable_cache('/workspaces/formula-one-analytics/data/.cache/')


INFO:root:CACHE_PATH does not exist creating...
DEBUG:requests_cache.backends:Initializing backend: sqlite /workspaces/formula-one-analytics/data/.cache/fastf1_http_cache
DEBUG:requests_cache.backends.base:Initialized SQLiteDict with serializer: SerializerPipeline(name=pickle, n_stages=2)
DEBUG:requests_cache.backends.sqlite:Opening connection to /workspaces/formula-one-analytics/data/.cache/fastf1_http_cache.sqlite:responses
DEBUG:requests_cache.backends.base:Initialized SQLiteDict with serializer: None
DEBUG:requests_cache.backends.sqlite:Opening connection to /workspaces/formula-one-analytics/data/.cache/fastf1_http_cache.sqlite:redirects


In [48]:
fastf1.Cache.enable_cache('/workspaces/formula-one-analytics/data/.cache/')

NotADirectoryError: Cache directory does not exist! Please check for typos or create it first.

In [25]:
   
# Defining function to event calendar and save to parquet for later
def get_event_calendar(start_year: int, end_year: int, iteration_sleep: int=10) -> list:
    '''
    Sources a list of dataframes with Formula 1 Event Calendar schedule information fro
    the fastF1 API package and API.

    :param start_year: (int) Beginning year of range to pull event calendar schedules for
    :param end_year: (int) End year of range to pull event calendar schedules for
    :iteration_sleep: (int, default=10) Adjustable sleep interval to keep fastF1 api from
    blocking requests for data. fastF1 has protection built in, this allows for further 
    request safety.
    '''
    totalEventList = []
    for idx, year in enumerate(list(range(start_year, end_year+1))):
        totalEventList.append(fastf1.get_event_schedule(year))
        logging.info(f'Requesting the {year=} event schedule')
        sleep(iteration_sleep) # to throttle requests beyond built in fastF1 throttling

    return totalEventList


def concat_event_calendar(event_list: list) -> pd.DataFrame:
    '''
    Concats data from list of dataframes into a single dataframe

    :param event_list: (list) list of dataframes to concat into single dataframe
    :return: pandas Dataframe

    '''

    df = pd.concat(event_list)
    return df


def create_date_part_col(df: pd.DataFrame, date_column: str, date_part:str) -> pd.DataFrame:
    '''
    Creates a new column in supplied pd.DataFrame that is the specified DatetimeIndex part 
    of the given date_column.

    :param df: (pd.DataFrame) pandas DataFrame containing a date or datetime column
    :param date_column: (str) The name of the column with dates to extract the date_part from
    :param date_part: (str) The part of the date to extract ('year', 'month','day')

    :return: pandas DataFrame with the extracted date part in a new column.

    ::Example::
    -----------

    data = [{'eventDate': '2000-03-12', 'event': 'Woodstock'}, 
            {'eventDate': '2003-08-12', 'event': 'EdgeFest'}, 
            {'eventDate': '2009-03-12', 'event': 'Warped Tour'}, 
            {'eventDate': '2020-01-26', 'event': 'Electric Forest'}
       ]
    
    df = pd.DataFrame(data)
    df['eventDate'] = pd.to_datetime(df['eventDate'])

    |  eventDate  |    event           |
    | 2000-03-12  | 'Woodstock'        |
    | 2003-08-12  | 'EdgeFest'         |
    | 2009-03-12  | 'Warped Tour'      |
    | 2020-01-26  | 'Electric Forest'  |

    df = create_date_part_col(df=df, date_column='eventDate', date_part='year')

    |  eventDate  |    event           | eventDate_year |
    | 2000-03-12  | 'Woodstock'        |      2000      |
    | 2003-08-12  | 'EdgeFest'         |      2003      |
    | 2009-03-12  | 'Warped Tour'      |      2009      |
    | 2020-01-26  | 'Electric Forest'  |      2020      |


    '''

    valid_date_parts = {'day', 'month', 'year'}
    if date_part not in valid_date_parts:
        raise ValueError(f'Given date_part must be a valid date part of {valid_date_parts}')

    df['_'.join([date_column, date_part.lower()])] = (
        getattr(pd.DatetimeIndex(df[date_column]), date_part)
        )
    
    return df


In [None]:
# Pulling down Formula Event Calendars from 2000 to 2023 seasons.
my_event_list = get_event_calendar(2000, 2023)
eventDF = concat_event_calendar(my_event_list)

# Adding new date part columns for parquet partitioning optimization
eventDF = create_date_part_col(eventDF, 'EventDate', 'year')
eventDF = create_date_part_col(eventDF, 'EventDate', 'month')

In [39]:
# Saving data to parquet file for easier access later.
eventDF.to_parquet('/workspaces/formula-one-analytics/data/eventCalendar.parquet',
                   partition_cols=['EventDate_year', 'EventDate_month'])

In [2]:
my_sesison = fastf1.get_session(2023, 'Las Vegas', 'Q')
tst_session.load()


DEFAULT CACHE ENABLED!
	Cache directory: /root/.cache/fastf1.
	Size: 24.0 KB


In [40]:
eventDF

Unnamed: 0,RoundNumber,Country,Location,OfficialEventName,EventDate,EventName,EventFormat,Session1,Session1Date,Session1DateUtc,...,Session3DateUtc,Session4,Session4Date,Session4DateUtc,Session5,Session5Date,Session5DateUtc,F1ApiSupport,EventDate_year,EventDate_month
0,1,Australia,Melbourne,,2000-03-12,Australian Grand Prix,conventional,Practice 1,,2000-03-10 00:00:00,...,2000-03-11 00:00:00,Qualifying,,2000-03-11 00:00:00,Race,,2000-03-12 00:00:00,False,2000,3
1,2,Brazil,São Paulo,,2000-03-26,Brazilian Grand Prix,conventional,Practice 1,,2000-03-24 00:00:00,...,2000-03-25 00:00:00,Qualifying,,2000-03-25 00:00:00,Race,,2000-03-26 00:00:00,False,2000,3
2,3,Italy,Imola,,2000-04-09,San Marino Grand Prix,conventional,Practice 1,,2000-04-07 00:00:00,...,2000-04-08 00:00:00,Qualifying,,2000-04-08 00:00:00,Race,,2000-04-09 00:00:00,False,2000,4
3,4,UK,Silverstone,,2000-04-23,British Grand Prix,conventional,Practice 1,,2000-04-21 00:00:00,...,2000-04-22 00:00:00,Qualifying,,2000-04-22 00:00:00,Race,,2000-04-23 00:00:00,False,2000,4
4,5,Spain,Montmeló,,2000-05-07,Spanish Grand Prix,conventional,Practice 1,,2000-05-05 00:00:00,...,2000-05-06 00:00:00,Qualifying,,2000-05-06 00:00:00,Race,,2000-05-07 00:00:00,False,2000,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18,18,United States,Austin,FORMULA 1 LENOVO UNITED STATES GRAND PRIX 2023,2023-10-22,United States Grand Prix,sprint_shootout,Practice 1,2023-10-20 12:30:00-05:00,2023-10-20 17:30:00,...,2023-10-21 17:30:00,Sprint,2023-10-21 17:00:00-05:00,2023-10-21 22:00:00,Race,2023-10-22 14:00:00-05:00,2023-10-22 19:00:00,True,2023,10
19,19,Mexico,Mexico City,FORMULA 1 GRAN PREMIO DE LA CIUDAD DE MÉXICO 2023,2023-10-29,Mexico City Grand Prix,conventional,Practice 1,2023-10-27 12:30:00-06:00,2023-10-27 18:30:00,...,2023-10-28 17:30:00,Qualifying,2023-10-28 15:00:00-06:00,2023-10-28 21:00:00,Race,2023-10-29 14:00:00-06:00,2023-10-29 20:00:00,True,2023,10
20,20,Brazil,São Paulo,FORMULA 1 ROLEX GRANDE PRÊMIO DE SÃO PAULO 2023,2023-11-05,São Paulo Grand Prix,sprint_shootout,Practice 1,2023-11-03 11:30:00-03:00,2023-11-03 14:30:00,...,2023-11-04 14:00:00,Sprint,2023-11-04 15:30:00-03:00,2023-11-04 18:30:00,Race,2023-11-05 14:00:00-03:00,2023-11-05 17:00:00,True,2023,11
21,21,United States,Las Vegas,FORMULA 1 HEINEKEN SILVER LAS VEGAS GRAND PRIX...,2023-11-18,Las Vegas Grand Prix,conventional,Practice 1,2023-11-16 20:30:00-08:00,2023-11-17 04:30:00,...,2023-11-18 04:30:00,Qualifying,2023-11-18 00:00:00-08:00,2023-11-18 08:00:00,Race,2023-11-18 22:00:00-08:00,2023-11-19 06:00:00,True,2023,11
