In [20]:
from time import sleep
import logging
import fastf1
import pandas as pd

# Root logger configuration for the notebook.
# `datefmt` only takes effect when the format string contains %(asctime)s; the
# default format ("%(levelname)s:%(name)s:%(message)s") has no timestamp, so an
# explicit `format` is supplied here.
logging.basicConfig(
    encoding='utf-8',
    level=logging.DEBUG,  # DEBUG on the root logger also surfaces third-party debug records
    format='%(asctime)s %(levelname)s:%(name)s:%(message)s',
    datefmt='%m/%d/%Y %I:%M:%S %p'
)

In [25]:
   
# Helper functions to fetch the event calendar and prepare it for saving to parquet later
def get_event_calendar(start_year: int, end_year: int, iteration_sleep: int=10) -> list:
    '''
    Sources a list of dataframes with Formula 1 Event Calendar schedule information from
    the fastF1 package and API.

    One schedule is requested per year, in ascending order, for every year in the
    inclusive range [start_year, end_year]. If start_year is greater than end_year
    no request is made and an empty list is returned.

    :param start_year: (int) Beginning year of range to pull event calendar schedules for
    :param end_year: (int) End year (inclusive) of range to pull event calendar schedules for
    :param iteration_sleep: (int, default=10) Seconds to pause between consecutive requests
    to keep the fastF1 api from blocking requests for data. fastF1 has protection built in,
    this allows for further request safety.

    :return: list holding one fastf1.get_event_schedule(year) result per requested year
    '''
    total_event_list = []
    for position, year in enumerate(range(start_year, end_year + 1)):
        # Throttle only *between* requests: nothing follows the last request,
        # so sleeping after it would just delay the caller.
        if position > 0:
            sleep(iteration_sleep)

        # Log before the call so the message is accurate even if the request fails.
        logging.info(f'Requesting the {year=} event schedule')
        total_event_list.append(fastf1.get_event_schedule(year))

    return total_event_list


def concat_event_calendar(event_list: list) -> pd.DataFrame:
    '''
    Stacks a list of dataframes into one dataframe.

    Thin wrapper around pd.concat called with its default arguments.

    :param event_list: (list) dataframes to stack, in the order given
    :return: pandas DataFrame containing the rows of every dataframe in event_list
    '''
    return pd.concat(event_list)


def create_date_part_col(df: pd.DataFrame, date_column: str, date_part:str) -> pd.DataFrame:
    '''
    Creates a new column in supplied pd.DataFrame that is the specified DatetimeIndex part
    of the given date_column.

    The new column is named '<date_column>_<date_part>' with date_part lower-cased.
    The supplied dataframe is modified in place and the same object is returned.

    :param df: (pd.DataFrame) pandas DataFrame containing a date or datetime column
    :param date_column: (str) The name of the column with dates to extract the date_part from
    :param date_part: (str) The part of the date to extract ('year', 'month', 'day').
    Matching is case-insensitive, so 'Year' and 'year' are equivalent.

    :return: pandas DataFrame with the extracted date part in a new column.
    :raises ValueError: if date_part is not one of 'day', 'month' or 'year'

    ::Example::
    -----------

    data = [{'eventDate': '2000-03-12', 'event': 'Woodstock'},
            {'eventDate': '2003-08-12', 'event': 'EdgeFest'},
            {'eventDate': '2009-03-12', 'event': 'Warped Tour'},
            {'eventDate': '2020-01-26', 'event': 'Electric Forest'}
       ]

    df = pd.DataFrame(data)
    df['eventDate'] = pd.to_datetime(df['eventDate'])

    |  eventDate  |    event           |
    | 2000-03-12  | 'Woodstock'        |
    | 2003-08-12  | 'EdgeFest'         |
    | 2009-03-12  | 'Warped Tour'      |
    | 2020-01-26  | 'Electric Forest'  |

    df = create_date_part_col(df=df, date_column='eventDate', date_part='year')

    |  eventDate  |    event           | eventDate_year |
    | 2000-03-12  | 'Woodstock'        |      2000      |
    | 2003-08-12  | 'EdgeFest'         |      2003      |
    | 2009-03-12  | 'Warped Tour'      |      2009      |
    | 2020-01-26  | 'Electric Forest'  |      2020      |

    '''

    valid_date_parts = {'day', 'month', 'year'}

    # Normalise the case once so that validation, the attribute lookup and the
    # new column name all use the same spelling. Non-string values are passed
    # through unchanged so they still fail validation with ValueError.
    normalized_part = date_part.lower() if isinstance(date_part, str) else date_part
    if normalized_part not in valid_date_parts:
        raise ValueError(f'Given date_part must be a valid date part of {valid_date_parts}')

    # pd.DatetimeIndex exposes .day / .month / .year as attributes; getattr picks
    # the requested one by name.
    df['_'.join([date_column, normalized_part])] = (
        getattr(pd.DatetimeIndex(df[date_column]), normalized_part)
        )

    return df


In [2]:
# Load the 2023 Las Vegas qualifying ('Q') session — presumably a smoke test
# that fastf1 and its cache are working.
# NOTE(review): tst_session is not referenced by any later cell visible here;
# confirm whether this cell is still needed.
tst_session = fastf1.get_session(2023, 'Las Vegas', 'Q')
tst_session.load()


DEFAULT CACHE ENABLED!
	Cache directory: /root/.cache/fastf1.
	Size: 24.0 KB


In [22]:
# Fetch the event schedules for 2000 through 2023 (24 requests). At the default
# iteration_sleep of 10 seconds the throttling alone adds roughly four minutes.
my_event_list = get_event_calendar(2000, 2023)

DEBUG:requests_cache.backends:Initializing backend: sqlite /root/.cache/fastf1/fastf1_http_cache
DEBUG:requests_cache.backends.base:Initialized SQLiteDict with serializer: SerializerPipeline(name=pickle, n_stages=2)
DEBUG:requests_cache.backends.sqlite:Opening connection to /root/.cache/fastf1/fastf1_http_cache.sqlite:responses
DEBUG:requests_cache.backends.base:Initialized SQLiteDict with serializer: None
DEBUG:requests_cache.backends.sqlite:Opening connection to /root/.cache/fastf1/fastf1_http_cache.sqlite:redirects

DEFAULT CACHE ENABLED!
	Cache directory: /root/.cache/fastf1.
	Size: 47.51 MB

DEFAULT CACHE ENABLED!
	Cache directory: /root/.cache/fastf1.
	Size: 47.51 MB
DEBUG:requests_cache.policy.actions:Cache directives from request headers: CacheDirectives()
DEBUG:requests_cache.policy.actions:Pre-read cache checks: Passed
DEBUG:requests_cache.policy.actions:Post-read cache actions: CacheActions(expire_after=datetime.timedelta(seconds=43200), resend_request=True)
DEBUG:requests_c

In [23]:
# Combine the per-year schedules into a single dataframe.
eventDF = concat_event_calendar(my_event_list)

In [29]:
# Derive EventDate_year and EventDate_month from EventDate. create_date_part_col
# adds the column to the frame it receives and returns that same frame.
eventDF = create_date_part_col(eventDF, 'EventDate', 'year')
eventDF = create_date_part_col(eventDF, 'EventDate', 'month')

In [30]:
# Preview the first rows to confirm the derived EventDate_year / EventDate_month columns.
eventDF.head()

Unnamed: 0,RoundNumber,Country,Location,OfficialEventName,EventDate,EventName,EventFormat,Session1,Session1Date,Session1DateUtc,...,Session3DateUtc,Session4,Session4Date,Session4DateUtc,Session5,Session5Date,Session5DateUtc,F1ApiSupport,EventDate_year,EventDate_month
0,1,Australia,Melbourne,,2000-03-12,Australian Grand Prix,conventional,Practice 1,,2000-03-10,...,2000-03-11,Qualifying,,2000-03-11,Race,,2000-03-12,False,2000,3
1,2,Brazil,São Paulo,,2000-03-26,Brazilian Grand Prix,conventional,Practice 1,,2000-03-24,...,2000-03-25,Qualifying,,2000-03-25,Race,,2000-03-26,False,2000,3
2,3,Italy,Imola,,2000-04-09,San Marino Grand Prix,conventional,Practice 1,,2000-04-07,...,2000-04-08,Qualifying,,2000-04-08,Race,,2000-04-09,False,2000,4
3,4,UK,Silverstone,,2000-04-23,British Grand Prix,conventional,Practice 1,,2000-04-21,...,2000-04-22,Qualifying,,2000-04-22,Race,,2000-04-23,False,2000,4
4,5,Spain,Montmeló,,2000-05-07,Spanish Grand Prix,conventional,Practice 1,,2000-05-05,...,2000-05-06,Qualifying,,2000-05-06,Race,,2000-05-07,False,2000,5


In [47]:
# Persist the combined calendar as a parquet dataset partitioned by year.
# Built from my_event_list, the list returned by get_event_calendar earlier in
# this notebook, so the cell works on a fresh kernel run top to bottom.
df = pd.concat(my_event_list)
df['eventYear'] = pd.DatetimeIndex(df['EventDate']).year
df['eventMonth'] = pd.DatetimeIndex(df['EventDate']).month
# NOTE(review): absolute, environment-specific path — consider a configurable data directory.
df.to_parquet('/workspaces/formula-one-analytics/data/eventCalendar.parquet', partition_cols=['eventYear'])


In [35]:
# Small throwaway frame (four car rows sharing one date) for trying out date handling.
data = [
    {'eventDate': '2000-03-12', 'make': 'toyota', 'model': 'corolla'},
    {'eventDate': '2000-03-12', 'make': 'honda', 'model': 'civic'},
    {'eventDate': '2000-03-12', 'make': 'hyndai', 'model': 'nissan'},
    {'eventDate': '2000-03-12', 'make': 'nissan', 'model': 'sentra'},
]
# Column names come from the dict keys; eventDate is then parsed from string to datetime.
tst = pd.DataFrame(data).assign(
    eventDate=lambda frame: pd.to_datetime(frame['eventDate'])
)

tst


Unnamed: 0,eventDate,make,model
0,2000-03-12,toyota,corolla
1,2000-03-12,honda,civic
2,2000-03-12,hyndai,nissan
3,2000-03-12,nissan,sentra


In [36]:
# Inspect the column dtypes of the toy frame.
# NOTE(review): the saved output lists eventDate as object, which does not match
# the pd.to_datetime conversion in the preceding cell — likely stale; re-run to confirm.
tst.dtypes


eventDate    object
make         object
model        object
dtype: object