In [1]:
import sqlite3
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf
from sklearn.metrics import r2_score
import statsmodels.stats.api as sms


class DataLoader:
    """
    Class responsible for loading data from an SQLite database.

    Attributes:
        TABLES (tuple): Names of the tables read by ``load_data``.
        db_path (str): Path to the SQLite database.
        dataframes (dict): Dictionary containing the loaded DataFrames.
    """

    TABLES = (
        "drivers",
        "fcyphases",
        "laps",
        "qualifyings",
        "races",
        "retirements",
        "starterfields",
    )

    def __init__(self, db_path: str):
        """
        Initialize the DataLoader class with a database path.

        Args:
            db_path (str): Path to the SQLite database.
        """
        self.db_path = db_path
        self.dataframes = {}

    def load_data(self) -> dict:
        """
        Load every table listed in ``TABLES`` and store the result on the instance.

        The connection is closed even when one of the queries fails, so a
        missing table does not leave an open handle on the database file.
        ``self.dataframes`` is only replaced once all tables have been read.

        Returns:
            dict: A dictionary whose keys are table names and values are the
            corresponding DataFrames.
        """
        connection = sqlite3.connect(self.db_path)
        try:
            # Table names come from the fixed TABLES constant, never from user
            # input, so interpolating them into the query text is safe here.
            loaded = {
                table: pd.read_sql_query(f"SELECT * FROM {table}", connection)
                for table in self.TABLES
            }
        finally:
            connection.close()
        self.dataframes = loaded
        return self.dataframes

In [None]:
class DNF:
    """
    Work-in-progress estimator of "did not finish" (DNF) events for a driver/team.

    Design notes carried over from the original author:
        * For each driver/team of the current year, take the observations from
          the races of the current year (failures) and from the current year
          plus the training years (accidents). This yields a frame filtered on
          the driver and on the races belonging to those years.
        * When a driver has fewer than some threshold X of observations, the
          estimate is meant to be based on all drivers with enough
          observations (average values) rather than on the driver alone.

    Attributes:
        season (int): Season being predicted.
        len_train_df (int): Number of seasons before ``season`` used for training.
        dfs (dict): Table name -> DataFrame mapping; ``"retirements"`` is read here.
        driver: Value compared against the ``driver_id`` column of ``retirements``.
        team: Team identifier; stored but not used by the current code.
    """

    def __init__(self, season: int, len_train_df: int, dataframes: dict, driver, team):
        """
        Store the configuration for one driver/team estimate.

        Args:
            season (int): Season being predicted.
            len_train_df (int): Number of preceding seasons to train on.
            dataframes (dict): Table name -> DataFrame mapping.
            driver: Driver identifier matched against ``driver_id``.
            team: Team identifier.
        """
        self.season = season
        self.len_train_df = len_train_df
        self.dfs = dataframes
        self.driver = driver
        self.team = team

    def calculate_accident_probability(self):
        """
        Return the driver's accident observations over the training seasons.

        The training seasons are the ``len_train_df`` seasons immediately
        before ``season``. Rows of ``retirements`` are kept when they belong
        to this driver and to one of those seasons.

        Returns:
            pandas.DataFrame: A single-column frame (``accidents``) with a
            fresh 0..n-1 index, one row per matching retirement record.

        Raises:
            KeyError: If ``retirements`` or one of the columns ``season``,
                ``driver_id``, ``accidents`` is missing.
        """
        # TODO: the probability is meant to be estimated by the mean of a beta
        # distribution, with z = total number of accidents and N = number of
        # races in the training seasons. Only the raw observations are
        # returned for now.
        retirements_df = self.dfs["retirements"]
        seasons_to_train = [
            self.season - offset for offset in range(1, self.len_train_df + 1)
        ]
        is_driver = retirements_df["driver_id"] == self.driver
        in_train_seasons = retirements_df["season"].isin(seasons_to_train)
        # NOTE(review): the original referenced an undefined name here; the
        # per-driver frame is assumed to be the intended one -- confirm.
        accidents_per_train_season = retirements_df[is_driver & in_train_seasons][
            ["season", "accidents"]
        ].reset_index(drop=True)
        return accidents_per_train_season[["accidents"]]

    def calculate_failure_probability(self):
        """
        Placeholder for the failure-probability estimate; currently returns None.

        Planned approach (not implemented yet):
            * Filter ``retirements`` on the current season (a merge is
              expected to be needed) and keep the ``season`` and ``failures``
              columns.
            * The estimate depends on the team and the season: take the
              team's races in the current season and compute the frequency
              of failures.
        """
        return None

In [67]:
# Exploratory cell: load the tables, then derive per-driver accident counts
# for the two seasons preceding 2016 and scale them by the races per season.
db_path = "F1_timingdata_2014_2019.sqlite"
data_loader = DataLoader(db_path=db_path)
dataframes = data_loader.load_data()
races_df = dataframes['races']
retirements_df = dataframes['retirements']

# Training window: the 2 seasons before 2016, most recent first.
seasons_to_train = [2016 - offset for offset in range(1, 2 + 1)]

# Total accidents recorded in the training seasons (missing values count as 0).
x = (
    retirements_df.loc[retirements_df["season"].isin(seasons_to_train), 'accidents']
    .reset_index(drop=True)
    .fillna(0)
    .sum()
)
# Number of race rows that fall in the training seasons.
number_of_races_train = len(races_df.loc[races_df['season'].isin(seasons_to_train)])

# Non-null count of every races column, one row per season.
df_races_groupby = races_df.groupby(['season'], as_index=False).count()
df_races_groupby
test = df_races_groupby.loc[df_races_groupby['season'] == 2014, 'id'].iloc[0]

# Per (driver, season) accident totals, ignoring rows with missing values.
df_filtred = retirements_df.loc[
    retirements_df["season"].isin(seasons_to_train),
    ['season', 'driver_id', 'accidents'],
].dropna()
df_group_by = df_filtred.groupby(['driver_id', 'season'], as_index=False).sum()

# NOTE(review): each new column divides every row's accidents by that one
# season's race count, regardless of the row's own season -- confirm intent.
for season in seasons_to_train:
    df_group_by[f"{season}_mean_driver"] = (
        df_group_by['accidents']
        / df_races_groupby.loc[df_races_groupby['season'] == season, 'id'].iloc[0]
    )
df_group_by.loc[df_group_by['season'] == 2014]

Unnamed: 0,driver_id,season,accidents,2015_mean_driver,2014_mean_driver
0,1,2014,0.0,0.0,0.0
2,2,2014,0.0,0.0,0.0
4,3,2014,0.0,0.0,0.0
6,4,2014,0.0,0.0,0.0
8,5,2014,0.0,0.0,0.0
10,6,2014,0.0,0.0,0.0
11,7,2014,1.0,0.052632,0.052632
13,8,2014,0.0,0.0,0.0
15,9,2014,3.0,0.157895,0.157895
17,10,2014,0.0,0.0,0.0


In [None]:
def main():
    """
    Load the timing database and print one driver's accident observations.

    Builds a ``DNF`` for season 2016 with a two-season training window,
    driver 1 and team "Mercedes", then prints what
    ``calculate_accident_probability`` returns.
    """
    db_path = "F1_timingdata_2014_2019.sqlite"
    data_loader = DataLoader(db_path=db_path)
    dataframes = data_loader.load_data()

    # DNF.__init__ names this parameter ``driver``; passing ``driver_id``
    # raised a TypeError.
    dnf = DNF(
        season=2016,
        len_train_df=2,
        dataframes=dataframes,
        driver=1,
        team="Mercedes",
    )

    accident_observations = dnf.calculate_accident_probability()
    print("Accident observations over the training seasons:")
    print(accident_observations)


if __name__ == "__main__":
    # Run the demo only when this file is executed as a script, not on import.
    try:
        main()
    except Exception as e:
        # Top-level boundary: any exception raised by main() is reported as a
        # single-line message instead of a traceback.
        print(f"[ERROR] An exception occurred: {e}")