In [1]:
import sqlite3
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf
from sklearn.metrics import r2_score
import statsmodels.stats.api as sms


class DataLoader:
    """
    Class responsible for loading data from an SQLite database.

    Attributes:
        db_path (str): Path to the SQLite database.
        dataframes (dict): Dictionary containing the loaded DataFrames.
    """

    # Tables read by load_data(); each one becomes a key of `dataframes`.
    TABLES = (
        "drivers",
        "fcyphases",
        "laps",
        "qualifyings",
        "races",
        "retirements",
        "starterfields",
    )

    def __init__(self, db_path: str):
        """
        Initialize the DataLoader class with a database path.

        Args:
            db_path (str): Path to the SQLite database.
        """
        self.db_path = db_path
        self.dataframes = {}

    def load_data(self) -> dict:
        """
        Load every table listed in ``TABLES`` into a DataFrame.

        The connection is closed even when one of the reads fails, so a
        missing table or a wrong path does not leave a handle open on the
        database file. On failure ``self.dataframes`` keeps its previous
        content and the error propagates to the caller.

        Returns:
            dict: A dictionary whose keys are table names and values are the corresponding DataFrames.
        """
        connection = sqlite3.connect(self.db_path)
        try:
            # Table names come from the constant above, never from user input,
            # so interpolating them into the query text is safe here.
            self.dataframes = {
                table: pd.read_sql_query(f"SELECT * FROM {table}", connection)
                for table in self.TABLES
            }
        finally:
            connection.close()
        return self.dataframes


class F1Team:
    """
    Season-level view of one team, built from the loaded tables.

    Attributes:
        dfs (dict): Dictionary of DataFrames; must contain "races" and "starterfields".
        team (str): Team name as written in the "team" column of "starterfields".
        season (int): Season to analyze.
        drivers_if_of_the_season (list): IDs of the drivers entered by the team
            in at least one race of the season. The attribute name is kept
            as originally spelled so existing references keep working.
    """

    def __init__(self, team: str, season: int, dataframes: dict):
        """
        Initialize the F1Team class.

        Args:
            team (str): Team name to analyze.
            season (int): Season to analyze.
            dataframes (dict): Dictionary of DataFrames containing the data.
        """
        self.dfs = dataframes
        self.team = team
        self.season = season
        self.drivers_if_of_the_season = self._load_season_team_drivers()

    def _load_season_team_drivers(self) -> list:
        """
        Collect the drivers entered by the team during the season.

        Returns:
            list: Unique driver IDs, in order of first appearance in "starterfields".
        """
        races_df = self.dfs["races"]
        season_race_ids = races_df.loc[races_df["season"] == self.season, "id"]

        starterfields_df = self.dfs["starterfields"]
        # Keep the entries of this team in the races of the requested season.
        team_entries = starterfields_df["race_id"].isin(season_race_ids) & (
            starterfields_df["team"] == self.team
        )
        return starterfields_df.loc[team_entries, "driver_id"].drop_duplicates().tolist()

    def get_pit_stop_avg(self):
        """Placeholder: not implemented yet, currently returns None."""
        pass

def main():
    """
    Load every table of the timing database.

    Returns:
        dict: Table name -> DataFrame, as produced by ``DataLoader.load_data``.
    """
    db_path = "F1_timingdata_2014_2019.sqlite"
    data_loader = DataLoader(db_path=db_path)
    # Return the tables instead of dropping them when the function exits.
    return data_loader.load_data()


if __name__ == "__main__":
    # Bind the tables at notebook scope: later cells index `dataframes` directly.
    dataframes = main()


Unnamed: 0,id,date,season,location,availablecompounds,comment,nolaps,nolapsplanned,tracklength
0,1,2014-03-16,2014,Melbourne,"A2,A3,I,W",,57,58,5303.0
1,2,2014-03-30,2014,KualaLumpur,"A1,A2,I,W",,56,56,5543.0
2,3,2014-04-06,2014,Sakhir,"A2,A3,I,W","Crash GUT in lap 41, SC somewhen in the end of...",57,57,5412.0
3,4,2014-04-20,2014,Shanghai,"A2,A3,I,W",,54,56,5451.0
4,5,2014-05-11,2014,Catalunya,"A1,A2,I,W",,66,66,4655.0
...,...,...,...,...,...,...,...,...,...
116,117,2019-10-13,2019,Suzuka,"A2,A3,A4,I,W",,52,53,5807.0
117,118,2019-10-27,2019,MexicoCity,"A3,A4,A6,I,W",,71,71,4304.0
118,119,2019-11-03,2019,Austin,"A3,A4,A6,I,W",,56,56,5513.0
119,120,2019-11-17,2019,SaoPaulo,"A2,A3,A4,I,W",,71,71,4309.0


In [88]:
from scipy.stats import fisk
import pandas as pd


class PitStop():
    """
    Pit-stop duration model for one team at one race.

    The simulated duration is the sum of two parts:
      * a "best" duration for the track, taken from earlier seasons at the
        same location (see ``calculate_best_pit_stop_duration``);
      * a random excess drawn from a Fisk (log-logistic) law fitted on the
        team's pit stops earlier in the current season
        (see ``calibrate_pit_stop_variability_law``).

    Attributes:
        team (str): Team name as written in the "team" column of "starterfields".
        race_id (int): ID of the race to simulate.
        len_train_df (int): Number of previous seasons used for the "best" duration.
        season (int): Season of the race to simulate.
        dfs (dict): Dictionary of DataFrames; "laps", "races" and "starterfields" are read.
        avg_min_pit_stop_duration (float | None): "Best" duration, or None
            until it has been computed.
    """

    def __init__(self, team, race_id, len_train_df, season, dataframes):
        self.team = team
        self.race_id = race_id
        self.len_train_df = len_train_df
        self.season = season
        self.dfs = dataframes
        # None means "not computed yet"; the other methods compute it on demand.
        self.avg_min_pit_stop_duration = None

    def calculate_best_pit_stop_duration(self):
        """
        Estimate the best achievable pit-stop duration at the race location.

        For each race held at the same location during the ``len_train_df``
        seasons before ``season``, the 2.5 % quantile of the recorded pit-stop
        durations is taken; the result is the mean of those quantiles. It is
        NaN when no such race has pit-stop data.

        Returns:
            float: The value also stored in ``avg_min_pit_stop_duration``.

        Raises:
            IndexError: If ``race_id`` is not present in the "races" table.
        """
        df_laps = self.dfs["laps"]
        df_races = self.dfs["races"]

        target_location = df_races.loc[df_races["id"] == self.race_id, "location"]
        if target_location.empty:
            raise IndexError(f"race_id {self.race_id} not found in the 'races' table")
        location = target_location.iloc[0]

        seasons_to_train = [self.season - x for x in range(1, self.len_train_df + 1)]
        races_to_train = list(
            df_races[
                (df_races["location"] == location)
                & (df_races["season"].isin(seasons_to_train))
            ]["id"]
        )

        # A low quantile rather than the strict minimum of each race.
        min_pit_stop_duration_per_race = (
            df_laps[df_laps["race_id"].isin(races_to_train)]
            .dropna(subset=["pitstopduration"])
            .groupby(["race_id"])[["pitstopduration"]]
            .quantile(q=0.025)
        )
        self.avg_min_pit_stop_duration = min_pit_stop_duration_per_race["pitstopduration"].mean()
        return self.avg_min_pit_stop_duration

    def calibrate_pit_stop_variability_law(self):
        """
        Fit a Fisk law on the team's pit-stop excess over the best duration.

        The sample is made of the team's pit stops in ``season`` at races whose
        id is lower than ``race_id``.

        Returns:
            list: ``[shape, loc, scale]`` as returned by ``fisk.fit``.

        Raises:
            ValueError: If the best duration is NaN or the team has no
                recorded pit stop in the calibration window.
        """
        if self.avg_min_pit_stop_duration is None:
            self.calculate_best_pit_stop_duration()
        if pd.isna(self.avg_min_pit_stop_duration):
            raise ValueError(
                "No historical pit-stop data at this location: "
                "the best pit-stop duration is undefined"
            )

        df_laps = self.dfs["laps"]
        df_starterfields = self.dfs["starterfields"]
        df_races = self.dfs["races"]

        # Rename the key before merging so the result never carries a second
        # "id" column, whatever columns "laps" itself has.
        race_seasons = df_races[["id", "season"]].rename(columns={"id": "race_id"})
        df_laps_with_season = df_laps.merge(race_seasons, on="race_id", how="left")

        df_merged = df_laps_with_season.merge(
            df_starterfields[["race_id", "driver_id", "team"]],
            on=["race_id", "driver_id"],
            how="left",
        )

        # NOTE(review): `race_id < self.race_id` stands for "earlier in the
        # season"; this assumes race ids grow chronologically — confirm.
        df = df_merged[
            (df_merged["team"] == self.team)
            & (df_merged["season"] == self.season)
            & (df_merged["race_id"] < self.race_id)
            & (df_merged["pitstopduration"].notna())
        ]
        if df.empty:
            raise ValueError(
                f"No pit stop recorded for team {self.team!r} in season "
                f"{self.season} before race {self.race_id}"
            )

        pitstop_diff = df["pitstopduration"] - self.avg_min_pit_stop_duration
        shape, loc, scale = fisk.fit(pitstop_diff)
        return [shape, loc, scale]

    def calculate_pit_stop_duration(self, random_state=None):
        """
        Draw one simulated pit-stop duration.

        Args:
            random_state: Forwarded to ``fisk.rvs``; pass an int or a
                generator to make the draw reproducible. The default (None)
                keeps the draw unseeded.

        Returns:
            float: Best duration plus one random excess.
        """
        shape, loc, scale = self.calibrate_pit_stop_variability_law()
        # NOTE(review): `loc` is fitted freely, so the drawn excess can be
        # negative and the result can fall below the "best" duration.
        variability = fisk.rvs(
            shape, loc=loc, scale=scale, size=1, random_state=random_state
        )[0]
        return self.avg_min_pit_stop_duration + variability
        


In [89]:
def main():
    """
    Load the timing database, then print the best and one simulated pit-stop
    duration for team "Mercedes" with race_id=57, season=2016 and two
    training seasons.
    """
    loader = DataLoader(db_path="F1_timingdata_2014_2019.sqlite")
    simulator = PitStop(
        team="Mercedes",
        race_id=57,
        len_train_df=2,
        season=2016,
        dataframes=loader.load_data(),
    )

    # The best duration must be computed before it is printed.
    simulator.calculate_best_pit_stop_duration()
    print("Average Min Pit Stop Duration:", simulator.avg_min_pit_stop_duration)

    print("Calculated Pit Stop Duration:", simulator.calculate_pit_stop_duration())


if __name__ == "__main__":
    try:
        main()
    except Exception as e:
        # Imported here because it is only needed on the failure path.
        import traceback

        print(f"[ERROR] An exception occurred: {e}")
        # The message alone does not say where the failure happened; print the
        # traceback too while still letting the rest of the notebook run.
        traceback.print_exc()


Average Min Pit Stop Duration: 22.291975
Calculated Pit Stop Duration: 19.732270109900657


In [90]:
class DNF():
    """
    Retirement (did-not-finish) statistics for one driver and team.

    Both methods currently return the matching rows of the "retirements"
    table; turning those counts into probabilities is still to be done.

    Attributes:
        season (int): Season being analyzed.
        len_train_df (int): Number of previous seasons used for accidents.
        dataframes (dict): Dictionary of DataFrames; "retirements" is read.
        dfs (dict): Same object as ``dataframes``, under the name the methods use.
        driver (int): Driver ID matched against the "driver_id" column.
        team (str): Team name; stored but not used by the current methods.
    """

    def __init__(self, season, len_train_df, dataframes, driver, team):
        self.season = season
        self.len_train_df = len_train_df
        self.dataframes = dataframes
        # The methods read `self.dfs`; keep both names pointing at the same dict.
        self.dfs = dataframes
        self.driver = driver
        self.team = team

    def calculate_accident_probability(self):
        """
        Return the driver's accident records for the training seasons.

        The training seasons are the ``len_train_df`` seasons before ``season``.
        Accidents are treated as depending on the driver.

        Returns:
            pd.DataFrame: Columns "season" and "accidents".
        """
        retirements_df = self.dfs["retirements"]
        seasons_to_train = [self.season - x for x in range(1, self.len_train_df + 1)]
        driver_training_rows = (retirements_df["driver_id"] == self.driver) & (
            retirements_df["season"].isin(seasons_to_train)
        )
        return retirements_df.loc[driver_training_rows, ["season", "accidents"]]

    def calculate_failure_probability(self):
        """
        Return the driver's failure records for the current season.

        Returns:
            pd.DataFrame: Columns "season" and "failures".
        """
        retirements_df = self.dfs["retirements"]
        # TODO: failures are meant to depend on the team and the season: take
        # the team's races over the current season (merging with the other
        # tables to find the team's driver ids) and derive the failure
        # frequency. Until then only the configured driver is used.
        driver_season_rows = (retirements_df["driver_id"] == self.driver) & (
            retirements_df["season"] == self.season
        )
        return retirements_df.loc[driver_season_rows, ["season", "failures"]]
        # Depend de la team et de la saison
        # On prend les courses de la team sur la saison actuelle et recupere la frequences de failures



In [None]:
# DNF takes (season, len_train_df, dataframes, driver, team) and has no
# race_id parameter, so the call must supply a driver and a team instead.
# NOTE(review): driver=1 and team="Mercedes" are illustrative values —
# confirm the driver/team pair that was intended here.
Dnf_Test = DNF(
    season=2016,
    len_train_df=2,
    dataframes=dataframes,
    driver=1,
    team="Mercedes",
)

In [95]:
# Look at the "starterfields" table. `dataframes` must already be defined at
# notebook scope; the bare `df` on the last line makes the notebook render it.
df=dataframes["starterfields"]
df

Unnamed: 0,race_id,driver_id,team,teamcolor,enginemanufacturer,gridposition,status,resultposition,completedlaps,speedtrap
0,1,1,Mercedes,#00D2BE,Mercedes,1,DNF,19,2,252.8
1,1,2,RedBull,#1E41FF,Renault,2,DQ,22,57,292.7
2,1,3,Mercedes,#00D2BE,Mercedes,3,F,1,57,299.1
3,1,4,McLaren,#FF8700,Mercedes,4,F,2,57,316.9
4,1,5,Ferrari,#DC0000,Ferrari,5,F,4,57,304.5
...,...,...,...,...,...,...,...,...,...,...
2474,121,35,AlfaRomeo,#9B0000,Ferrari,16,F,16,54,334.9
2475,121,11,AlfaRomeo,#9B0000,Ferrari,17,F,13,54,329.6
2476,121,44,Williams,#192c4e,Mercedes,18,F,17,54,329.3
2477,121,45,Williams,#192c4e,Mercedes,19,F,19,53,317.8


In [80]:
# "accidents" values of the retirements rows whose driver_id is 44.
retirements_df = dataframes["retirements"]
retirements_df.loc[retirements_df["driver_id"] == 44, "accidents"]

204    NaN
205    NaN
206    NaN
207    NaN
208    NaN
209    2.0
Name: accidents, dtype: float64