In [7]:
import sqlite3
import numpy as np
import pandas as pd
from scipy.stats import fisk

In [33]:
class PitStop():
    def __init__(self, team, gp_location, season, dataframes):
        self.team = team.name
        self.gp_location = gp_location
        self.len_train_df = 2
        
        self.season = season
        self.dfs = dataframes
        
        # Check if we still need it, because we will get it through Run class
        # Retrieve the race_id for the given season and location
        df_races = self.dfs["races"]
        race_row = df_races[
            (df_races["season"] == self.season) & (df_races["location"] == self.gp_location)
        ]

        
        if race_row.empty:
            raise ValueError(f"No race found for location '{self.gp_location}' in season {self.season}.")
        self.race_id = race_row["id"].iloc[0]

    def calculate_best_pit_stop_duration(self):
        df_laps=self.dfs["laps"]
        df_races = self.dfs['races']
        location = df_races[df_races['id']==self.race_id]['location'].iloc[0]
        seasons_to_train = [self.season - x for x in range (1, self.len_train_df +1)]
        races_to_train = list(df_races[(df_races['location']==location) & (df_races['season'].isin(seasons_to_train))]['id'])

        min_pit_stop_duration_per_race = df_laps[df_laps['race_id'].isin(races_to_train)].dropna(subset=['pitstopduration']).groupby(['race_id'])[['pitstopduration']].quantile(q=0.025)
        avg_min_pit_stop_duration = min_pit_stop_duration_per_race['pitstopduration'].mean()
        self.avg_min_pit_stop_duration = avg_min_pit_stop_duration

    def calibrate_pit_stop_variability_law(self):
        df_laps=self.dfs["laps"]
        df_staterfields = self.dfs["starterfields"]
        df_races = self.dfs["races"]

        df_laps_with_season = df_laps.merge(
            df_races[["id", "season"]], 
            left_on="race_id",
            right_on="id",
            how="left"
        ).drop(columns=["id"])  # We can drop the duplicated "id" column
        
        df_merged = df_laps_with_season.merge(
            df_staterfields[["race_id", "driver_id", "team"]],
            on=["race_id", "driver_id"],
            how="left"
        )
        
        df = df_merged[
            (df_merged['team'] == self.team) & 
            (df_merged['season'] == self.season) &
            (df_merged['race_id'] < self.race_id) &
            (df_merged['pitstopduration'].notna()) & 
            (df_merged['pitstopduration'] < 700)
        ].copy()
        
        df[["pitstop_diff"]]=df[['pitstopduration']]- self.avg_min_pit_stop_duration
        shape, loc, scale=fisk.fit(df["pitstop_diff"])
        return [shape, loc, scale]

    def calculate_pit_stop_duration(self):
        shape, loc, scale = self.calibrate_pit_stop_variability_law()
        variability = fisk.rvs(shape, loc=loc, scale=scale, size=1, random_state=None)[0]
        pit_stop = self.avg_min_pit_stop_duration + variability
        return pit_stop
        


In [200]:


    # def run(self):

        
    #     # # Creer un objet Race (attribut = Recap_Tour / Positions -> liste / Drivers_Alive  -> dico / Safety_Car_Laps -> liste ... )
    #     # # Methodes (compute_lap_time(Driver, lap_number) -> cette fonction check si y'a une SC et si le driver est alive et lance le modele / Change les attributs de positions)
    #     self.get_parameters_for_simulation()
    #     # for i in range(1,no_laps+1) : 
    #     #     for driver in list_all_driver :  (list_all_driver est une liste d'objets driver)
    #     #         driver.change_status(i)
    #     #         # Normal Lap
    #     #         time = self.compute_lap_time(driver, i)
    #     #         driver.cumul_time += time
    #     #         # Pit stop :
    #     #         driver.cumul_time += self.pit_stop(driver)
    #                 # driver.update_info()

    #         # # Overtaking
    #         # # Aggrémenter le recap des tours
    #     print("Winner  :  ")
    #     return(self.laps_recap)  

  


    # def pit_stop(self,driver):
    #     #     Dico ={1: { "compound" : , "pitstop_interval" :, "pit_stop_lap" : aléatoire dans pitstop_interval}} # Le dico est un input et sera mis dans driver.pit_stops_informations
    #     # - Check si i == pitstop_lap : # Récupérer pit_stop_lap dans le dico
            
    #     #     driver.age_tire = 0
    #     #     driver.compound = # dans le dico de strategy avec la clé [driver.next_pitstop] et la deuxième clé [compound] 
    #     #     driver.cumul_time += driver.team.calculate_pit_stop_time() # Driver a un attribut qui est un objet Team de la classe Team 
    #     #     driver.next_pitstop += 1
    #     # - check si i isin race.Safety_car_laps and i isin pitstop_interval : 
    #     #     pitstop_lap = i # Changer la valeur dans le dico
    #     #     driver.age_tire = 0
    #     #     driver.compound = # dans le dico de strategy avec la clé [driver.next_pitstop] et la deuxième clé [compound] 
    #     #     driver.cumul_time += driver.team.calculate_pit_stop_time() # Driver a un attribut qui est un objet Team de la classe Team 
    #     #     driver.next_pitstop += 1 
    

    def pit_stop(self,driver):
        """

        Returns:
            float: Duration of the pit stop in seconds.
        """
        
        pit_stop = PitStop(
        team=driver.team.name, 
        gp_location=self.gp_location,
        season=self.gp_location
        dataframes=self.dataframes
    )

        pit_stop.calculate_best_pit_stop_duration() # Peut etre mettre dans team et le récupérer 
        calculated_duration = pit_stop.calculate_pit_stop_duration()
        # Reset tire attributes
        driver.tire_age = 0
        driver.compound = driver.pit_stops_info[driver.next_pit_stop]["compound"]
        driver.next_pit_stop += 1

        return pit_duration


    # def compute_lap_time(self,driver,nolap):
    #     lap_time=0
    #     if driver.alive:
    #         # FUel & TIre Model
    #         model=driver.model
    #         data_for_predictions=driver.select_features(nolap)
    #         lap_time+=model.predict(data_for_predictions) # Fuel level, tire age, coupound

    #     if nolap.isin(self.safetycar_laps):
    #         lap_time*=1.2

    #     return(lap_time)

if __name__ == "__main__":

    data_loader = DataLoader(db_path="F1_timingdata_2014_2019.sqlite")
    dataframes = data_loader.load_data()

    # Sélectionner une saison et un nom de GP
    season_test = 2019
    gp_location_test = "SaoPaulo"

    run_simulation = Run(
        season=season_test,
        gp_location=gp_location_test,
        dataframes=dataframes
    )


    # Vérifier les sorties
    print("\n=== Test de la classe Run ===")
    print("Nom de la course  :", gp_location_test)
    print("Saison            :", season_test)
    print("Nombre de tours   :", run_simulation.number_of_laps)
    print("Grille de départ  :", run_simulation.starting_grid)
    print(f"{len(run_simulation.drivers_list)} Pilotes instanciés:")

    for d in run_simulation.drivers_list:
        print(f" - {d.name} (Id={d.driver_id}) | Team={d.team.name}")
    
    print("\nDataFrame laps_recap :")
    print(run_simulation.laps_recap)
    

    print("\n=== Fin du test ===")



IndentationError: expected an indented block after 'if' statement on line 77 (587191399.py, line 79)

NameError: name 'N0' is not defined

In [9]:
def main():
    db_path = "F1_timingdata_2014_2019.sqlite"
    data_loader = DataLoader(db_path=db_path)
    dataframes = data_loader.load_data()
    mercedes=Team("Mercedes")
    pit_stop = PitStop(
        team=mercedes, 
        gp_location="SaoPaulo",
        season=2016, 
        dataframes=dataframes
    )

    pit_stop.calculate_best_pit_stop_duration()
    print("Average Min Pit Stop Duration:", pit_stop.avg_min_pit_stop_duration)

    calculated_duration = pit_stop.calculate_pit_stop_duration()
    print("Calculated Pit Stop Duration:", calculated_duration)


if __name__ == "__main__":
    try:
        main()
    except Exception as e:
        print(f"[ERROR] An exception occurred: {e}")


Average Min Pit Stop Duration: 22.886362499999997


Unnamed: 0,race_id,lapno,position,driver_id,laptime,racetime,gap,interval,compound,tireage,...,pitstopduration,nextcompound,startlapprog_vsc,endlapprog_vsc,age_vsc,startlapprog_sc,endlapprog_sc,age_sc,season,team
41870,39,12,5,3,110.969,1134.161,21.431,7.891,A4,14,...,21.531,A3,,,,,,,2016,Mercedes
41954,39,16,5,1,113.739,1523.223,21.749,1.405,A4,18,...,21.940,A2,,,,,,,2016,Mercedes
42905,40,13,1,3,102.553,1280.500,0.000,0.000,A4,15,...,24.934,A3,,,,,,,2016,Mercedes
42906,40,13,2,1,103.018,1299.084,18.584,18.584,A4,15,...,24.512,A2,,,,,,,2016,Mercedes
43177,40,28,3,1,101.221,2791.761,26.918,11.003,A2,15,...,25.030,A4,,,,,,,2016,Mercedes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61876,56,11,1,1,103.680,1143.220,0.000,0.000,A3,13,...,23.601,A3,,,,,,,2016,Mercedes
62280,56,31,1,1,121.052,3253.418,0.000,0.000,A3,20,...,23.575,A2,0.385,1.0,0.615,,,,2016,Mercedes
62281,56,31,2,3,125.712,3268.698,15.280,15.280,A2,21,...,24.149,A2,0.286,1.0,0.714,,,,2016,Mercedes
63101,57,17,1,1,86.713,1509.941,0.000,0.000,A3,19,...,21.709,A2,,,,,,,2016,Mercedes


Calculated Pit Stop Duration: 17.569961490287838


In [238]:
if __name__ == "__main__":
    # Step 1: Load the data using DataLoader
    data_loader = DataLoader(db_path="F1_timingdata_2014_2019.sqlite")
    dataframes = data_loader.load_data()

    # Step 2: Set up the Run instance
    season_test = 2019
    gp_location_test = "SaoPaulo"
    run_simulation = Run(
        season=season_test,
        gp_location=gp_location_test,
        dataframes=dataframes
    )

    # Step 3: Select a driver and set pit stop info
    test_driver = run_simulation.drivers_list[0]  # Take the first driver for testing
    test_driver.pit_stops_info = {
        0: {"compound": "Soft", "pit_stop_lap": 10},  # Example strategy
        1: {"compound": "Medium", "pit_stop_lap": 30}
    }
    test_driver.next_pit_stop = 0
    test_driver.tire_age = 25  # Force a pit stop condition

    # Step 4: Simulate a pit stop at lap 10
    lap = 10
    print(f"Before pit stop: {test_driver.name}")
    print(f"  Tire Age: {test_driver.tire_age}")
    print(f"  Cumulative Lap Time: {test_driver.cumulative_lap_time}")

    pit_duration = run_simulation.pit_stop(test_driver, lap)

    print(f"Pit stop duration: {pit_duration:.2f} seconds")
    print(f"After pit stop: {test_driver.name}")
    print(f"  Tire Age: {test_driver.tire_age}")
    print(f"  Compound: {test_driver.compound}")
    print(f"  Cumulative Lap Time: {test_driver.cumulative_lap_time}")
    print(f"  Next Pit Stop Index: {test_driver.next_pit_stop}")

    print("\n=== End of Test ===")


In [3]:
class DataLoader:
    """
    Class responsible for loading data from an SQLite database.
    """

    def __init__(self, db_path: str):
        self.db_path = db_path
        self.dataframes = {}

    def load_data(self) -> dict:
        """
        Load data from the SQLite database and store it in a dictionary.

        Returns:
            dict: A dictionary where keys are table names and values are DataFrames.
        """
        connection = sqlite3.connect(self.db_path)
        tables = [
            "drivers",
            "fcyphases",
            "laps",
            "qualifyings",
            "races",
            "retirements",
            "starterfields",
        ]
        self.dataframes = {
            table: pd.read_sql_query(f"SELECT * FROM {table}", connection)
            for table in tables
        }
        connection.close()
        return self.dataframes



In [1]:

class FuelAndTireModel:
    """
    Class for data preparation and regression analysis for a given driver
    for a specific season and race.

    Attributes:
        season (int): Season to analyze.
        driver_id (int): ID of the driver.
        race_id (int): ID of the target race for testing.
        dfs (dict): Dictionary of DataFrames.
        best_qualif_times (pd.DataFrame): DataFrame with best qualifying times per race.
        train_data (pd.DataFrame): Training dataset.
        test_data (pd.DataFrame): Test dataset.
        model: Trained regression model.
        predictions (pd.Series): Predictions generated by the model on the test data.
    """

    def __init__(self, season: int, driver_id: int, race_id: int, dataframes: dict):
        """
        Initialize the F1Model class.

        Args:
            season (int): Season to analyze.
            driver_id (int): ID of the driver to analyze.
            race_id (int): ID of the race used for testing.
            dataframes (dict): Dictionary of DataFrames containing the data.
        """
        self.season = season
        self.driver_id = driver_id
        self.race_id = race_id
        self.dfs = dataframes

        self.best_qualif_times = pd.DataFrame()
        self.train_data = pd.DataFrame()
        self.test_data = pd.DataFrame()
        self.model = None
        self.predictions = pd.Series(dtype=float)

    def clean_data(self):
        """
        Cleans the data by removing:
         - Races not finished by the driver.
         - Rainy races (Hungary and Brazil).
         - Laps under safety car or virtual safety car.
         - Laps during or right after a pit stop.
        """
        # Filter season races
        race_ids_season = self.dfs["races"][self.dfs["races"]["season"] == self.season]["id"]
        laps_df = self.dfs["laps"]
        laps_df = laps_df[
            (laps_df["driver_id"] == self.driver_id)
            & (laps_df["race_id"].isin(race_ids_season))
        ]

        starterfields_df = self.dfs["starterfields"]
        starterfields_df = starterfields_df[
            (starterfields_df["driver_id"] == self.driver_id)
            & (starterfields_df["race_id"].isin(race_ids_season))
        ]

        # Keep only finished races (status = 'F')
        finished_races = starterfields_df[starterfields_df["status"] == "F"]
        laps_df = laps_df[laps_df["race_id"].isin(finished_races["race_id"])]

        # Exclude rainy races (e.g., Budapest, SaoPaulo)
        rainy_races = ["Budapest", "SaoPaulo"]
        races_df = self.dfs["races"]
        rainy_race_ids = races_df[races_df["location"].isin(rainy_races)]["id"]
        laps_df = laps_df[~laps_df["race_id"].isin(rainy_race_ids)]

        # Exclude laps under Safety Car (fcyphases)
        fcyphases_df = self.dfs["fcyphases"]
        fcyphases_df = fcyphases_df[fcyphases_df["race_id"].isin(race_ids_season)]
        for _, row in fcyphases_df.iterrows():
            begin = row["startlap"]
            end = row["endlap"]
            laps_df = laps_df[
                ~(
                    (laps_df["race_id"] == row["race_id"])
                    & (laps_df["lapno"].between(begin, end))
                )
            ]

        # Exclude laps with pit stops and the following lap
        laps_df = laps_df.reset_index(drop=True)
        pit_in_index = laps_df[~laps_df["pitintime"].isna()].index
        pit_out_index = [i + 1 for i in pit_in_index]
        laps_df = laps_df[~laps_df.index.isin(list(pit_in_index) + pit_out_index)]

        # Update the laps DataFrame in self.dfs
        self.dfs["laps"] = laps_df

    def get_best_qualif_time(self):
        """
        Retrieves the best qualification time (Q1, Q2, or Q3) for each race
        and adds it as a 'best_qualif_time' column in the 'laps' DataFrame.
        """
        qualif_laps_df = self.dfs["qualifyings"]
        valid_race_ids = self.dfs["laps"]["race_id"].unique()
        qualif_laps_df = qualif_laps_df[
            (qualif_laps_df["driver_id"] == self.driver_id)
            & (qualif_laps_df["race_id"].isin(valid_race_ids))
        ]

        # Compute the best qualifying time
        best_qualif_times = (
            qualif_laps_df[["race_id", "q1laptime", "q2laptime", "q3laptime"]]
            .groupby("race_id")
            .min()
            .min(axis=1)
        )
        self.best_qualif_times = pd.DataFrame(
            best_qualif_times, columns=["best_qualif_time"]
        )

        # Merge with the laps DataFrame
        self.dfs["laps"] = self.dfs["laps"].merge(
            self.best_qualif_times, on="race_id", how="left"
        )

    def add_features(self):
        """
        Adds explanatory variables to the 'laps' DataFrame, including the estimated
        amount of remaining fuel ('fuelc').
        """
        laps_df = self.dfs["laps"].copy()

        # Simple estimation of remaining fuel based on the total number of laps
        laps_df["fuelc"] = (
            100
            - (100 / laps_df.groupby("race_id")["lapno"].transform("max"))
            * laps_df["lapno"]
        )

        self.dfs["laps"] = laps_df

    def clean_to_regression(self):
        """
        Prepares the data for regression by:
         - Creating the 'corrected_lap_time' column = laptime - best_qualif_time.
         - Dropping rows with missing values in the relevant variables.
        """
        required_features = ["laptime", "best_qualif_time", "fuelc", "compound", "tireage"]

        self.dfs["laps"]["corrected_lap_time"] = (
            self.dfs["laps"]["laptime"] - self.dfs["laps"]["best_qualif_time"]
        )

        self.dfs["laps"].dropna(subset=required_features, inplace=True)

    def split_train_test(self):
        """
        Splits the 'laps' DataFrame into two subsets:
         - A training set (train_data) for races between the first race of the season
           and race_id - 1 (inclusive).
         - A test set (test_data) for the race_id race.
        """
        laps_df = self.dfs["laps"]
        start_race_id = np.min(laps_df["race_id"])

        # Training: races in [start_race_id, race_id-1]
        self.train_data = laps_df[laps_df["race_id"].between(start_race_id, self.race_id - 1)]
        # Test: race = race_id
        self.test_data = laps_df[laps_df["race_id"] == self.race_id]

    def regression_analysis(self):
        """
        Performs a regression analysis on the training set and compares the predictions
        with the test set. Returns a summary of the model.

        Returns:
            statsmodels.iolib.summary.Summary: Statistical summary of the model.
        """
        # Define the regression formula
        formula = "corrected_lap_time ~ fuelc + C(compound) + C(compound):tireage"

        # Fit the model
        self.model = smf.ols(formula=formula, data=self.train_data).fit()

        # Predict on the test set
        self.predictions = self.model.predict(self.test_data)

        # Visualization: Predictions vs. actual values
        plt.figure(figsize=(10, 6))
        plt.plot(
            self.test_data["lapno"],
            self.test_data["best_qualif_time"] + self.test_data["corrected_lap_time"],
            label="Actual Lap Time",
            marker="o",
        )
        plt.plot(
            self.test_data["lapno"],
            self.test_data["best_qualif_time"] + self.predictions,
            label="Predicted Lap Time",
            marker="x",
        )
        plt.xlabel("Lap Number")
        plt.ylabel("Lap Time (s)")
        plt.title(f"Predicted vs Actual Lap Times for Race ID {self.race_id}")
        plt.legend()
        plt.grid()
        plt.show()

        # Calculate R² without volatility
        r2_without_volatility = r2_score(
            self.test_data["best_qualif_time"] + self.test_data["corrected_lap_time"],
            self.test_data["best_qualif_time"] + self.predictions,
        )
        print(f"R² on the test set (without volatility): {r2_without_volatility:.4f}")

        # Compute residual variance and standard deviation
        residuals = self.model.resid
        residual_variance = np.var(residuals, ddof=1)
        residual_sigma = np.sqrt(residual_variance)

        # Add random volatility to predictions
        np.random.seed(42)
        predictions_with_volatility = self.predictions + np.random.normal(
            0, residual_sigma, len(self.predictions)
        )

        # Visualization with volatility
        plt.figure(figsize=(10, 6))
        plt.plot(
            self.test_data["lapno"],
            self.test_data["best_qualif_time"] + self.test_data["corrected_lap_time"],
            label="Actual Lap Time",
            marker="o",
        )
        plt.plot(
            self.test_data["lapno"],
            self.test_data["best_qualif_time"] + predictions_with_volatility,
            label="Predicted Lap Time (with volatility)",
            marker="x",
        )
        plt.xlabel("Lap Number")
        plt.ylabel("Lap Time (s)")
        plt.title(
            f"Predicted vs Actual Lap Times with volatility for Race ID {self.race_id}"
        )
        plt.legend()
        plt.grid()
        plt.show()

        # Calculate R² with volatility
        r2_with_volatility = r2_score(
            self.test_data["best_qualif_time"] + self.test_data["corrected_lap_time"],
            self.test_data["best_qualif_time"] + predictions_with_volatility,
        )
        print(f"R² on the test set (with volatility): {r2_with_volatility:.4f}")


        print(f"Variance of residuals (sigma^2): {residual_variance:.4f}")
        print(f"Residual standard deviation (sigma): {residual_sigma:.4f}")
        return self.model.summary()

    def analyze_regression_assumptions(self, model):
        """
        Analyzes the regression assumptions (linearity, normality, homoscedasticity, 
        autocorrelation, multicollinearity).
        
        Args:
            model (statsmodels.regression.linear_model.RegressionResultsWrapper):
                The OLS-fitted regression model.
        """
        residuals = model.resid
        fitted = model.fittedvalues

        # Linearity and Homoscedasticity
        plt.scatter(fitted, residuals, alpha=0.5)
        plt.axhline(y=0, color="red", linestyle="--")
        plt.xlabel("Fitted Values")
        plt.ylabel("Residuals")
        plt.title("Residuals vs. Fitted Values")
        plt.show()

        # Normality of Residuals
        plt.hist(residuals, bins=30, alpha=0.7)
        plt.xlabel("Residuals")
        plt.ylabel("Frequency")
        plt.title("Distribution of Residuals")
        plt.show()

        # Q-Q Plot for Normality
        import statsmodels.api as sm
        sm.qqplot(residuals, line="s")
        plt.title("Q-Q Plot of Residuals")
        plt.show()

        # Durbin-Watson test for autocorrelation
        dw_stat = sms.durbin_watson(residuals)
        print(f"Durbin-Watson statistic: {dw_stat:.4f}")

        # Variance Inflation Factor (VIF) for multicollinearity
        from statsmodels.stats.outliers_influence import variance_inflation_factor
        exog = model.model.exog
        vifs = [variance_inflation_factor(exog, i) for i in range(exog.shape[1])]
        print("Variance Inflation Factors (VIF):")
        for idx, vif in enumerate(vifs):
            print(f"Variable {model.model.exog_names[idx]}: VIF = {vif:.2f}")

        # Estimate of residual variance
        residual_variance = np.var(residuals, ddof=1)
        print(f"Variance of residuals (sigma^2): {residual_variance:.4f}")

    '''def get_parameters(self):
        #recuperer les coefficients de la regression et la volatilité
        parameters = self.model.params
        volatility = self.model.resid
        pass'''

def regression(self):
    formula = "corrected_lap_time ~ fuelc + C(compound) + C(compound):tireage"
    self.model = smf.ols(formula=formula, data=self.train_data).fit()


def get_model_and_variability(self):
    # Return a dictionnary with the regression coefficients
    self.clean_data()
    self.get_best_qualif_time()
    self.add_features()
    self.clean_to_regression()
    self.split_train_test()
    self.regression()
    self.variability =np.std((self.model.resid))
    return(self.model, self.variability)


In [2]:
class DNFModel:
    def get_parameters(self, driver_name: str, season: int):
        """
        Example method that returns probabilities for DNF.
        """
        return 0.2, 0.2  # (prob_accident, prob_failure)

In [4]:

class Team:
    def __init__(self, name: str):
        # Initialiser strategy
        # ET driveréé
        self.name = name


class TeamRegistry:
    """
    Registry (or cache) to ensure a single instance of Team per unique name.
    """
    _teams_cache = {}

    @classmethod
    def get_team(cls, name: str) -> Team:
        if name not in cls._teams_cache:
            cls._teams_cache[name] = Team(name)
        return cls._teams_cache[name]



In [5]:

class Driver:
    def __init__(self, season: int, dataframes: dict, name: str):
        self.season = season
        self.dataframes = dataframes
        self.name = name

        self.team = None
        self.driver_id = None
        self.initials = None

        # Simulation attributes
        self.position = None
        self.cumulative_lap_time = 0
        self.compound = "medium"
        self.tire_age = 0
        self.fuelc = 1
        self.next_pit_stop = 1

        # Strategy and DNF attributes
        self.pit_stops_info = {
            1: {
                "compound": "medium",
                "pitstop_interval": [10, 20],
                "pit_stop_lap": 15  # ou aléatoire entre les bornes de pitstop_interval
            },
            2: {
                "compound": "hard",
                "pitstop_interval": [25, 35],
                "pit_stop_lap": 30
            }
            # Et ainsi de suite pour les autres arrêts
        } # Modifer None

        self.accident_dnf_probability = None
        self.failure_dnf_probability = None
        self.accident_dnf_lap = None
        self.failure_dnf_lap = None
        self.earliest_dnf_lap = None
        self.alive = True

        # Fuel & Tire attributes
        self.fuel_tire_model = None
        self.variability = None

        self.get_driver_parameters(race_id)
        
    def get_driver_parameters(self, race_id):
        drivers_df = self.dataframes["drivers"]
        starterfields_df = self.dataframes["starterfields"]
        races_df = self.dataframes["races"]

        # Get driver ID and initials
        driver_row = drivers_df[drivers_df["name"] == self.name]
        if driver_row.empty:
            raise ValueError(f"Driver '{self.name}' not found in 'drivers' table.")

        self.driver_id = driver_row.iloc[0]["id"]
        self.initials = driver_row.iloc[0]["initials"]

        # Get team
        merged_data = starterfields_df.merge(
            races_df,
            left_on="race_id",
            right_on="id",
            suffixes=("_sf", "_races")
        )
        team_row = merged_data[
            (merged_data["driver_id"] == self.driver_id) &
            (merged_data["season"] == self.season)
        ]
        if not team_row.empty:
            self.team = TeamRegistry.get_team(team_row.iloc[0]["team"])

        # Get DNF probabilities
        dnf_model = DNFModel()
        self.accident_dnf_probability, self.failure_dnf_probability = dnf_model.get_parameters(
            driver_name=self.name,
            season=self.season
        )

        # Get fuel and tire parameters
        fuel_tire_model_object = FuelAndTireModel(season=self.season, driver_id=self.driver_id, race_id=race_id, dataframes=self.dataframes)
        self.fuel_tire_model, self.variability = fuel_tire_model_object.get_model_and_variability()

    def update_status(self, current_lap: int):
        if self.alive and self.earliest_dnf_lap == current_lap:
            self.alive = False

    def update_info(self):
        self.tire_age+=1
        #  #  



In [6]:

class Run:
    def __init__(self, season: int, gp_location: str, dataframes: dict):
        # Reste : 
            # - Initialiser safety_car_laps
            # - Créer compute lap time
        self.season = season
        self.gp_location = gp_location
        self.race_id = None
        self.dataframes = dataframes

        self.safety_car_laps=[12,27]

        self.number_of_laps = None
        self.drivers_list = []
        self.starting_grid = None

        self.laps_summary = pd.DataFrame(
            columns=["lap", "driver_id", "position", "lap_time", "cumulative_lap_time"]
        )

        self._initialize_parameters()
        

    def run(self):
        for lap in range(1,self.number_of_laps):  
            for driver in self.drivers_list:
                lap_time=0
                # State and change the driver status 
                driver.update_status(lap)
                driver.update_info()
                if driver.alive:
                    # Normal lap
                    lap_time+=self.compute_lap_time(driver,lap)
                    # Pit Stop 
                    pit_stop_time=self.pit_stop(driver,lap)
                    lap_time+=pit_stop_time
                    driver.cumulative_lap_time+=lap_time  # Modifier  
            # # Overtaking
            for driver in self.drivers_list:
                self.get_driver_position(driver)
         # # Aggrémenter le recap des tours
            self.laps_summary = pd.concat([
                self.laps_summary,
                pd.DataFrame([{
                    "lap": lap,
                    "driver_id": driver.driver_id,
                    "position": driver.position,
                    "lap_time": lap_time,
                    "cumulative_lap_time": driver.cumulative_lap_time
                }])
            ], ignore_index=True)

    def get_driver_position(self, driver):
        """
        Calcule la position d'un pilote en fonction des temps cumulés.
        """
        sorted_drivers = sorted(
            [d for d in self.drivers_list if d.alive],
            key=lambda d: d.cumulative_lap_time
        )
        for idx, d in enumerate(sorted_drivers, start=1):
            if d == driver:
                driver.position=idx

    def compute_lap_time(self,driver,nolap):
        #We get the model from TireFuel Class, from that model we predict a lap time then we add to that a variability through the variability parameters
        #To predict we need to create the features from the driver parameters.
        lap_time = 0
        features = pd.DataFrame({"fuelc" : self.driver.fuelc,
                                "compound" : self.compound,
                                "tireage" :self.tire_age}, index=[0])
        time_prediction = driver.fuel_tire_model.predict(features).iloc[0]
        lap_time = time_prediction + np.random.normal(0, driver.variability)

        if nolap.isin(self.safetycar_laps):
            lap_time*=1.2

        return(lap_time)


    def _initialize_parameters(self):
        
        races_df = self.dataframes["races"]
        qualifyings_df = self.dataframes["qualifyings"]
        
        # Retrieve the race_id for the given season and location
        race_row = races_df[
            (races_df["season"] == self.season) & (races_df["location"] == self.gp_location)
        ]

        if race_row.empty:
            raise ValueError(f"No race found for location '{self.gp_location}' in season {self.season}.")
        self.race_id = race_row["id"].iloc[0]

        # Get number of laps
        race_row = races_df[
            (races_df["location"] == self.gp_location) & 
            (races_df["season"] == self.season)
        ]
        if race_row.empty:
            raise ValueError(f"No race found for '{self.gp_location}' in season {self.season}.")
        self.number_of_laps = race_row.iloc[0]["nolapsplanned"]

        # Build starting grid
        merged_data = qualifyings_df.merge(
            races_df, 
            left_on="race_id", 
            right_on="id", 
            suffixes=("_qualifying", "_race")
        )
        qualifying_rows = merged_data[
            (merged_data["season"] == self.season) & 
            (merged_data["location"] == self.gp_location)
        ]
        if qualifying_rows.empty:
            raise ValueError(f"No qualifying data found for '{self.gp_location}' in season {self.season}.")
        sorted_rows = qualifying_rows.sort_values(by="position")
        self.starting_grid = list(zip(sorted_rows["driver_id"], sorted_rows["position"]))

        # Instantiate drivers
        driver_ids = sorted_rows["driver_id"].unique().tolist()
        drivers_df = self.dataframes["drivers"]

        for driver_id in driver_ids:
            driver_row = drivers_df[drivers_df["id"] == driver_id]
            if driver_row.empty:
                continue

            driver_name = driver_row.iloc[0]["name"]
            driver_obj = Driver(self.season, self.dataframes, driver_name, race_id=self.race_id)
            self.drivers_list.append(driver_obj)
    
    def pit_stop(self, driver, current_lap):
        """
        Simulates a pit stop for a driver if the conditions are met.

        Args:
            driver (Driver): The driver performing the pit stop.
            current_lap (int): The current lap of the race.
            safety_car_laps (list): List of laps where the safety car is active.

        Returns:
            float: Duration of the pit stop in seconds, or None if no pit stop is performed.
        """
        try:
            # Vérifier si un pit stop est prévu pour le pilote
            if driver.next_pit_stop in driver.pit_stops_info:
                pit_stop_data = driver.pit_stops_info[driver.next_pit_stop]

                # Vérifier si le tour actuel correspond aux conditions pour un arrêt
                is_pit_stop_lap = current_lap == pit_stop_data["pit_stop_lap"]
                is_safety_car_pit_stop = (
                    current_lap in self.safety_car_laps and current_lap in range(
                        pit_stop_data["pitstop_interval"][0], pit_stop_data["pitstop_interval"][1] + 1
                    )
                )

                if is_pit_stop_lap or is_safety_car_pit_stop:
                    # Effectuer le pit stop
                    pit_stop = PitStop(
                        team=driver.team,
                        gp_location=self.gp_location,
                        season=self.season,
                        dataframes=self.dataframes
                    )

                    # Calculer la durée moyenne du pit stop
                    pit_stop.calculate_best_pit_stop_duration()

                    # Simuler la durée du pit stop
                    calculated_duration = pit_stop.calculate_pit_stop_duration()

                    # Mettre à jour les informations du pilote
                    driver.tire_age = 0
                    driver.compound = pit_stop_data["compound"]
                    driver.next_pit_stop += 1

                    return calculated_duration

            # Si aucune condition de pit stop n'est remplie
            return 0

        except ValueError as e:
            print(f"Error during pit stop for driver {driver.name}: {e}")
            return None



    def simulate_dnf_lap(self, driver):
        driver.accident_dnf_lap = (
            np.random.randint(1, self.number_of_laps + 1)
            if np.random.binomial(1, driver.accident_dnf_probability)
            else None
        )
        driver.failure_dnf_lap = (
            np.random.randint(1, self.number_of_laps + 1)
            if np.random.binomial(1, driver.failure_dnf_probability)
            else None
        )
        potential_dnf_laps = [
            lap for lap in (driver.accident_dnf_lap, driver.failure_dnf_lap) if lap is not None
        ]
        driver.earliest_dnf_lap = min(potential_dnf_laps, default=None)


if __name__ == "__main__":
    data_loader = DataLoader(db_path="F1_timingdata_2014_2019.sqlite")
    dataframes = data_loader.load_data()

    run_simulation = Run(season=2019, gp_location="SaoPaulo", dataframes=dataframes)

    for driver in run_simulation.drivers_list:
        run_simulation.simulate_dnf_lap(driver)
        print(f"Driver: {driver.name}")
        print(f"  Accident lap: {driver.accident_dnf_lap}")
        print(f"  Failure lap: {driver.failure_dnf_lap}")
        print(f"  First DNF lap: {driver.earliest_dnf_lap}")


NameError: name 'sqlite3' is not defined

In [8]:
if __name__ == "__main__":
    data_loader = DataLoader(db_path="F1_timingdata_2014_2019.sqlite")
    dataframes = data_loader.load_data()

    run_simulation = Run(season=2019, gp_location="SaoPaulo", dataframes=dataframes)
    run_simulation.run()

    # Affichez un résumé des résultats
    for driver in run_simulation.drivers_list:
        print(f"Driver: {driver.name}, Cumulative Lap Time: {driver.cumulative_lap_time}")
    display(run_simulation.laps_summary)

TypeError: Driver.__init__() got an unexpected keyword argument 'race_id'

In [None]:
    run_simulation.laps_summary

In [37]:
if __name__ == "__main__":
    data_loader = DataLoader(db_path="F1_timingdata_2014_2019.sqlite")
    dataframes = data_loader.load_data()

    run_simulation = Run(season=2019, gp_location="SaoPaulo", dataframes=dataframes)

    for driver in run_simulation.drivers_list:
        # Simulate pit stop
        pit_duration = run_simulation.pit_stop(driver,12)
        print(f"  Pit stop duration: {pit_duration:.2f} seconds" if pit_duration else "  Pit stop failed.")


  Pit stop duration: 23.94 seconds
  Pit stop duration: 21.21 seconds
  Pit stop duration: 22.22 seconds
  Pit stop duration: 44.90 seconds
  Pit stop duration: 25.54 seconds
  Pit stop duration: 31.68 seconds
  Pit stop duration: 24.72 seconds
  Pit stop duration: 21.15 seconds
  Pit stop duration: 21.26 seconds
  Pit stop duration: 21.55 seconds
  Pit stop duration: 20.11 seconds
  Pit stop duration: 21.05 seconds
  Pit stop duration: 20.16 seconds
  Pit stop duration: 25.01 seconds
  Pit stop duration: 19.05 seconds
  Pit stop duration: 22.61 seconds
  Pit stop duration: 19.68 seconds
  Pit stop duration: 16.88 seconds
  Pit stop duration: 15.90 seconds
  Pit stop duration: 20.11 seconds


In [7]:

if __name__ == "__main__":
    
    data_loader = DataLoader(db_path="F1_timingdata_2014_2019.sqlite")
    dataframes = data_loader.load_data()

    # Sélectionner une saison et un nom de GP
    season_test = 2019
    gp_location_test = "SaoPaulo"

    run_simulation = Run(
        season=season_test,
        gp_location=gp_location_test,
        dataframes=dataframes
    )

    for driver in run_simulation.drivers_list:
        run_simulation.simulate_dnf_lap(driver)
        print(f"Driver: {driver.name}")
        print(f"  Accident lap: {driver.accident_dnf_lap}")
        print(f"  Failure lap: {driver.failure_dnf_lap}")
        print(f"  First DNF lap: {driver.earliest_dnf_lap}")

    print("\n=== Fin du test ===")

Driver: Max Verstappen
  Accident lap: None
  Failure lap: 31
  First DNF lap: 31
Driver: Sebastian Vettel
  Accident lap: None
  Failure lap: 46
  First DNF lap: 46
Driver: Lewis Hamilton
  Accident lap: None
  Failure lap: None
  First DNF lap: None
Driver: Charles Leclerc
  Accident lap: None
  Failure lap: None
  First DNF lap: None
Driver: Valtteri Bottas
  Accident lap: None
  Failure lap: 43
  First DNF lap: 43
Driver: Alexander Albon
  Accident lap: None
  Failure lap: None
  First DNF lap: None
Driver: Pierre Gasly
  Accident lap: None
  Failure lap: None
  First DNF lap: None
Driver: Romain Grosjean
  Accident lap: 1
  Failure lap: None
  First DNF lap: 1
Driver: Kimi Raikkonen
  Accident lap: 26
  Failure lap: 40
  First DNF lap: 26
Driver: Kevin Magnussen
  Accident lap: None
  Failure lap: None
  First DNF lap: None
Driver: Lando Norris
  Accident lap: None
  Failure lap: None
  First DNF lap: None
Driver: Daniel Ricciardo
  Accident lap: 28
  Failure lap: 36
  First DNF l