In [2]:
import sqlite3
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf
from sklearn.metrics import r2_score
import statsmodels.stats.api as sms


class DataLoader:
    """
    Class responsible for loading data from an SQLite database.

    Attributes:
        db_path (str): Path to the SQLite database.
        dataframes (dict): Dictionary containing the loaded DataFrames.
    """

    def __init__(self, db_path: str):
        """
        Initialize the DataLoader class with a database path.

        Args:
            db_path (str): Path to the SQLite database.
        """
        self.db_path = db_path
        self.dataframes = {}

    def load_data(self) -> dict:
        """
        Load data from the SQLite database and store it in a dictionary.

        Returns:
            dict: A dictionary whose keys are table names and values are the corresponding DataFrames.
        """
        connection = sqlite3.connect(self.db_path)
        tables = [
            "drivers",
            "fcyphases",
            "laps",
            "qualifyings",
            "races",
            "retirements",
            "starterfields",
        ]
        self.dataframes = {
            table: pd.read_sql_query(f"SELECT * FROM {table}", connection)
            for table in tables
        }
        connection.close()
        return self.dataframes


class F1Team:
    """
    Class for data preparation and regression analysis for a given driver
    for a specific season and race.

    Attributes:
        season (int): Season to analyze.
        driver_id (int): ID of the driver.
        race_id (int): ID of the target race for testing.
        dfs (dict): Dictionary of DataFrames.
        best_qualif_times (pd.DataFrame): DataFrame with best qualifying times per race.
        train_data (pd.DataFrame): Training dataset.
        test_data (pd.DataFrame): Test dataset.
        model: Trained regression model.
        predictions (pd.Series): Predictions generated by the model on the test data.
    """

    def __init__(self, team : str,season: int, dataframes: dict):
        """
        Initialize the F1Model class.

        Args:
            season (int): Season to analyze.
            driver_id (int): ID of the driver to analyze.
            race_id (int): ID of the race used for testing.
            dataframes (dict): Dictionary of DataFrames containing the data.
        """
        self.dfs = dataframes
        self.team = team
        self.season=season
        self.drivers_if_of_the_season = self._load_season_team_drivers()
        self.dfs = dataframes



    
    def _load_season_team_drivers(self):
        
        drivers_id=
        return(drivers_id)

    def get_pit_stop_avg(self):
def main():
    
    # Load data
    db_path = "F1_timingdata_2014_2019.sqlite"
    data_loader = DataLoader(db_path=db_path)
    dataframes = data_loader.load_data()


    


if __name__ == "__main__":
    main()


In [4]:
db_path = "F1_timingdata_2014_2019.sqlite"
data_loader = DataLoader(db_path=db_path)
dataframes = data_loader.load_data()

race_ids_season = self.dfs["races"][self.dfs["races"]["season"] == self.season]["id"]

staterfields_df=self.dfs["staterfields"]
staterfields_df[(staterfields_df["race_id"].isin(race_ids_season))&(staterfields_df)]

Unnamed: 0,race_id,driver_id,team,teamcolor,enginemanufacturer,gridposition,status,resultposition,completedlaps,speedtrap
0,1,1,Mercedes,#00D2BE,Mercedes,1,DNF,19,2,252.8
1,1,2,RedBull,#1E41FF,Renault,2,DQ,22,57,292.7
2,1,3,Mercedes,#00D2BE,Mercedes,3,F,1,57,299.1
3,1,4,McLaren,#FF8700,Mercedes,4,F,2,57,316.9
4,1,5,Ferrari,#DC0000,Ferrari,5,F,4,57,304.5
...,...,...,...,...,...,...,...,...,...,...
2474,121,35,AlfaRomeo,#9B0000,Ferrari,16,F,16,54,334.9
2475,121,11,AlfaRomeo,#9B0000,Ferrari,17,F,13,54,329.6
2476,121,44,Williams,#192c4e,Mercedes,18,F,17,54,329.3
2477,121,45,Williams,#192c4e,Mercedes,19,F,19,53,317.8
