In [1]:
import json
import pandas as pd
import sqlite3

In [2]:
def get_avg_pit_stops(conn, existing_df):
    """Attach each driver's average number of pit stops per race, by season.

    For every driver/season pair (rows whose ``races.year`` matches 2023 are
    excluded) the query counts the ``pit_stops`` rows recorded in each race and
    then averages those per-race counts. All seasons of one driver are packed
    into a single JSON object string mapping year -> average, e.g.
    ``'{"2021": 1.5, "2022": 2.0}'``.

    NOTE(review): an earlier version used ``AVG(pit_stops.stop)``, which
    averages the per-row ``stop`` value. Assuming ``stop`` is the ordinal of
    the stop within a race (1, 2, 3, ...) -- confirm against the schema --
    that is not the number of stops per race, hence the per-race count here.
    Races in which a driver has no ``pit_stops`` rows do not contribute.

    Args:
        conn: Open database connection accepted by ``pandas.read_sql_query``
            that exposes the ``drivers``, ``pit_stops`` and ``races`` tables.
        existing_df: DataFrame containing a ``driverId`` column.

    Returns:
        A new DataFrame: ``existing_df`` left-merged on ``driverId`` with an
        added ``avg_num_of_pit_stops`` column holding the JSON string. Drivers
        without any matching pit-stop rows get a missing value.
    """
    query = '''
    SELECT
        per_race.driverId AS driverId,
        per_race.year AS year,
        AVG(per_race.stops_in_race) AS avg_pit_stops
    FROM (
        SELECT
            drivers.driverId AS driverId,
            races.year AS year,
            pit_stops.raceId AS raceId,
            COUNT(*) AS stops_in_race
        FROM
            drivers
        JOIN
            pit_stops ON drivers.driverId = pit_stops.driverId
        JOIN
            races ON pit_stops.raceId = races.raceId
        WHERE
            races.year != '2023'
        GROUP BY
            drivers.driverId, races.year, pit_stops.raceId
    ) AS per_race
    GROUP BY
        per_race.driverId, per_race.year
    ORDER BY
        per_race.driverId, per_race.year;
    '''

    df = pd.read_sql_query(query, conn)

    # Iterate over the groups instead of using groupby.apply with a 3-way
    # tuple unpack: that relied on pandas passing the grouping column into
    # each group, which pandas has deprecated. Iteration also copes with an
    # empty query result (no groups -> empty mapping).
    json_by_driver = {
        driver_id: json.dumps(
            dict(zip(season_rows['year'].tolist(), season_rows['avg_pit_stops'].tolist()))
        )
        for driver_id, season_rows in df.groupby('driverId')
    }
    avg_pit_stops_json_df = pd.DataFrame(
        list(json_by_driver.items()),
        columns=['driverId', 'avg_num_of_pit_stops'],
    )

    # Left merge keeps every row of existing_df, with or without pit-stop data.
    result_df = pd.merge(existing_df, avg_pit_stops_json_df, on='driverId', how='left')

    return result_df


In [3]:
def get_most_common_pitstop_lap(conn, existing_df):
    """Attach each driver's most frequent pit-stop lap, by season.

    For every driver/season pair (rows whose ``races.year`` matches 2023 are
    excluded) the query counts how often each ``pit_stops.lap`` value occurs
    and keeps the lap with the highest count. When several laps share the
    highest count, the lowest lap number is chosen so the result is
    deterministic. All seasons of one driver are packed into a single JSON
    object string mapping year -> lap, e.g. ``'{"2021": 20, "2022": 15}'``.

    Args:
        conn: Open database connection accepted by ``pandas.read_sql_query``
            that exposes the ``drivers``, ``pit_stops`` and ``races`` tables.
            The query uses a window function (``ROW_NUMBER``).
        existing_df: DataFrame containing a ``driverId`` column.

    Returns:
        A new DataFrame: ``existing_df`` left-merged on ``driverId`` with an
        added ``most_common_pitstop_lap`` column holding the JSON string.
        Drivers without any matching pit-stop rows get a missing value.
    """
    query = '''
    WITH MostCommonPitStopLap AS (
        SELECT
            drivers.driverId AS driverId,
            races.year AS year,
            pit_stops.lap AS most_common_pitstop_lap,
            ROW_NUMBER() OVER (
                PARTITION BY drivers.driverId, races.year
                ORDER BY COUNT(pit_stops.lap) DESC, pit_stops.lap ASC
            ) AS lap_rank
        FROM
            drivers
        JOIN
            pit_stops ON drivers.driverId = pit_stops.driverId
        JOIN
            races ON pit_stops.raceId = races.raceId
        WHERE
            races.year != '2023'
        GROUP BY
            drivers.driverId, races.year, pit_stops.lap
    )
    SELECT
        driverId,
        year,
        most_common_pitstop_lap
    FROM
        MostCommonPitStopLap
    WHERE
        lap_rank = 1
    ORDER BY
        driverId, year;
    '''

    df = pd.read_sql_query(query, conn)

    # Iterate over the groups instead of using groupby.apply with a 3-way
    # tuple unpack: that relied on pandas passing the grouping column into
    # each group, which pandas has deprecated. Iteration also copes with an
    # empty query result (no groups -> empty mapping).
    json_by_driver = {
        driver_id: json.dumps(
            dict(zip(season_rows['year'].tolist(), season_rows['most_common_pitstop_lap'].tolist()))
        )
        for driver_id, season_rows in df.groupby('driverId')
    }
    most_common_pitstop_lap_json_df = pd.DataFrame(
        list(json_by_driver.items()),
        columns=['driverId', 'most_common_pitstop_lap'],
    )

    # Left merge keeps every row of existing_df, with or without pit-stop data.
    result_df = pd.merge(existing_df, most_common_pitstop_lap_json_df, on='driverId', how='left')

    return result_df


In [4]:
def get_avg_pit_time(conn, existing_df):
    """Attach each driver's average ``pit_stops.milliseconds`` value, by season.

    For every driver/season pair (rows whose ``races.year`` matches 2023 are
    excluded) the query averages ``pit_stops.milliseconds``. All seasons of
    one driver are packed into a single JSON object string mapping
    year -> average, e.g. ``'{"2021": 22000.0, "2022": 26000.0}'``.

    Args:
        conn: Open database connection accepted by ``pandas.read_sql_query``
            that exposes the ``drivers``, ``pit_stops`` and ``races`` tables.
        existing_df: DataFrame containing a ``driverId`` column.

    Returns:
        A new DataFrame: ``existing_df`` left-merged on ``driverId`` with an
        added ``avg_pit_stop_time`` column holding the JSON string. Drivers
        without any matching pit-stop rows get a missing value.
    """
    query = '''
    SELECT
        drivers.driverId AS driverId,
        races.year AS year,
        AVG(pit_stops.milliseconds) AS avg_pit_time
    FROM
        drivers
    JOIN
        pit_stops ON drivers.driverId = pit_stops.driverId
    JOIN
        races ON pit_stops.raceId = races.raceId
    WHERE
        races.year != '2023'
    GROUP BY
        drivers.driverId, races.year
    ORDER BY
        drivers.driverId, races.year;
    '''

    df = pd.read_sql_query(query, conn)

    # Iterate over the groups instead of using groupby.apply with a 3-way
    # tuple unpack: that relied on pandas passing the grouping column into
    # each group, which pandas has deprecated. Iteration also copes with an
    # empty query result (no groups -> empty mapping).
    json_by_driver = {
        driver_id: json.dumps(
            dict(zip(season_rows['year'].tolist(), season_rows['avg_pit_time'].tolist()))
        )
        for driver_id, season_rows in df.groupby('driverId')
    }
    avg_pit_time_json_df = pd.DataFrame(
        list(json_by_driver.items()),
        columns=['driverId', 'avg_pit_stop_time'],
    )

    # Left merge keeps every row of existing_df, with or without pit-stop data.
    result_df = pd.merge(existing_df, avg_pit_time_json_df, on='driverId', how='left')

    return result_df


In [5]:
def get_min_pit_stop_time(conn, existing_df):
    """Attach each driver's smallest ``pit_stops.milliseconds`` value, by season.

    For every driver/season pair (rows whose ``races.year`` matches 2023 are
    excluded) the query takes the minimum of ``pit_stops.milliseconds``. All
    seasons of one driver are packed into a single JSON object string mapping
    year -> minimum, e.g. ``'{"2021": 21000, "2022": 25000}'``.

    Args:
        conn: Open database connection accepted by ``pandas.read_sql_query``
            that exposes the ``drivers``, ``pit_stops`` and ``races`` tables.
        existing_df: DataFrame containing a ``driverId`` column.

    Returns:
        A new DataFrame: ``existing_df`` left-merged on ``driverId`` with an
        added ``min_pit_stop_time`` column holding the JSON string. Drivers
        without any matching pit-stop rows get a missing value.
    """
    query = '''
    SELECT
        drivers.driverId AS driverId,
        races.year AS year,
        MIN(pit_stops.milliseconds) AS min_pit_stop_time
    FROM
        drivers
    JOIN
        pit_stops ON drivers.driverId = pit_stops.driverId
    JOIN
        races ON pit_stops.raceId = races.raceId
    WHERE
        races.year != '2023'
    GROUP BY
        drivers.driverId, races.year
    ORDER BY
        drivers.driverId, races.year;
    '''

    df = pd.read_sql_query(query, conn)

    # Iterate over the groups instead of using groupby.apply with a 3-way
    # tuple unpack: that relied on pandas passing the grouping column into
    # each group, which pandas has deprecated. Iteration also copes with an
    # empty query result (no groups -> empty mapping).
    json_by_driver = {
        driver_id: json.dumps(
            dict(zip(season_rows['year'].tolist(), season_rows['min_pit_stop_time'].tolist()))
        )
        for driver_id, season_rows in df.groupby('driverId')
    }
    min_pit_stop_time_json_df = pd.DataFrame(
        list(json_by_driver.items()),
        columns=['driverId', 'min_pit_stop_time'],
    )

    # Left merge keeps every row of existing_df, with or without pit-stop data.
    result_df = pd.merge(existing_df, min_pit_stop_time_json_df, on='driverId', how='left')

    return result_df


In [6]:
def get_max_pit_stop_time(conn, existing_df):
    """Attach each driver's largest ``pit_stops.milliseconds`` value, by season.

    For every driver/season pair (rows whose ``races.year`` matches 2023 are
    excluded) the query takes the maximum of ``pit_stops.milliseconds``. All
    seasons of one driver are packed into a single JSON object string mapping
    year -> maximum, e.g. ``'{"2021": 23000, "2022": 27000}'``.

    Args:
        conn: Open database connection accepted by ``pandas.read_sql_query``
            that exposes the ``drivers``, ``pit_stops`` and ``races`` tables.
        existing_df: DataFrame containing a ``driverId`` column.

    Returns:
        A new DataFrame: ``existing_df`` left-merged on ``driverId`` with an
        added ``max_pit_stop_time`` column holding the JSON string. Drivers
        without any matching pit-stop rows get a missing value.
    """
    query = '''
    SELECT
        drivers.driverId AS driverId,
        races.year AS year,
        MAX(pit_stops.milliseconds) AS max_pit_stop_time
    FROM
        drivers
    JOIN
        pit_stops ON drivers.driverId = pit_stops.driverId
    JOIN
        races ON pit_stops.raceId = races.raceId
    WHERE
        races.year != '2023'
    GROUP BY
        drivers.driverId, races.year
    ORDER BY
        drivers.driverId, races.year;
    '''

    df = pd.read_sql_query(query, conn)

    # Iterate over the groups instead of using groupby.apply with a 3-way
    # tuple unpack: that relied on pandas passing the grouping column into
    # each group, which pandas has deprecated. Iteration also copes with an
    # empty query result (no groups -> empty mapping).
    json_by_driver = {
        driver_id: json.dumps(
            dict(zip(season_rows['year'].tolist(), season_rows['max_pit_stop_time'].tolist()))
        )
        for driver_id, season_rows in df.groupby('driverId')
    }
    max_pit_stop_time_json_df = pd.DataFrame(
        list(json_by_driver.items()),
        columns=['driverId', 'max_pit_stop_time'],
    )

    # Left merge keeps every row of existing_df, with or without pit-stop data.
    result_df = pd.merge(existing_df, max_pit_stop_time_json_df, on='driverId', how='left')

    return result_df