In [1]:
import json
import sqlite3
import pandas as pd

In [2]:
def get_lap_time_statistics(conn, drivers, year_exclusion=2023):
    """Attach per-season max/avg/min lap times to a drivers frame.

    A single grouped query computes ``MAX``, ``AVG`` and ``MIN`` of
    ``lap_times.milliseconds`` for every (driver, season) pair, where the
    season is the ``%Y`` part of ``races.date``. Each statistic is encoded
    as a one-entry JSON object string (``'{"<season>": <value>}'``) and the
    strings are collected into one list per driver, ordered by season.

    Args:
        conn: Database connection handed to ``pd.read_sql_query``. The
            query uses ``strftime`` and a ``?`` placeholder, so a
            ``sqlite3`` connection is assumed.
        drivers: DataFrame with a ``driverId`` column. It is not modified.
        year_exclusion: Season to leave out. It is converted with ``str()``
            and compared against the four-digit year text.

    Returns:
        A new DataFrame: ``drivers`` left-joined with the columns
        ``max_lap_times``, ``avg_lap_times`` and ``min_lap_times``. Drivers
        with no matching laps get missing values in those columns.

    Side effects:
        Prints the resulting frame to stdout.
    """
    # The excluded year is bound as a parameter rather than interpolated
    # into the SQL text, so unusual values cannot alter the statement.
    query = '''
        SELECT
            drivers.driverId,
            strftime('%Y', races.date) AS season,
            MAX(lap_times.milliseconds) AS max_lap_time,
            AVG(lap_times.milliseconds) AS avg_lap_time,
            MIN(lap_times.milliseconds) AS min_lap_time
        FROM
            lap_times
        JOIN
            races ON lap_times.raceId = races.raceId
        JOIN
            drivers ON lap_times.driverId = drivers.driverId
        WHERE
            strftime('%Y', races.date) != ?
        GROUP BY
            drivers.driverId, season
        ORDER BY
            drivers.driverId, season
    '''
    # strftime() yields text, so the bound value must be text as well.
    stats = pd.read_sql_query(query, conn, params=(str(year_exclusion),))

    # .tolist() converts numpy scalars to plain Python numbers, which is
    # what json.dumps needs.
    seasons = stats['season'].tolist()
    json_columns = []
    for stat in ('max', 'avg', 'min'):
        column = f'{stat}_lap_times_json'
        values = stats[f'{stat}_lap_time'].tolist()
        stats[column] = [
            json.dumps({season: value})
            for season, value in zip(seasons, values)
        ]
        json_columns.append(column)

    # One list of JSON strings per driver and statistic.
    per_driver = stats.groupby('driverId')[json_columns].agg(list).reset_index()

    drivers = pd.merge(drivers, per_driver, on='driverId', how='left')
    drivers = drivers.rename(columns={
        'max_lap_times_json': 'max_lap_times',
        'avg_lap_times_json': 'avg_lap_times',
        'min_lap_times_json': 'min_lap_times',
    })

    print(drivers)

    return drivers



In [3]:
def get_lap_positions(conn, existing_df, year_exclusion=2023):
    """Attach each driver's three most frequent lap positions per year.

    For every (driver, year) pair the query counts how many lap rows were
    recorded at each ``lap_times.position`` and keeps the three positions
    with the highest counts. Ties are broken by the lower position number,
    so the selection is deterministic. The result is stored per driver as a
    JSON object string mapping year to a list of
    ``{"position": ..., "position_count": ...}`` records, most frequent
    first.

    Args:
        conn: Database connection handed to ``pd.read_sql_query``. The
            query uses a ``?`` placeholder and window functions; a
            ``sqlite3`` connection is assumed.
        existing_df: DataFrame with a ``driverId`` column. It is not
            modified.
        year_exclusion: Value of ``races.year`` to leave out. It is
            converted with ``int()`` before being bound to the query.

    Returns:
        A new DataFrame: ``existing_df`` left-joined with a
        ``lap_positions`` column. Drivers without qualifying laps get a
        missing value.
    """
    query = '''
    WITH RankedPositions AS (
        SELECT
            d.driverId,
            r.year,
            lt.position AS position,
            COUNT(lt.position) AS position_count,
            ROW_NUMBER() OVER (
                PARTITION BY d.driverId, r.year
                ORDER BY COUNT(lt.position) DESC, lt.position ASC
            ) AS position_rank
        FROM
            drivers d
        JOIN
            lap_times lt ON d.driverId = lt.driverId
        JOIN
            races r ON lt.raceId = r.raceId
        WHERE
            r.year != ?
        GROUP BY
            d.driverId, r.year, lt.position
    )
    SELECT
        driverId,
        year,
        position,
        position_count
    FROM
        RankedPositions
    WHERE
        position_rank <= 3
    ORDER BY
        driverId, year, position_rank;
    '''

    df = pd.read_sql_query(query, conn, params=(int(year_exclusion),))

    # Build the per-driver JSON by iterating the groups explicitly instead
    # of using groupby.apply over a frame that includes the grouping column.
    payloads = {}
    for driver_id, driver_rows in df.groupby('driverId'):
        by_year = {
            year: year_rows[['position', 'position_count']].to_dict(orient='records')
            for year, year_rows in driver_rows.groupby('year')
        }
        payloads[driver_id] = json.dumps(by_year)

    lap_positions_json_df = pd.DataFrame({
        'driverId': list(payloads),
        'lap_positions': list(payloads.values()),
    })

    return pd.merge(existing_df, lap_positions_json_df, on='driverId', how='left')

In [4]:
def get_total_laps_count(conn, df, year_exclusion=2023):
    """Attach each driver's total number of recorded laps.

    Counts the rows of ``lap_times`` per ``driverId``, skipping laps from
    races whose ``date`` falls in the excluded year.

    Args:
        conn: Database connection handed to ``pd.read_sql_query``. The
            query uses ``strftime`` and a ``?`` placeholder, so a
            ``sqlite3`` connection is assumed.
        df: DataFrame with a ``driverId`` column. It is not modified.
        year_exclusion: Season to leave out. It is converted with ``str()``
            and compared against the four-digit year text.

    Returns:
        A new DataFrame: ``df`` left-joined with a ``total_laps`` column.
        Drivers without any counted lap get a missing value.
    """
    total_laps_query = '''
    SELECT
        lt.driverId,
        COUNT(*) AS total_laps
    FROM
        lap_times lt
    JOIN
        races r ON lt.raceId = r.raceId
    WHERE
        strftime('%Y', r.date) <> ?
    GROUP BY
        lt.driverId;
    '''

    # strftime() yields text, so the bound value must be text as well.
    total_laps_df = pd.read_sql_query(
        total_laps_query, conn, params=(str(year_exclusion),)
    )

    return pd.merge(df, total_laps_df, on='driverId', how='left')


In [5]:
def get_fastest_slowest_laps(conn):
    """Return the fastest and slowest lap time for every race year.

    Joins ``lap_times`` to ``races`` and takes the minimum and maximum of
    ``lap_times.milliseconds`` per ``races.year``. No year is filtered out.

    Args:
        conn: Database connection handed to ``pd.read_sql_query``.

    Returns:
        DataFrame with the columns ``year``, ``fastest_lap_time`` and
        ``slowest_lap_time``, one row per year in ascending year order.
    """
    yearly_extremes_sql = """
    SELECT
        races.year AS year,
        MIN(lap_times.milliseconds) AS fastest_lap_time,
        MAX(lap_times.milliseconds) AS slowest_lap_time
    FROM
        lap_times
    JOIN
        races ON lap_times.raceId = races.raceId
    GROUP BY
        races.year
    ORDER BY
        races.year;
    """

    # The query result is already in its final shape; hand it straight back.
    return pd.read_sql_query(yearly_extremes_sql, conn)
