In [3]:
import json
import sqlite3
import pandas as pd

# Aggregate lap-time statistics for one driver in one calendar year.
# Both values are bound as parameters (never interpolated into the SQL text);
# the year is compared as text because strftime('%Y', ...) yields a string.
_LAP_TIME_STATS_QUERY = '''
    SELECT
        MAX(milliseconds) AS max_lap_time,
        MIN(milliseconds) AS min_lap_time,
        AVG(milliseconds) AS avg_lap_time
    FROM
        lap_times
    WHERE
        driverId = ?
        AND strftime('%Y', (SELECT date FROM races WHERE raceId = lap_times.raceId)) = ?
'''


def _fetch_lap_time_stats(cursor, driver_id, year):
    """Return the (max, min, avg) lap-time row for one driver in one year.

    The aggregate query always yields exactly one row; each element is None
    when the driver has no laps recorded in races dated in that year.
    """
    cursor.execute(_LAP_TIME_STATS_QUERY, (driver_id, str(year)))
    return cursor.fetchone()


def create_driver_years_df(conn, driver_years_json_path):
    """Build a per-driver table of lap-time statistics for first and last year.

    The JSON file must contain an object mapping driver IDs (as strings) to a
    list of years. For every driver the earliest and latest year in that list
    are looked up in the ``lap_times`` table (joined to ``races`` through
    ``raceId`` to obtain the race date) and the max/min/avg of the
    ``milliseconds`` column is collected for each of the two years.

    Args:
        conn: An open DB-API connection (e.g. ``sqlite3.Connection``) to a
            database that has ``lap_times`` and ``races`` tables.
        driver_years_json_path: Path of the JSON file described above.

    Returns:
        A ``pandas.DataFrame`` with one row per driver and the columns
        ``driverId``, ``year`` (the driver's first year),
        ``max/min/avg_lap_time_first_year`` and
        ``max/min/avg_lap_time_last_year``. Statistics are None/NaN when no
        laps match. Drivers whose year list is empty are skipped.

    Raises:
        ValueError: If a driver ID key cannot be converted to ``int``.
        OSError: If the JSON file cannot be opened.
    """
    with open(driver_years_json_path, 'r', encoding='utf-8') as json_file:
        driver_years_data = json.load(json_file)

    # Column-oriented accumulators; insertion order defines the column order.
    columns = {
        'driverId': [],
        'year': [],
        'max_lap_time_first_year': [],
        'min_lap_time_first_year': [],
        'avg_lap_time_first_year': [],
        'max_lap_time_last_year': [],
        'min_lap_time_last_year': [],
        'avg_lap_time_last_year': [],
    }

    # One cursor for the whole run, released even if a query fails.
    cursor = conn.cursor()
    try:
        for driver_id_str, years_list in driver_years_data.items():
            driver_id = int(driver_id_str)

            # A driver without any listed year has no first/last year to report.
            if not years_list:
                continue

            # min/max avoid mutating the loaded list and pick the same
            # elements a sort followed by [0] / [-1] would.
            first_year = min(years_list)
            last_year = max(years_list)

            first_stats = _fetch_lap_time_stats(cursor, driver_id, first_year)
            # A single-season driver needs only one query.
            if last_year == first_year:
                last_stats = first_stats
            else:
                last_stats = _fetch_lap_time_stats(cursor, driver_id, last_year)

            columns['driverId'].append(driver_id)
            columns['year'].append(first_year)
            columns['max_lap_time_first_year'].append(first_stats[0])
            columns['min_lap_time_first_year'].append(first_stats[1])
            columns['avg_lap_time_first_year'].append(first_stats[2])
            columns['max_lap_time_last_year'].append(last_stats[0])
            columns['min_lap_time_last_year'].append(last_stats[1])
            columns['avg_lap_time_last_year'].append(last_stats[2])
    finally:
        cursor.close()

    return pd.DataFrame(columns)

# Connect to the SQLite database
conn = sqlite3.connect('formula1-originalDB.db')

# The connection is closed in `finally` so it is released even when reading
# the JSON file or running a query raises.
try:
    # Specify the path for the input JSON file
    input_json_path = 'driver_years.json'

    # Create the DataFrame
    driver_years_df = create_driver_years_df(conn, input_json_path)

    # Print the resulting DataFrame
    print(driver_years_df)
finally:
    # Close the database connection
    conn.close()


     driverId  year  max_lap_time_first_year  min_lap_time_first_year  \
0           1  2007                  1453884                    72506   
1           2  2000                   218816                    73593   
2           3  2006                   157809                    74707   
3           4  2001                   228415                    74432   
4           5  2007                  1394884                    73998   
..        ...   ...                      ...                      ...   
134       854  2021                  2240706                    69394   
135       855  2022                  1288793                    69380   
136       856  2022                   122870                    86624   
137       857  2023                  2000574                    69862   
138       858  2023                  1106846                    69611   

     avg_lap_time_first_year  max_lap_time_last_year  min_lap_time_last_year  \
0               90600.860174               