In [1]:
import json
import sqlite3
import pandas as pd
conn = sqlite3.connect('formula1-originalDB.db')
query = '''
    SELECT
        driverId,
        forename,
        surname,
        dob,
        nationality
    FROM
        drivers
'''

drivers = pd.read_sql_query(query, conn)

print(drivers)

     driverId  forename     surname         dob nationality
0           1     Lewis    Hamilton  1985-01-07     British
1           2      Nick    Heidfeld  1977-05-10      German
2           3      Nico     Rosberg  1985-06-27      German
3           4  Fernando      Alonso  1981-07-29     Spanish
4           5    Heikki  Kovalainen  1981-10-19     Finnish
..        ...       ...         ...         ...         ...
852       854      Mick  Schumacher  1999-03-22      German
853       855    Guanyu        Zhou  1999-05-30     Chinese
854       856      Nyck    de Vries  1995-02-06       Dutch
855       857     Oscar     Piastri  2001-04-06  Australian
856       858     Logan    Sargeant  2000-12-31    American

[857 rows x 5 columns]


In [8]:
query = '''
SELECT
    drivers.driverId,
    races.year,
    AVG(pit_stops.stop) AS avg_pit_stops
FROM
    drivers
JOIN
    pit_stops ON drivers.driverId = pit_stops.driverId
JOIN
    races ON pit_stops.raceId = races.raceId
GROUP BY
    drivers.driverId, races.year;
'''


df = pd.read_sql_query(query, conn)

print(df)

     driverId  year  avg_pit_stops
0           1  2011       2.267857
1           1  2012       1.638889
2           1  2013       1.844444
3           1  2014       1.692308
4           1  2015       1.846154
..        ...   ...            ...
291       855  2023       1.892857
292       856  2022       1.000000
293       856  2023       1.722222
294       857  2023       1.789474
295       858  2023       2.115385

[296 rows x 3 columns]


In [14]:
query = '''
SELECT
    drivers.driverId,
    races.year,
    AVG(pit_stops.milliseconds) AS avg_pit_time
FROM
    drivers 
JOIN
     pit_stops ON drivers.driverId = pit_stops.driverId
JOIN
    races ON pit_stops.raceId = races.raceId
GROUP BY
    drivers.driverId, races.year;
'''

df = pd.read_sql_query(query, conn)

print(df)

     driverId  year   avg_pit_time
0           1  2011   22666.446429
1           1  2012   22671.861111
2           1  2013   22399.111111
3           1  2014   53061.974359
4           1  2015   23666.000000
..        ...   ...            ...
291       855  2023  152159.535714
292       856  2022   24628.000000
293       856  2023  115816.888889
294       857  2023  212774.842105
295       858  2023   88475.884615

[296 rows x 3 columns]


In [13]:
query = '''
WITH MostCommonPitStopLap AS (
    SELECT
        drivers.driverId,
        races.year,
        pit_stops.lap AS most_common_pitstop_lap,
        ROW_NUMBER() OVER (PARTITION BY drivers.driverId, races.year ORDER BY COUNT(pit_stops.lap) DESC) AS lap_rank
    FROM
        drivers
    JOIN
        pit_stops ON drivers.driverId = pit_stops.driverId
    JOIN
        races ON pit_stops.raceId = races.raceId
    GROUP BY
        drivers.driverId, races.year, pit_stops.lap
)
SELECT
    driverId,
    year,
    most_common_pitstop_lap
FROM
    MostCommonPitStopLap
WHERE
    lap_rank = 1;
'''

df = pd.read_sql_query(query, conn)

print(df)

     driverId  year  most_common_pitstop_lap
0           1  2011                       16
1           1  2012                       36
2           1  2013                       31
3           1  2014                       26
4           1  2015                       13
..        ...   ...                      ...
291       855  2023                       54
292       856  2022                       19
293       856  2023                       27
294       857  2023                       54
295       858  2023                        8

[296 rows x 3 columns]
