In [1]:
import sqlite3
import pandas as pd
import nbimporter

In [2]:
from final_scores import calculate_final_driver_places 
from lap_times_functions import get_lap_times_stats
from lap_times_functions import get_median_lap_position
from lap_times_functions import get_total_laps_count
from pit_stop_functions import get_pit_stop_stats_in_miliseconds
from pit_stop_functions import get_most_common_pit_stop_lap
from constructor_table import get_drivers_with_constructor_relationship
from constructor_table import get_constructor_points
from drivers_functions import get_age_at_first_race
from circuits_functions import get_best_performing_circuits
from results_functions import get_most_common_positions

In [3]:
# Open the SQLite database that every helper below receives as `conn`.
# The path is relative, so it resolves against the kernel's working directory.
# NOTE(review): sqlite3.connect creates an empty file when the path does not
# exist, so a wrong working directory would only surface later, inside the helpers.
conn = sqlite3.connect('formula1.db')

In [4]:
# Load the driver standings table. The printed output shows one row per
# (year, driverId) sample with date_of_birth, driver_nationality,
# driver_points and driver_end_position columns.
drivers_final_positions = calculate_final_driver_places(conn)
print(drivers_final_positions)

      year  driverId date_of_birth driver_nationality  driver_points  \
73    1950       642    1906-10-30            Italian           30.0   
66    1950       579    1911-06-24          Argentine           27.0   
72    1950       786    1898-06-09            Italian           24.0   
70    1950       627    1905-11-05             French           13.0   
56    1950       647    1918-07-13            Italian           11.0   
...    ...       ...           ...                ...            ...   
3133  2022       855    1999-05-30            Chinese            6.0   
3136  2022       848    1996-03-23               Thai            4.0   
3139  2022       849    1995-06-29           Canadian            2.0   
3145  2022       856    1995-02-06              Dutch            2.0   
3140  2022       807    1987-08-19             German            0.0   

      driver_end_position  
73                      1  
66                      2  
72                      3  
70                     

In [5]:
# Add career-to-date counters to the driver standings.
#
# For every row, each counter is the number of that driver's rows with
# year <= the row's year whose driver_end_position satisfies the condition:
#   first_place_count  -> position == 1
#   second_place_count -> position == 2
#   third_place_count  -> position == 3
#   top_5_count        -> position <= 5
#
# This replaces a row-by-row iterrows() loop that re-filtered the whole frame
# for every row (quadratic) with a grouped running total (same values).
drivers_final_positions = drivers_final_positions.sort_values(by=['year', 'driverId'])

# One 0/1 flag per counter; a missing position compares False, i.e. counts as 0.
driver_end_position = drivers_final_positions['driver_end_position']
driver_place_flags = pd.DataFrame({
    'first_place_count': driver_end_position.eq(1),
    'second_place_count': driver_end_position.eq(2),
    'third_place_count': driver_end_position.eq(3),
    'top_5_count': driver_end_position.le(5),
}).astype(int)

# Sum per (driverId, year) first so that repeated rows for the same driver and
# year all receive the same total, exactly like the `year <= row['year']` filter.
driver_season_totals = driver_place_flags.groupby(
    [drivers_final_positions['driverId'], drivers_final_positions['year']]
).sum()

# groupby sorts its keys, so within each driver the years are ascending and the
# cumulative sum is the "up to and including this season" count.
driver_career_totals = driver_season_totals.groupby(level='driverId').cumsum()

# Map the totals back onto the rows by key. Assigning column by column keeps
# the frame's index and lets the cell be re-run without duplicating columns.
driver_row_keys = pd.MultiIndex.from_arrays(
    [drivers_final_positions['driverId'], drivers_final_positions['year']]
)
for counter_name in driver_career_totals.columns:
    drivers_final_positions[counter_name] = (
        driver_career_totals[counter_name]
        .reindex(driver_row_keys)
        .fillna(0)  # rows with a missing key match nothing, hence a count of 0
        .to_numpy(dtype=float)  # the loop-based version produced float columns
    )

print(drivers_final_positions)


      year  driverId date_of_birth driver_nationality  driver_points  \
51    1950       427    1917-10-30             French            0.0   
53    1950       498    1922-10-05          Argentine            0.0   
49    1950       501    1921-06-29           American            0.0   
25    1950       509    1928-07-16           American            0.0   
37    1950       518    1913-05-05           American            0.0   
...    ...       ...           ...                ...            ...   
3139  2022       849    1995-06-29           Canadian            2.0   
3131  2022       852    2000-05-11           Japanese           12.0   
3134  2022       854    1999-03-22             German           12.0   
3133  2022       855    1999-05-30            Chinese            6.0   
3145  2022       856    1995-02-06              Dutch            2.0   

      driver_end_position  first_place_count  second_place_count  \
51                     23                0.0                 0.0   

In [6]:
# Left-join the output of get_age_at_first_race onto the standings by driverId;
# drivers without a match keep their row and get NaN in the added columns.
driver_years = get_age_at_first_race(conn)
drivers_final_positions = drivers_final_positions.merge(
    driver_years,
    how='left',
    on=['driverId'],
)

In [7]:
# Load lap-time statistics. Per the printed output the frame is keyed by
# (year, driverId) and carries min/avg/max_lap_time_diff columns.
# NOTE(review): the unit and the reference the "diff" is taken against are
# defined inside get_lap_times_stats and are not visible here.
lap_times_stats = get_lap_times_stats(conn)
print(lap_times_stats)

     year  driverId  min_lap_time_diff  avg_lap_time_diff  max_lap_time_diff
0    2007         1           0.000000           7.852700        1465.791089
1    2008         1           0.000000           7.666610         101.287948
2    2009         1           0.074346           6.171578          79.374498
3    2010         1           0.000000          10.709896        2567.450593
4    2011         1           0.000000           8.270089        1600.104940
..    ...       ...                ...                ...                ...
642  2021       853           2.817997          21.635194        2443.049302
643  2021       854           2.234354          18.256438        2428.528386
644  2022       854           0.704944          14.242761        3432.659375
645  2022       855           0.000000          12.770114        1625.453523
646  2022       856           3.086993           8.555862          46.221588

[647 rows x 5 columns]


In [8]:
# Extend the lap statistics with the output of get_median_lap_position,
# matched on (driverId, year); unmatched rows are kept with NaN.
lap_positions = get_median_lap_position(conn)
lap_times_stats = lap_times_stats.merge(
    lap_positions,
    how='left',
    on=['driverId', 'year'],
)
print(lap_times_stats)

     year  driverId  min_lap_time_diff  avg_lap_time_diff  max_lap_time_diff  \
0    2007         1           0.000000           7.852700        1465.791089   
1    2008         1           0.000000           7.666610         101.287948   
2    2009         1           0.074346           6.171578          79.374498   
3    2010         1           0.000000          10.709896        2567.450593   
4    2011         1           0.000000           8.270089        1600.104940   
..    ...       ...                ...                ...                ...   
642  2021       853           2.817997          21.635194        2443.049302   
643  2021       854           2.234354          18.256438        2428.528386   
644  2022       854           0.704944          14.242761        3432.659375   
645  2022       855           0.000000          12.770114        1625.453523   
646  2022       856           3.086993           8.555862          46.221588   

     median_lap_position  
0           

In [9]:
# Extend the lap statistics with the output of get_total_laps_count,
# matched on (driverId, year); unmatched rows are kept with NaN.
lap_count = get_total_laps_count(conn)
lap_times_stats = lap_times_stats.merge(
    lap_count,
    how='left',
    on=['driverId', 'year'],
)
print(lap_times_stats)

     year  driverId  min_lap_time_diff  avg_lap_time_diff  max_lap_time_diff  \
0    2007         1           0.000000           7.852700        1465.791089   
1    2008         1           0.000000           7.666610         101.287948   
2    2009         1           0.074346           6.171578          79.374498   
3    2010         1           0.000000          10.709896        2567.450593   
4    2011         1           0.000000           8.270089        1600.104940   
..    ...       ...                ...                ...                ...   
642  2021       853           2.817997          21.635194        2443.049302   
643  2021       854           2.234354          18.256438        2428.528386   
644  2022       854           0.704944          14.242761        3432.659375   
645  2022       855           0.000000          12.770114        1625.453523   
646  2022       856           3.086993           8.555862          46.221588   

     median_lap_position  total_laps  


In [10]:
# Load pit-stop statistics. Per the printed output the frame is keyed by
# (driverId, year) with min_pit_stop_time, avg_pit_time and max_pit_stop_time.
# NOTE(review): presumably milliseconds, going by the helper's name - confirm.
pit_stop_stats = get_pit_stop_stats_in_miliseconds(conn)
print(pit_stop_stats)

     driverId  year  min_pit_stop_time   avg_pit_time  max_pit_stop_time
0           1  2011              13173   22666.446429              35688
1           1  2012              17598   22671.861111              31081
2           1  2013              17385   22399.111111              30085
3           1  2014              19710   53061.974359            1137295
4           1  2015              16579   23666.000000              30216
..        ...   ...                ...            ...                ...
270       853  2021              15054  211286.047619            2076977
271       854  2021              15058  201851.200000            2075728
272       854  2022              14144  111302.675000            3065174
273       855  2022              14128   67903.444444            1174235
274       856  2022              24628   24628.000000              24628

[275 rows x 5 columns]


In [11]:
# Extend the pit-stop statistics with the output of get_most_common_pit_stop_lap,
# matched on (driverId, year); unmatched rows are kept with NaN.
most_common_pit_stop_lap = get_most_common_pit_stop_lap(conn)
pit_stop_stats = pit_stop_stats.merge(
    most_common_pit_stop_lap,
    how='left',
    on=['driverId', 'year'],
)
print(pit_stop_stats)

     driverId  year  min_pit_stop_time   avg_pit_time  max_pit_stop_time  \
0           1  2011              13173   22666.446429              35688   
1           1  2012              17598   22671.861111              31081   
2           1  2013              17385   22399.111111              30085   
3           1  2014              19710   53061.974359            1137295   
4           1  2015              16579   23666.000000              30216   
..        ...   ...                ...            ...                ...   
270       853  2021              15054  211286.047619            2076977   
271       854  2021              15058  201851.200000            2075728   
272       854  2022              14144  111302.675000            3065174   
273       855  2022              14128   67903.444444            1174235   
274       856  2022              24628   24628.000000              24628   

     most_common_pitstop_lap  
0                         16  
1                        

In [12]:
# Load the driver/constructor relationship table; it is joined onto the
# standings on (driverId, year) in the next cell.
# NOTE(review): presumably this is where constructorId comes from - confirm.
constructor_driver_connection = get_drivers_with_constructor_relationship(conn)

In [13]:
# Left-join the driver/constructor relationship onto the standings.
# The row count can grow here: a (driverId, year) that matches several
# relationship rows is repeated once per match (driverId 501 in 1950 appears
# twice in the printed output).
drivers_with_constructors = drivers_final_positions.merge(
    constructor_driver_connection,
    how='left',
    on=['driverId', 'year'],
)
print(drivers_with_constructors)

      year  driverId date_of_birth driver_nationality  driver_points  \
0     1950       427    1917-10-30             French            0.0   
1     1950       498    1922-10-05          Argentine            0.0   
2     1950       501    1921-06-29           American            0.0   
3     1950       501    1921-06-29           American            0.0   
4     1950       509    1928-07-16           American            0.0   
...    ...       ...           ...                ...            ...   
3482  2022       849    1995-06-29           Canadian            2.0   
3483  2022       852    2000-05-11           Japanese           12.0   
3484  2022       854    1999-03-22             German           12.0   
3485  2022       855    1999-05-30            Chinese            6.0   
3486  2022       856    1995-02-06              Dutch            2.0   

      driver_end_position  first_place_count  second_place_count  \
0                      23                0.0                 0.0   

In [14]:
# Load constructor standings. Per the printed output the columns are year,
# constructorId, constructor_points and constructor_place.
# NOTE(review): the variable name is misspelled ("connstructor"), but later
# cells reference it under this exact name, so it is left unchanged here.
connstructor_points = get_constructor_points(conn)
print(connstructor_points)

     year  constructorId  constructor_points  constructor_place
0    2022              9               759.0                  1
1    2022              6               554.0                  2
2    2022            131               515.0                  3
3    2022            214               173.0                  4
4    2022              1               159.0                  5
..    ...            ...                 ...                ...
895  1958            105                 6.0                  5
896  1958             32                 3.0                  6
897  1958            125                 0.0                  7
898  1958            127                 0.0                  7
899  1958             95                 0.0                  7

[900 rows x 4 columns]


In [15]:
# Add running counters to the constructor standings.
#
# For every row, each counter is the number of that constructor's rows with
# year <= the row's year whose constructor_place satisfies the condition:
#   constructor_first_place_count  -> place == 1
#   constructor_second_place_count -> place == 2
#   constructor_third_place_count  -> place == 3
#   constructor_top_5_count        -> place <= 5
#
# This replaces a row-by-row iterrows() loop that re-filtered the whole frame
# for every row (quadratic) with a grouped running total (same values).
connstructor_points = connstructor_points.sort_values(by=['year', 'constructorId'])

# One 0/1 flag per counter; a missing place compares False, i.e. counts as 0.
constructor_place = connstructor_points['constructor_place']
constructor_place_flags = pd.DataFrame({
    'constructor_first_place_count': constructor_place.eq(1),
    'constructor_second_place_count': constructor_place.eq(2),
    'constructor_third_place_count': constructor_place.eq(3),
    'constructor_top_5_count': constructor_place.le(5),
}).astype(int)

# Sum per (constructorId, year) first so that repeated rows for the same
# constructor and year all receive the same total, exactly like the
# `year <= row['year']` filter did.
constructor_season_totals = constructor_place_flags.groupby(
    [connstructor_points['constructorId'], connstructor_points['year']]
).sum()

# groupby sorts its keys, so within each constructor the years are ascending
# and the cumulative sum is the "up to and including this season" count.
constructor_running_totals = (
    constructor_season_totals.groupby(level='constructorId').cumsum()
)

# Map the totals back onto the rows by key. Assigning column by column keeps
# the frame's index and lets the cell be re-run without duplicating columns.
constructor_row_keys = pd.MultiIndex.from_arrays(
    [connstructor_points['constructorId'], connstructor_points['year']]
)
for counter_name in constructor_running_totals.columns:
    connstructor_points[counter_name] = (
        constructor_running_totals[counter_name]
        .reindex(constructor_row_keys)
        .fillna(0)  # rows with a missing key match nothing, hence a count of 0
        .to_numpy(dtype=float)  # the loop-based version produced float columns
    )

print(connstructor_points)


     year  constructorId  constructor_points  constructor_place  \
892  1958              6                40.0                  2   
896  1958             32                 3.0                  6   
894  1958             66                18.0                  4   
893  1958             87                31.0                  3   
899  1958             95                 0.0                  7   
..    ...            ...                 ...                ...   
6    2022            117                55.0                  6   
2    2022            131               515.0                  3   
7    2022            210                37.0                  8   
8    2022            213                35.0                  9   
3    2022            214               173.0                  4   

     constructor_first_place_count  constructor_second_place_count  \
892                            0.0                             1.0   
896                            0.0                     

In [16]:
# Attach the constructor standings (points, place and running counters) to each
# driver row, matched on (constructorId, year); unmatched rows are kept with NaN.
drivers_with_constructors = drivers_with_constructors.merge(
    connstructor_points,
    how='left',
    on=['constructorId', 'year'],
)
print(drivers_with_constructors)

      year  driverId date_of_birth driver_nationality  driver_points  \
0     1950       427    1917-10-30             French            0.0   
1     1950       498    1922-10-05          Argentine            0.0   
2     1950       501    1921-06-29           American            0.0   
3     1950       501    1921-06-29           American            0.0   
4     1950       509    1928-07-16           American            0.0   
...    ...       ...           ...                ...            ...   
3482  2022       849    1995-06-29           Canadian            2.0   
3483  2022       852    2000-05-11           Japanese           12.0   
3484  2022       854    1999-03-22             German           12.0   
3485  2022       855    1999-05-30            Chinese            6.0   
3486  2022       856    1995-02-06              Dutch            2.0   

      driver_end_position  first_place_count  second_place_count  \
0                      23                0.0                 0.0   

In [17]:
# Attach the output of get_best_performing_circuits to each driver row,
# matched on (year, driverId); unmatched rows are kept with NaN.
best_performing_circuits = get_best_performing_circuits(conn)
drivers_with_constructors = drivers_with_constructors.merge(
    best_performing_circuits,
    how='left',
    on=['year', 'driverId'],
)
print(drivers_with_constructors)

      year  driverId date_of_birth driver_nationality  driver_points  \
0     1950       427    1917-10-30             French            0.0   
1     1950       498    1922-10-05          Argentine            0.0   
2     1950       501    1921-06-29           American            0.0   
3     1950       501    1921-06-29           American            0.0   
4     1950       509    1928-07-16           American            0.0   
...    ...       ...           ...                ...            ...   
3482  2022       849    1995-06-29           Canadian            2.0   
3483  2022       852    2000-05-11           Japanese           12.0   
3484  2022       854    1999-03-22             German           12.0   
3485  2022       855    1999-05-30            Chinese            6.0   
3486  2022       856    1995-02-06              Dutch            2.0   

      driver_end_position  first_place_count  second_place_count  \
0                      23                0.0                 0.0   

In [18]:
# Combine lap and pit-stop statistics per (driverId, year). Lap rows without
# pit-stop data are kept, and every missing value is replaced by the literal
# string 'unknown' (which turns the affected columns into mixed-type columns).
lap_pit_df = (
    lap_times_stats
    .merge(
        pit_stop_stats,
        how='left',
        on=['driverId', 'year'],
        suffixes=('_lap', '_pit'),
    )
    .fillna('unknown')
)

print(lap_pit_df)

     year  driverId  min_lap_time_diff  avg_lap_time_diff  max_lap_time_diff  \
0    2007         1           0.000000           7.852700        1465.791089   
1    2008         1           0.000000           7.666610         101.287948   
2    2009         1           0.074346           6.171578          79.374498   
3    2010         1           0.000000          10.709896        2567.450593   
4    2011         1           0.000000           8.270089        1600.104940   
..    ...       ...                ...                ...                ...   
642  2021       853           2.817997          21.635194        2443.049302   
643  2021       854           2.234354          18.256438        2428.528386   
644  2022       854           0.704944          14.242761        3432.659375   
645  2022       855           0.000000          12.770114        1625.453523   
646  2022       856           3.086993           8.555862          46.221588   

     median_lap_position  total_laps mi

In [19]:
# Assemble the final table: driver/constructor rows extended with the lap and
# pit-stop statistics, matched on (driverId, year). Rows without lap data are
# kept and show NaN in the added columns.
final_df = drivers_with_constructors.merge(
    lap_pit_df,
    how='left',
    on=['driverId', 'year'],
)

In [20]:
# Display the assembled frame (rich notebook rendering of the last expression).
final_df

Unnamed: 0,year,driverId,date_of_birth,driver_nationality,driver_points,driver_end_position,first_place_count,second_place_count,third_place_count,top_5_count,...,best_performing_circuit_name,min_lap_time_diff,avg_lap_time_diff,max_lap_time_diff,median_lap_position,total_laps,min_pit_stop_time,avg_pit_time,max_pit_stop_time,most_common_pitstop_lap
0,1950,427,1917-10-30,French,0.0,23,0.0,0.0,0.0,0.0,...,Circuit de Monaco,,,,,,,,,
1,1950,498,1922-10-05,Argentine,0.0,23,0.0,0.0,0.0,0.0,...,Circuit de Monaco,,,,,,,,,
2,1950,501,1921-06-29,American,0.0,23,0.0,0.0,0.0,0.0,...,Circuit de Monaco,,,,,,,,,
3,1950,501,1921-06-29,American,0.0,23,0.0,0.0,0.0,0.0,...,Circuit de Monaco,,,,,,,,,
4,1950,509,1928-07-16,American,0.0,23,0.0,0.0,0.0,0.0,...,Indianapolis Motor Speedway,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3482,2022,849,1995-06-29,Canadian,2.0,20,0.0,0.0,0.0,0.0,...,Suzuka Circuit,1.341754,16.016565,3433.583029,18.0,1155.0,14459.0,120419.354167,3051904.0,19.0
3483,2022,852,2000-05-11,Japanese,12.0,16,0.0,0.0,0.0,0.0,...,Autodromo Enzo e Dino Ferrari,0.858179,16.069448,3378.520605,13.0,1101.0,18443.0,123340.804348,3011861.0,18.0
3484,2022,854,1999-03-22,German,12.0,16,0.0,0.0,0.0,0.0,...,Red Bull Ring,0.704944,14.242761,3432.659375,14.0,1142.0,14144.0,111302.675,3065174.0,14.0
3485,2022,855,1999-05-30,Chinese,6.0,18,0.0,0.0,0.0,0.0,...,Circuit Gilles Villeneuve,0.000000,12.770114,1625.453523,14.0,1060.0,14128.0,67903.444444,1174235.0,9.0


In [21]:
# Load the most common positions table; later cells read its
# most_common_ending_position column and join it on (driverId, year).
positions = get_most_common_positions(conn)

In [22]:
# Relabel the literal two-character value '\N' as 'Did Not Finish'.
# NOTE(review): '\N' is presumably the source data's marker for a missing
# finishing position - confirm against the results table.
positions['most_common_ending_position'] = (
    positions['most_common_ending_position']
    .replace(to_replace='\\N', value='Did Not Finish')
)

In [23]:
# Attach the most common positions to each row, matched on (driverId, year);
# unmatched rows are kept with NaN.
final_df = final_df.merge(
    positions,
    how='left',
    on=['driverId', 'year'],
)

In [24]:
# Replace every remaining missing value with the literal string 'unknown'
# before exporting; affected numeric columns become mixed-type columns.
final_df = final_df.fillna('unknown')

In [26]:
# Export the final table to CSV in the working directory, without the row
# index. Missing values were already replaced by 'unknown' in the cell above.
final_df.to_csv('final-with-pistop-miliseconds-with-unknown.csv', index=False)

In [27]:
# Display the exported frame for a visual check of the 'unknown' placeholders
# and the added most_common_* position columns.
final_df

Unnamed: 0,year,driverId,date_of_birth,driver_nationality,driver_points,driver_end_position,first_place_count,second_place_count,third_place_count,top_5_count,...,avg_lap_time_diff,max_lap_time_diff,median_lap_position,total_laps,min_pit_stop_time,avg_pit_time,max_pit_stop_time,most_common_pitstop_lap,most_common_starting_position,most_common_ending_position
0,1950,427,1917-10-30,French,0.0,23,0.0,0.0,0.0,0.0,...,unknown,unknown,unknown,unknown,unknown,unknown,unknown,unknown,13,Did Not Finish
1,1950,498,1922-10-05,Argentine,0.0,23,0.0,0.0,0.0,0.0,...,unknown,unknown,unknown,unknown,unknown,unknown,unknown,unknown,3,Did Not Finish
2,1950,501,1921-06-29,American,0.0,23,0.0,0.0,0.0,0.0,...,unknown,unknown,unknown,unknown,unknown,unknown,unknown,unknown,20,Did Not Finish
3,1950,501,1921-06-29,American,0.0,23,0.0,0.0,0.0,0.0,...,unknown,unknown,unknown,unknown,unknown,unknown,unknown,unknown,20,Did Not Finish
4,1950,509,1928-07-16,American,0.0,23,0.0,0.0,0.0,0.0,...,unknown,unknown,unknown,unknown,unknown,unknown,unknown,unknown,28,24
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3482,2022,849,1995-06-29,Canadian,2.0,20,0.0,0.0,0.0,0.0,...,16.016565,3433.583029,18.0,1155.0,14459.0,120419.354167,3051904.0,19.0,20,16
3483,2022,852,2000-05-11,Japanese,12.0,16,0.0,0.0,0.0,0.0,...,16.069448,3378.520605,13.0,1101.0,18443.0,123340.804348,3011861.0,18.0,16,8
3484,2022,854,1999-03-22,German,12.0,16,0.0,0.0,0.0,0.0,...,14.242761,3432.659375,14.0,1142.0,14144.0,111302.675,3065174.0,14.0,12,11
3485,2022,855,1999-05-30,Chinese,6.0,18,0.0,0.0,0.0,0.0,...,12.770114,1625.453523,14.0,1060.0,14128.0,67903.444444,1174235.0,9.0,15,10
