In [1]:
import sqlite3
import pandas as pd
import nbimporter

In [2]:
from final_scores import calculate_final_driver_places 
from lap_times_functions import get_lap_times_stats
from lap_times_functions import get_median_lap_position
from lap_times_functions import get_total_laps_count
from pit_stop_functions import get_pit_stop_stats_in_miliseconds
from pit_stop_functions import get_most_common_pit_stop_lap
from constructor_table import get_drivers_with_constructor_relationship
from constructor_table import get_constructor_points

In [3]:
# Open the SQLite database file; this connection object is handed to every
# helper called in the cells below. The path is relative, so it is resolved
# against the kernel's current working directory.
conn = sqlite3.connect('formula1.db')

In [4]:
# Load the driver standings frame from the project helper. The printed output
# shows the columns year, driverId, driver_nationality, driver_points and
# driver_end_position.
drivers_final_positions = calculate_final_driver_places(conn)
print(drivers_final_positions)

      year  driverId driver_nationality  driver_points  driver_end_position
73    1950       642            Italian           30.0                    1
66    1950       579          Argentine           27.0                    2
72    1950       786            Italian           24.0                    3
70    1950       627             French           13.0                    4
56    1950       647            Italian           11.0                    5
...    ...       ...                ...            ...                  ...
3133  2022       855            Chinese            6.0                   18
3136  2022       848               Thai            4.0                   19
3139  2022       849           Canadian            2.0                   20
3145  2022       856              Dutch            2.0                   20
3140  2022       807             German            0.0                   22

[3146 rows x 5 columns]


In [5]:
# Order rows chronologically so the running counts below accumulate year by year.
drivers_final_positions = drivers_final_positions.sort_values(by=['year', 'driverId'])

# Career-to-date placement tallies: for every row, count the rows of the same
# driver with year <= this row's year whose end position was 1st / 2nd / 3rd /
# within the top 5. This replaces a per-row iterrows() scan (quadratic in the
# number of rows) with grouped cumulative sums that yield the same values.
end_position = drivers_final_positions['driver_end_position']
place_flags = {
    'first_place_count': end_position == 1,
    'second_place_count': end_position == 2,
    'third_place_count': end_position == 3,
    'top_5_count': end_position <= 5,
}
driver_key = drivers_final_positions['driverId']
season_key = drivers_final_positions['year']

for count_column, flag in place_flags.items():
    # Cast to float so the new columns keep the float dtype the loop-based
    # version produced (values print as 0.0, 1.0, ...).
    running_total = flag.astype(float).groupby(driver_key).cumsum()
    # If a driver has several rows in the same year, each of them must see the
    # full total for that year, i.e. the largest running value within the year.
    drivers_final_positions[count_column] = (
        running_total.groupby([driver_key, season_key]).transform('max')
    )

print(drivers_final_positions)


      year  driverId driver_nationality  driver_points  driver_end_position  \
51    1950       427             French            0.0                   23   
53    1950       498          Argentine            0.0                   23   
49    1950       501           American            0.0                   23   
25    1950       509           American            0.0                   23   
37    1950       518           American            0.0                   23   
...    ...       ...                ...            ...                  ...   
3139  2022       849           Canadian            2.0                   20   
3131  2022       852           Japanese           12.0                   16   
3134  2022       854             German           12.0                   16   
3133  2022       855            Chinese            6.0                   18   
3145  2022       856              Dutch            2.0                   20   

      first_place_count  second_place_count  third_

In [6]:
# Load lap-time statistics from the project helper. The printed output shows
# the columns year, driverId and min/avg/max_lap_time_diff.
lap_times_stats = get_lap_times_stats(conn)
print(lap_times_stats)

     year  driverId  min_lap_time_diff  avg_lap_time_diff  max_lap_time_diff
0    2007         1           0.000000           7.852700        1465.791089
1    2008         1           0.000000           7.666610         101.287948
2    2009         1           0.074346           6.171578          79.374498
3    2010         1           0.000000          10.709896        2567.450593
4    2011         1           0.000000           8.270089        1600.104940
..    ...       ...                ...                ...                ...
642  2021       853           2.817997          21.635194        2443.049302
643  2021       854           2.234354          18.256438        2428.528386
644  2022       854           0.704944          14.242761        3432.659375
645  2022       855           0.000000          12.770114        1625.453523
646  2022       856           3.086993           8.555862          46.221588

[647 rows x 5 columns]


In [7]:
# Enrich the lap-time statistics with the median lap position; the left join
# keeps every existing lap_times_stats row.
# NOTE: this cell rebinds lap_times_stats, so running it twice without
# re-running the cell that creates lap_times_stats merges the column again.
lap_positions = get_median_lap_position(conn)
lap_times_stats = lap_times_stats.merge(
    lap_positions,
    how='left',
    on=['driverId', 'year'],
)
print(lap_times_stats)

     year  driverId  min_lap_time_diff  avg_lap_time_diff  max_lap_time_diff  \
0    2007         1           0.000000           7.852700        1465.791089   
1    2008         1           0.000000           7.666610         101.287948   
2    2009         1           0.074346           6.171578          79.374498   
3    2010         1           0.000000          10.709896        2567.450593   
4    2011         1           0.000000           8.270089        1600.104940   
..    ...       ...                ...                ...                ...   
642  2021       853           2.817997          21.635194        2443.049302   
643  2021       854           2.234354          18.256438        2428.528386   
644  2022       854           0.704944          14.242761        3432.659375   
645  2022       855           0.000000          12.770114        1625.453523   
646  2022       856           3.086993           8.555862          46.221588   

     median_lap_position  
0           

In [8]:
# Add the lap-count frame to the lap statistics, matching rows on
# driverId and year; the left join keeps every existing lap_times_stats row.
lap_count = get_total_laps_count(conn)
lap_times_stats = lap_times_stats.merge(
    lap_count,
    how='left',
    on=['driverId', 'year'],
)
print(lap_times_stats)

     year  driverId  min_lap_time_diff  avg_lap_time_diff  max_lap_time_diff  \
0    2007         1           0.000000           7.852700        1465.791089   
1    2008         1           0.000000           7.666610         101.287948   
2    2009         1           0.074346           6.171578          79.374498   
3    2010         1           0.000000          10.709896        2567.450593   
4    2011         1           0.000000           8.270089        1600.104940   
..    ...       ...                ...                ...                ...   
642  2021       853           2.817997          21.635194        2443.049302   
643  2021       854           2.234354          18.256438        2428.528386   
644  2022       854           0.704944          14.242761        3432.659375   
645  2022       855           0.000000          12.770114        1625.453523   
646  2022       856           3.086993           8.555862          46.221588   

     median_lap_position  total_laps  


In [9]:
# Load pit-stop duration statistics from the project helper. The printed
# output shows the columns driverId, year, min_pit_stop_time, avg_pit_time and
# max_pit_stop_time; the helper name indicates the values are milliseconds.
pit_stop_stats = get_pit_stop_stats_in_miliseconds(conn)
print(pit_stop_stats)

     driverId  year  min_pit_stop_time   avg_pit_time  max_pit_stop_time
0           1  2011              13173   22666.446429              35688
1           1  2012              17598   22671.861111              31081
2           1  2013              17385   22399.111111              30085
3           1  2014              19710   53061.974359            1137295
4           1  2015              16579   23666.000000              30216
..        ...   ...                ...            ...                ...
270       853  2021              15054  211286.047619            2076977
271       854  2021              15058  201851.200000            2075728
272       854  2022              14144  111302.675000            3065174
273       855  2022              14128   67903.444444            1174235
274       856  2022              24628   24628.000000              24628

[275 rows x 5 columns]


In [10]:
# Attach the most common pit-stop lap frame to the pit-stop statistics,
# matching on driverId and year; the left join keeps every pit_stop_stats row.
most_common_pit_stop_lap = get_most_common_pit_stop_lap(conn)
pit_stop_stats = pit_stop_stats.merge(
    most_common_pit_stop_lap,
    how='left',
    on=['driverId', 'year'],
)
print(pit_stop_stats)

     driverId  year  min_pit_stop_time   avg_pit_time  max_pit_stop_time  \
0           1  2011              13173   22666.446429              35688   
1           1  2012              17598   22671.861111              31081   
2           1  2013              17385   22399.111111              30085   
3           1  2014              19710   53061.974359            1137295   
4           1  2015              16579   23666.000000              30216   
..        ...   ...                ...            ...                ...   
270       853  2021              15054  211286.047619            2076977   
271       854  2021              15058  201851.200000            2075728   
272       854  2022              14144  111302.675000            3065174   
273       855  2022              14128   67903.444444            1174235   
274       856  2022              24628   24628.000000              24628   

     most_common_pitstop_lap  
0                         16  
1                        

In [11]:
# Load the driver-to-constructor relationship frame from the project helper;
# it is joined on driverId and year in the next cell.
constructor_driver_connection = get_drivers_with_constructor_relationship(conn)

In [12]:
# Attach constructor information to every driver standings row. A left join
# keeps all driver rows; a driver row is repeated when the relationship table
# holds several matches for the same driverId/year (the output shows driverId
# 501 twice for 1950).
drivers_with_constructors = drivers_final_positions.merge(
    constructor_driver_connection,
    how='left',
    on=['driverId', 'year'],
)
print(drivers_with_constructors)

      year  driverId driver_nationality  driver_points  driver_end_position  \
0     1950       427             French            0.0                   23   
1     1950       498          Argentine            0.0                   23   
2     1950       501           American            0.0                   23   
3     1950       501           American            0.0                   23   
4     1950       509           American            0.0                   23   
...    ...       ...                ...            ...                  ...   
3482  2022       849           Canadian            2.0                   20   
3483  2022       852           Japanese           12.0                   16   
3484  2022       854             German           12.0                   16   
3485  2022       855            Chinese            6.0                   18   
3486  2022       856              Dutch            2.0                   20   

      first_place_count  second_place_count  third_

In [13]:
# Load the constructor standings frame from the project helper. The printed
# output shows the columns year, constructorId, constructor_nationality,
# constructor_points and constructor_place.
# NOTE: the variable name is spelled "connstructor" (double n); later cells
# refer to it by this exact name, so it is left unchanged here.
connstructor_points = get_constructor_points(conn)
print(connstructor_points)

     year  constructorId constructor_nationality  constructor_points  \
0    2022              9                Austrian               759.0   
1    2022              6                 Italian               554.0   
2    2022            131                  German               515.0   
3    2022            214                  French               173.0   
4    2022              1                 British               159.0   
..    ...            ...                     ...                 ...   
895  1958            105                 Italian                 6.0   
896  1958             32                 British                 3.0   
897  1958            125                 British                 0.0   
898  1958            127                 Italian                 0.0   
899  1958             95                  German                 0.0   

     constructor_place  
0                    1  
1                    2  
2                    3  
3                    4  
4         

In [14]:
# Order rows chronologically so the running counts below accumulate year by year.
connstructor_points = connstructor_points.sort_values(by=['year', 'constructorId'])

# History-to-date placement tallies: for every row, count the rows of the same
# constructor with year <= this row's year whose place was 1st / 2nd / 3rd /
# within the top 5. This replaces a per-row iterrows() scan (quadratic in the
# number of rows) with grouped cumulative sums that yield the same values.
constructor_place = connstructor_points['constructor_place']
constructor_place_flags = {
    'constructor_first_place_count': constructor_place == 1,
    'constructor_second_place_count': constructor_place == 2,
    'constructor_third_place_count': constructor_place == 3,
    'constructor_top_5_count': constructor_place <= 5,
}
constructor_key = connstructor_points['constructorId']
constructor_season_key = connstructor_points['year']

for count_column, flag in constructor_place_flags.items():
    # Cast to float so the new columns keep the float dtype the loop-based
    # version produced (values print as 0.0, 1.0, ...).
    running_total = flag.astype(float).groupby(constructor_key).cumsum()
    # If a constructor has several rows in the same year, each of them must see
    # the full total for that year, i.e. the largest running value in the year.
    connstructor_points[count_column] = (
        running_total.groupby([constructor_key, constructor_season_key]).transform('max')
    )

print(connstructor_points)


     year  constructorId constructor_nationality  constructor_points  \
892  1958              6                 Italian                40.0   
896  1958             32                 British                 3.0   
894  1958             66                 British                18.0   
893  1958             87                 British                31.0   
899  1958             95                  German                 0.0   
..    ...            ...                     ...                 ...   
6    2022            117                 British                55.0   
2    2022            131                  German               515.0   
7    2022            210                American                37.0   
8    2022            213                 Italian                35.0   
3    2022            214                  French               173.0   

     constructor_place  constructor_first_place_count  \
892                  2                            0.0   
896                  

In [15]:
# Bring the constructor standings (and their running placement counts) onto
# each driver row, matching on constructorId and year; driver rows without a
# matching constructor row are kept by the left join.
drivers_with_constructors = drivers_with_constructors.merge(
    connstructor_points,
    how='left',
    on=['constructorId', 'year'],
)
print(drivers_with_constructors)

      year  driverId driver_nationality  driver_points  driver_end_position  \
0     1950       427             French            0.0                   23   
1     1950       498          Argentine            0.0                   23   
2     1950       501           American            0.0                   23   
3     1950       501           American            0.0                   23   
4     1950       509           American            0.0                   23   
...    ...       ...                ...            ...                  ...   
3482  2022       849           Canadian            2.0                   20   
3483  2022       852           Japanese           12.0                   16   
3484  2022       854             German           12.0                   16   
3485  2022       855            Chinese            6.0                   18   
3486  2022       856              Dutch            2.0                   20   

      first_place_count  second_place_count  third_

In [16]:
# Combine lap-time and pit-stop statistics per driverId/year. The outer join
# keeps rows that exist on only one side; the resulting gaps are filled with
# the string 'unknown', which turns any affected numeric column into object dtype.
lap_pit_df = (
    lap_times_stats
    .merge(
        pit_stop_stats,
        how='outer',
        on=['driverId', 'year'],
        suffixes=('_lap', '_pit'),
    )
    .fillna('unknown')
)

print(lap_pit_df)

     year  driverId  min_lap_time_diff  avg_lap_time_diff  max_lap_time_diff  \
0    2007         1           0.000000           7.852700        1465.791089   
1    2008         1           0.000000           7.666610         101.287948   
2    2009         1           0.074346           6.171578          79.374498   
3    2010         1           0.000000          10.709896        2567.450593   
4    2011         1           0.000000           8.270089        1600.104940   
..    ...       ...                ...                ...                ...   
642  2021       853           2.817997          21.635194        2443.049302   
643  2021       854           2.234354          18.256438        2428.528386   
644  2022       854           0.704944          14.242761        3432.659375   
645  2022       855           0.000000          12.770114        1625.453523   
646  2022       856           3.086993           8.555862          46.221588   

     median_lap_position  total_laps mi

In [17]:
# Build the final table: driver/constructor standings joined with the lap and
# pit-stop statistics on driverId/year. The outer join keeps rows present on
# only one side; every missing value is then replaced by the string 'unknown'.
final_df = (
    drivers_with_constructors
    .merge(lap_pit_df, how='outer', on=['driverId', 'year'])
    .fillna('unknown')
)

print(final_df)

# Persist the result without the DataFrame index column.
final_df.to_csv('final-with-pistop-miliseconds.csv', index=False)

      year  driverId driver_nationality driver_points driver_end_position  \
0     1950       427             French           0.0                23.0   
1     1950       498          Argentine           0.0                23.0   
2     1950       501           American           0.0                23.0   
3     1950       501           American           0.0                23.0   
4     1950       509           American           0.0                23.0   
...    ...       ...                ...           ...                 ...   
3486  2022       856              Dutch           2.0                20.0   
3487  2002        19            unknown       unknown             unknown   
3488  2007        28            unknown       unknown             unknown   
3489  1996        58            unknown       unknown             unknown   
3490  1999        72            unknown       unknown             unknown   

     first_place_count second_place_count third_place_count top_5_count  \
