In [138]:
import pandas as pd

- `results.csv` joins `races.csv` to get `year`
- `results.csv` joins `drivers.csv` to get `driver`
- `results.csv` joins `constructors.csv` to get `constructor`
- `results.csv` joins `pitStops.csv` to get `duration`

In [139]:
results = pd.read_csv('data/results.csv')
results

Unnamed: 0,resultId,raceId,driverId,constructorId,number,grid,position,positionText,positionOrder,points,laps,time,milliseconds,fastestLap,rank,fastestLapTime,fastestLapSpeed,statusId
0,1,18,1,1,22,1,1,1,1,10.0,58,1:34:50.616,5690616,39,2,1:27.452,218.300,1
1,2,18,2,2,3,5,2,2,2,8.0,58,+5.478,5696094,41,3,1:27.739,217.586,1
2,3,18,3,3,7,7,3,3,3,6.0,58,+8.163,5698779,41,5,1:28.090,216.719,1
3,4,18,4,4,5,11,4,4,4,5.0,58,+17.181,5707797,58,7,1:28.603,215.464,1
4,5,18,5,1,23,3,5,5,5,4.0,58,+18.014,5708630,43,1,1:27.418,218.385,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25835,25841,1096,854,210,47,12,16,16,16,0.0,57,\N,\N,39,12,1:29.833,211.632,11
25836,25842,1096,825,210,20,16,17,17,17,0.0,57,\N,\N,40,20,1:31.158,208.556,11
25837,25843,1096,1,131,44,5,18,18,18,0.0,55,\N,\N,42,11,1:29.788,211.738,9
25838,25844,1096,849,3,6,20,19,19,19,0.0,55,\N,\N,45,14,1:30.309,210.517,130


In [140]:
results = pd.read_csv('data/results.csv')
results = results[['raceId', 'driverId', 'constructorId', 'points', 'position']]

races = pd.read_csv('data/races.csv')
races.rename(columns={'name': 'raceName'}, inplace=True)
races = races[['raceId', 'year', 'raceName']]

drivers = pd.read_csv('data/drivers.csv')
drivers['driverFullname'] = drivers['forename'] + ' ' + drivers['surname']
drivers = drivers[['driverId', 'driverFullname']]
    
constructors = pd.read_csv('data/constructors.csv')
constructors.rename(columns={'name': 'constructorName'}, inplace=True)
constructors = constructors[['constructorId', 'constructorName']]

pitstops = pd.read_csv('data/pit_stops.csv')
pitstops = pitstops[['raceId', 'driverId', 'stop', 'duration', 'milliseconds']]

In [141]:
results = results.merge(races, on='raceId', how='inner')
results = results.merge(drivers, on='driverId', how='left')
results = results.merge(constructors, on='constructorId', how='left')
results = results.merge(pitstops, on=['raceId', 'driverId'], how='left')

---

### Getting `times.csv`

In [142]:
columnsTimes = ['year', 'raceName', 'driverFullname', 'constructorName', 'position', 'stop', 'milliseconds']
times = results[columnsTimes].sort_values(by=['year']) \
                             .dropna(subset=['milliseconds'])

times['duration'] = times['milliseconds'] / 1000

times.to_csv('data/times.csv', index=False)
times.to_csv('../Public-Viz-F1/times.csv', index=False)

---
### Getting `driversWinners.csv` & `constructorsWinners.csv`

In [143]:
columnsDriverWinners = ['year', 'raceId', 'raceName', 'driverFullname', 'points']
totalPoints = results[columnsDriverWinners] \
              .groupby(['year', 'driverFullname'])['points'] \
              .sum() \
              .reset_index(name='points')
              
# keep only the winners per year
driversWinners = totalPoints.loc[totalPoints.groupby('year')['points'].idxmax()] 

# indicator variable for the winners
driversWinners['winner'] = 1

# get the cumulative count of wins
driversWinners['wins'] = driversWinners.groupby(['driverFullname'])['winner'].cumsum()

# Get unique car drivers
car_drivers = driversWinners['driverFullname'].unique()

# Create a new dataframe with all years and cumulative sum for each driver
years = range(driversWinners['year'].min(), driversWinners['year'].max() + 1)
newDriversWinners = pd.DataFrame({'year': years})
for driver in car_drivers:
    drivers_cumsum = driversWinners[driversWinners['driverFullname'] == driver]['year'].value_counts().sort_index().cumsum()
    newDriversWinners[driver] = newDriversWinners['year'].map(drivers_cumsum).fillna(method='ffill').fillna(0)
    # method = 'ffill' gets the last non-null value and fills the nulls with it

# pivot to make the columns turn into rows
newDriversWinners = newDriversWinners.melt(id_vars=['year'], var_name='driverFullname', value_name='wins')

newDriversWinners.to_csv('data/driversWinners.csv', index=False)
newDriversWinners.to_csv('../Public-Viz-F1/driversWinners.csv', index=False)

newDriversWinners

Unnamed: 0,year,driverFullname,wins
0,1950,Nino Farina,1.0
1,1951,Nino Farina,1.0
2,1952,Nino Farina,1.0
3,1953,Nino Farina,1.0
4,1954,Nino Farina,1.0
...,...,...,...
2331,2018,Max Verstappen,0.0
2332,2019,Max Verstappen,0.0
2333,2020,Max Verstappen,0.0
2334,2021,Max Verstappen,0.0


In [144]:
columnsConstructorsWinners = ['year', 'raceId', 'raceName', 'constructorName', 'points']
totalPoints = results[columnsConstructorsWinners] \
              .groupby(['year', 'constructorName'])['points'] \
              .sum() \
              .reset_index(name='points')
              
# keep only the winners per year
constructorsWinners = totalPoints.loc[totalPoints.groupby('year')['points'].idxmax()] 

# indicator variable for the winners
constructorsWinners['winner'] = 1

# get the cumulative count of wins
constructorsWinners['wins'] = constructorsWinners.groupby(['constructorName'])['winner'].cumsum()

# Get unique car constructors
car_constructors = constructorsWinners['constructorName'].unique()

# Create a new dataframe with all years and cumulative sum for each constructor
years = range(constructorsWinners['year'].min(), constructorsWinners['year'].max() + 1)
newConstructorsWinners = pd.DataFrame({'year': years})
for constructor in car_constructors:
    constructor_cumsum = constructorsWinners[constructorsWinners['constructorName'] == constructor]['year'].value_counts().sort_index().cumsum()
    newConstructorsWinners[constructor] = newConstructorsWinners['year'].map(constructor_cumsum).fillna(method='ffill').fillna(0)
    # method = 'ffill' gets the last non-null value and fills the nulls with it

# pivot to make the columns turn into rows
newConstructorsWinners = newConstructorsWinners.melt(id_vars=['year'], var_name='constructorName', value_name='wins')

newConstructorsWinners.to_csv('data/constructorsWinners.csv', index=False)
newConstructorsWinners.to_csv('../Public-Viz-F1/constructorsWinners.csv', index=False)

newConstructorsWinners

Unnamed: 0,year,constructorName,wins
0,1950,Alfa Romeo,1.0
1,1951,Alfa Romeo,1.0
2,1952,Alfa Romeo,1.0
3,1953,Alfa Romeo,1.0
4,1954,Alfa Romeo,1.0
...,...,...,...
1309,2018,Red Bull,4.0
1310,2019,Red Bull,4.0
1311,2020,Red Bull,4.0
1312,2021,Red Bull,4.0
