# Feature Engineering - SMA of Podium Rank for Constructor

In [28]:
import pandas as pd

In [29]:
df = pd.read_csv('./FULL_F1_DF.csv')

In [30]:
df.head()

Unnamed: 0,season,round,weather_warm,weather_cold,weather_dry,weather_wet,weather_cloudy,driver,grid,podium,...,circuit_id_buddh,circuit_id_fuji,circuit_id_pacific,circuit_id_europe,circuit_id_fair_park,circuit_id_dijon_prenois,circuit_id_rio,direction_a,direction_c,direction_nan
0,1992,16,False,False,False,False,True,mansell,1,20,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,1,0
1,1995,17,False,False,False,False,False,michael_schumacher,3,15,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,1,0
2,1988,16,True,False,False,False,True,senna,1,2,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,1,0
3,1994,16,True,False,False,False,True,michael_schumacher,2,20,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,1,0
4,1993,16,True,False,False,False,False,prost,2,2,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,1,0


# Initial Processing

### Order data by season and round

In [31]:
df['timestamp'] = df['season'] + df['round'].div(100)

In [32]:
df = df.sort_values('timestamp')

### Undo One-Hot Encode for Nationality

In [33]:
cons = [n for n in df.columns if n.startswith('constructor')]
cons.remove('constructor_points')
cons.remove('constructor_wins')
cons.remove('constructor_standings_pos')
cons

['constructor_alfa',
 'constructor_arrows',
 'constructor_bar',
 'constructor_benetton',
 'constructor_brabham',
 'constructor_ferrari',
 'constructor_footwork',
 'constructor_force_india',
 'constructor_haas',
 'constructor_jaguar',
 'constructor_jordan',
 'constructor_larrousse',
 'constructor_ligier',
 'constructor_lotus_f1',
 'constructor_mclaren',
 'constructor_mercedes',
 'constructor_minardi',
 'constructor_prost',
 'constructor_red_bull',
 'constructor_renault',
 'constructor_sauber',
 'constructor_team_lotus',
 'constructor_toro_rosso',
 'constructor_toyota',
 'constructor_tyrrell',
 'constructor_williams']

In [34]:
df_cons = df[cons]
df_cons

Unnamed: 0,constructor_alfa,constructor_arrows,constructor_bar,constructor_benetton,constructor_brabham,constructor_ferrari,constructor_footwork,constructor_force_india,constructor_haas,constructor_jaguar,...,constructor_minardi,constructor_prost,constructor_red_bull,constructor_renault,constructor_sauber,constructor_team_lotus,constructor_toro_rosso,constructor_toyota,constructor_tyrrell,constructor_williams
2669,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2645,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2647,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2648,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2649,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15035,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
15049,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
15038,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
15046,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [35]:
df['constructor'] = df_cons.idxmax(1)
df

Unnamed: 0,season,round,weather_warm,weather_cold,weather_dry,weather_wet,weather_cloudy,driver,grid,podium,...,circuit_id_pacific,circuit_id_europe,circuit_id_fair_park,circuit_id_dijon_prenois,circuit_id_rio,direction_a,direction_c,direction_nan,timestamp,constructor
2669,1983,1,False,False,True,False,False,manfred_winkelhock,25,14,...,0.0,0.0,0.0,0.0,0.0,1,0,0,1983.01,constructor_alfa
2645,1983,1,False,False,True,False,False,keke_rosberg,1,15,...,0.0,0.0,0.0,0.0,0.0,1,0,0,1983.01,constructor_williams
2647,1983,1,False,False,True,False,False,tambay,3,4,...,0.0,0.0,0.0,0.0,0.0,1,0,0,1983.01,constructor_ferrari
2648,1983,1,False,False,True,False,False,piquet,4,1,...,0.0,0.0,0.0,0.0,0.0,1,0,0,1983.01,constructor_brabham
2649,1983,1,False,False,True,False,False,warwick,5,7,...,0.0,0.0,0.0,0.0,0.0,1,0,0,1983.01,constructor_alfa
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15035,2022,22,True,False,False,False,False,hamilton,5,18,...,0.0,0.0,0.0,0.0,0.0,1,0,0,2022.22,constructor_mercedes
15049,2022,22,True,False,False,False,False,latifi,20,19,...,0.0,0.0,0.0,0.0,0.0,1,0,0,2022.22,constructor_williams
15038,2022,22,True,False,False,False,False,vettel,9,10,...,0.0,0.0,0.0,0.0,0.0,1,0,0,2022.22,constructor_alfa
15046,2022,22,True,False,False,False,False,gasly,17,14,...,0.0,0.0,0.0,0.0,0.0,1,0,0,2022.22,constructor_alfa


# Creating Moving Averages

We will split the dataframe apart into single nationality dataframes, create SMA's for each nationality's podium rankings, then merge them back together.
Since the rolling average doesn't work for the first 30 entries, we'll just impute the ordinary mean at first.

In [36]:
df_final = df[0:0]
df_final['podium_SMA_constructor'] = None
df_final.head()

df_temp = df[0:0]
df_temp['podium_SMA_constructor'] = None

In [38]:
final_cols = list(df_final.columns)
temp_cols = list(df_temp.columns)
for i in range(0,len(final_cols)):
    print(final_cols[i] == temp_cols[i])

True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True


In [39]:
cons

['constructor_alfa',
 'constructor_arrows',
 'constructor_bar',
 'constructor_benetton',
 'constructor_brabham',
 'constructor_ferrari',
 'constructor_footwork',
 'constructor_force_india',
 'constructor_haas',
 'constructor_jaguar',
 'constructor_jordan',
 'constructor_larrousse',
 'constructor_ligier',
 'constructor_lotus_f1',
 'constructor_mclaren',
 'constructor_mercedes',
 'constructor_minardi',
 'constructor_prost',
 'constructor_red_bull',
 'constructor_renault',
 'constructor_sauber',
 'constructor_team_lotus',
 'constructor_toro_rosso',
 'constructor_toyota',
 'constructor_tyrrell',
 'constructor_williams']

In [40]:
for constructor in cons:
    df_temp = df.loc[df[constructor] == 1]
    df_temp['podium_SMA_constructor'] = df_temp['podium'].rolling(30).mean()
    df_temp = df_temp.fillna(df_temp['podium'].head(30).mean())

    df_temp = df.loc[df[constructor] == 1]
    df_temp['podium_SMA_constructor'] = df_temp['podium'].rolling(30).mean()
    df_temp = df_temp.fillna(df_temp['podium'].head(30).mean())
    df_final = pd.concat([df_final, df_temp], axis=0)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_temp['podium_SMA_constructor'] = df_temp['podium'].rolling(30).mean()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_temp['podium_SMA_constructor'] = df_temp['podium'].rolling(30).mean()


In [41]:
df_final['constructor'].unique()

array(['constructor_alfa', 'constructor_arrows', 'constructor_bar',
       'constructor_benetton', 'constructor_brabham',
       'constructor_ferrari', 'constructor_footwork',
       'constructor_force_india', 'constructor_haas',
       'constructor_jaguar', 'constructor_jordan',
       'constructor_larrousse', 'constructor_ligier',
       'constructor_lotus_f1', 'constructor_mclaren',
       'constructor_mercedes', 'constructor_minardi', 'constructor_prost',
       'constructor_red_bull', 'constructor_renault',
       'constructor_sauber', 'constructor_team_lotus',
       'constructor_toro_rosso', 'constructor_toyota',
       'constructor_tyrrell', 'constructor_williams'], dtype=object)

In [42]:
df_final = df_final.sort_values('timestamp')

In [43]:
df_final

Unnamed: 0,season,round,weather_warm,weather_cold,weather_dry,weather_wet,weather_cloudy,driver,grid,podium,...,circuit_id_europe,circuit_id_fair_park,circuit_id_dijon_prenois,circuit_id_rio,direction_a,direction_c,direction_nan,timestamp,constructor,podium_SMA_constructor
2654,1983,1,False,False,True,False,False,baldi,10,21,...,0.0,0.0,0.0,0.0,1,0,0,1983.01,constructor_alfa,14.400000
2645,1983,1,False,False,True,False,False,keke_rosberg,1,15,...,0.0,0.0,0.0,0.0,1,0,0,1983.01,constructor_williams,9.766667
2662,1983,1,False,False,True,False,False,laffite,18,3,...,0.0,0.0,0.0,0.0,1,0,0,1983.01,constructor_williams,9.766667
2652,1983,1,False,False,True,False,False,cheever,8,18,...,0.0,0.0,0.0,0.0,1,0,0,1983.01,constructor_renault,9.466667
2646,1983,1,False,False,True,False,False,prost,2,6,...,0.0,0.0,0.0,0.0,1,0,0,1983.01,constructor_renault,9.466667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15036,2022,22,True,False,False,False,False,norris,7,6,...,0.0,0.0,0.0,0.0,1,0,0,2022.22,constructor_mclaren,10.033333
15035,2022,22,True,False,False,False,False,hamilton,5,18,...,0.0,0.0,0.0,0.0,1,0,0,2022.22,constructor_mercedes,5.566667
2449,2022,22,True,False,False,False,False,russell,6,5,...,0.0,0.0,0.0,0.0,1,0,0,2022.22,constructor_mercedes,5.166667
1037,2022,22,True,False,False,False,False,leclerc,3,2,...,0.0,0.0,0.0,0.0,1,0,0,2022.22,constructor_ferrari,6.766667
