In [None]:
import numpy as np
import scipy 
import scipy.stats
from scipy import stats
import seaborn as sns
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import itertools
from itertools import groupby
import pickle
import os
import math
from sympy import S, symbols
from string import digits
from numpy import *
from scipy.integrate import simps

from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline

%config InlineBackend.figure_format = 'retina'
%matplotlib inline

plt.style.use('fivethirtyeight')
#sns.mpl.rcParams['figure.figsize'] = (16, 10)

# Directory that holds the pickled dataframes read and written by this notebook.
# Set the F1_DATAFRAMES_DIR environment variable to point somewhere else; the
# original location is kept as the default. os.path.join(..., '') guarantees a
# trailing separator, which code that builds paths as `directory + filename` needs.
directory = os.path.join(
    os.environ.get('F1_DATAFRAMES_DIR', '/Users/dianaow/Documents/formula-1-race-data/dataframes/'),
    '')

In [None]:
def read_from_pickle(directory, filename):
    """Load and return the object pickled at ``directory``/``filename``.

    Parameters
    ----------
    directory : str
        Folder containing the pickle. A trailing path separator is optional.
    filename : str
        Name of the pickle file inside ``directory``.

    Returns
    -------
    object
        Whatever object was stored in the file.

    Notes
    -----
    ``pickle.load`` can execute arbitrary code while unpickling, so only use
    this on files that come from a trusted source.
    """
    # os.path.join gives the same path as plain concatenation when `directory`
    # already ends with a separator, and still works when it does not.
    filepath = os.path.join(directory, filename)
    with open(filepath, 'rb') as file:
        return pickle.load(file)

In [None]:
# Race table with a 'total laps' column per race name and year (queried by tyre_combis),
# plus the tyre table. df_tyres is not referenced again in the visible part of this notebook.
df_races_maxlaps = read_from_pickle(directory, "df_races_maxlaps.pickle")
df_tyres = read_from_pickle(directory, "df_tyres.pickle")

# Previously pickled tyre-degradation frames; none of the three is referenced again
# in the visible part of this notebook.
tyre_degrad_nomin = read_from_pickle(directory, "tyre_degrad_nomin.pickle")
tyre_degrad_top10 = read_from_pickle(directory, "tyre_degrad_top10.pickle")
tyre_degrad_rl = read_from_pickle(directory, "tyre_degrad_rl.pickle")

In [None]:
# Loading of ALL_COEFFS is disabled; only the aggregated per-driver coefficients are read.
#ALL_COEFFS = read_from_pickle(directory, "ALL_COEFFS.pickle")
agg_driver_coeffs = read_from_pickle(directory, "agg_driver_coeffs.pickle")

In [None]:
# `final` is the frame handed to calc_optimal_ALL further down as the tyre degradation
# models; it also supplies the race and driver names used in the next cell.
agg_tyre_coeffsC = read_from_pickle(directory, "agg_tyre_coeffsC.pickle")
agg_driver_coeffsC = read_from_pickle(directory, "agg_driver_coeffsC.pickle")
final = read_from_pickle(directory, "final.pickle")

In [None]:
# Distinct race names and driver references present in `final`.
# driver_names is passed to every calc_optimal_ALL call below.
race_names = final.name.unique()
driver_names = final.driverRef.unique()

### Pitstop timings

In [None]:
# Read the pit stop timings sheet from the workbook.
# `encoding` is not a parameter of pd.ExcelFile (current pandas raises TypeError for
# it), so it is no longer passed. The bare `xl.sheet_names` expression was dropped as
# well: in the middle of a cell it displays nothing and has no effect.
xl = pd.ExcelFile("./formula-1-race-data/Pitstop_Timings.xlsx")
pitstop_timings = xl.parse("Sheet11")

In [None]:
# Assumption: the 2017 pit stop timings are the same as those of 2016.
# In 2017 Sauber's nasr was replaced by wehrlein, so the driver reference is renamed.
# There were a couple of mid-season driver switches in 2017, but pit stop timings are
# assumed to stay constant within every team.
# Note: replace() is applied to every cell of the frame and mutates it in place.
pitstop_timings.replace("nasr", "wehrlein", inplace=True)

In [None]:
# Multiply every timing column by 1000 (presumably seconds -> milliseconds; confirm
# the units of the spreadsheet), then put the two identifier columns back in front.
ts = pitstop_timings.drop(['Constructor', 'driverRef'], axis=1).apply(lambda column: column * 1000)
pitstop_timings_f = pd.concat(
    [pitstop_timings[['Constructor', 'driverRef']], ts],
    axis=1,
)

In [70]:
# Preview the scaled pit stop timings.
# NOTE(review): in the stored output both Mercedes rows show NaN for the Spanish Grand
# Prix and 1481620.0 for the Brazilian Grand Prix, while every other displayed value
# lies roughly between 18,000 and 32,000 - check these cells in the source spreadsheet.
pitstop_timings_f.head()

Unnamed: 0,Constructor,driverRef,Australian Grand Prix,Bahrain Grand Prix,Chinese Grand Prix,Russian Grand Prix,Spanish Grand Prix,Monaco Grand Prix,Canadian Grand Prix,Azerbaijan Grand Prix,...,German Grand Prix,Belgium Grand Prix,Italian Grand Prix,Singapore Grand Prix,Malaysian Grand Prix,Japanese Grand Prix,United States Grand Prix,Mexican Grand Prix,Brazilian Grand Prix,Abu Dhabi Grand Prix
0,Mercedes,hamilton,21530.0,24510.0,22300.0,29710.0,,26320.0,22240.0,20060.0,...,18480.0,22430.0,23630.0,28320.0,23480.0,22670.0,23530.0,21710.0,1481620.0,21220.0
1,Mercedes,bottas,21530.0,24510.0,22300.0,29710.0,,26320.0,22240.0,20060.0,...,18480.0,22430.0,23630.0,28320.0,23480.0,22670.0,23530.0,21710.0,1481620.0,21220.0
2,Red Bull,ricciardo,21980.0,24220.0,22430.0,31210.0,21680.0,25050.0,22870.0,19840.0,...,18740.0,22530.0,23630.0,28660.0,23860.0,22790.0,23490.0,21780.0,24040.0,21220.0
3,Red Bull,verstappen,21980.0,24220.0,22430.0,31210.0,21680.0,25050.0,22870.0,19840.0,...,18740.0,22530.0,23630.0,28660.0,23860.0,22790.0,23490.0,21780.0,24040.0,21220.0
4,Williams,stroll,21380.0,24090.0,22120.0,29550.0,21380.0,24670.0,22180.0,19450.0,...,18590.0,22190.0,23820.0,28550.0,23530.0,22460.0,23460.0,21860.0,24720.0,21540.0


### Find optimal pit laps and tyre combination

In [None]:
def tyre_combis(tyre_to_use_list, name , year, races_maxlaps=None):
    """List every lap-by-lap tyre assignment for one race.

    The race length is read from the 'total laps' column of the first row whose
    'name' and 'year' match. Each returned tuple has one tyre label per lap and
    is produced by ``itertools.combinations_with_replacement``, so labels always
    appear in the order in which they are listed in ``tyre_to_use_list``.

    Parameters
    ----------
    tyre_to_use_list : sequence
        Tyre labels to distribute over the laps.
    name : str
        Race name to look up.
    year : int
        Season to look up.
    races_maxlaps : pandas.DataFrame, optional
        Frame with 'name', 'year' and 'total laps' columns. Defaults to the
        notebook-level ``df_races_maxlaps``.

    Returns
    -------
    list of tuple

    Raises
    ------
    IndexError
        If no row matches ``name`` and ``year``.
    """
    if races_maxlaps is None:
        races_maxlaps = df_races_maxlaps

    is_race = (races_maxlaps['name'] == name) & (races_maxlaps['year'] == year)
    laps = races_maxlaps[is_race]['total laps'].values
    if len(laps) == 0:
        # Same exception type as the bare `.values[0]` lookup, with a usable message.
        raise IndexError("no 'total laps' entry for race %r in year %r" % (name, year))

    # itertools needs a true integer; the column may hold floats (e.g. 56.0).
    maxlaps = int(laps[0])

    return list(itertools.combinations_with_replacement(tyre_to_use_list, maxlaps))

In [None]:
def calc_optimal(tyre_to_use_list, name, driver, year, tyre_degrad_models, pitstop_timings, show_table=False): 
    """Find the lap split over the given tyres with the lowest estimated race time.

    Every lap-by-lap assignment returned by ``tyre_combis`` that uses each listed
    tyre at least once is scored. A stint is a run of consecutive laps on the same
    label. Its score is the Simpson integral (``dx=5``) of the first ``laps + 1``
    entries of the matching 'coeffs' array from ``tyre_degrad_models``. Stints 2
    and 3 are shifted by the last used value of the previous stint. Each stint
    score is truncated to an integer before summing, and the driver's pit stop
    timing for the race is added once per tyre change.

    Parameters
    ----------
    tyre_to_use_list : list of str
        Tyre labels in the order they are run. Digits in a label (e.g.
        'Super soft1') only tell stints on the same compound apart; they are
        stripped before the degradation model is looked up.
    name : str
        Race name. It is also the column name used in ``pitstop_timings``.
    driver : str
        Driver reference.
    year : int
        Season used to look up the number of laps.
    tyre_degrad_models : pandas.DataFrame
        Needs 'name', 'driverRef', 'tyre', 'stint' and 'coeffs' columns.
        'coeffs' must hold array-likes supporting slicing and ``.any()``
        (presumably modelled per-lap times - confirm).
    pitstop_timings : pandas.DataFrame
        Needs a 'driverRef' column and one column per race name.
    show_table : bool, optional
        If True, return the full table of scored combinations instead of the best.

    Returns
    -------
    pandas.DataFrame or tuple
        With ``show_table`` the frame of all combinations sorted by 'timings'.
        Otherwise ``(optimal, strategy, timing)``: the stint lengths, the
        per-lap tyre labels and the total of the best combination.

    Raises
    ------
    ValueError
        If a combination has more than three stints (only three are modelled),
        or if there is no combination to pick a minimum from.
    IndexError, KeyError
        If the race, driver, tyre or stint cannot be found in the inputs.

    Notes
    -----
    NOTE(review): the meaning of ``dx=5`` in the integration is not evident from
    the code; it is kept as is so that results stay comparable.
    """
    # Filter tyre degradation models for the race
    model = tyre_degrad_models[(tyre_degrad_models['name'] == name) &
                               (tyre_degrad_models['driverRef'] == driver)].reset_index(drop=True)

    # Keep only the assignments in which every listed tyre appears. This is the
    # same rule the former 2/3/4-tyre branches applied, now valid for any length
    # (other lengths used to fail with a NameError).
    n_tyres = len(tyre_to_use_list)
    combis = [c for c in tyre_combis(tyre_to_use_list, name, year) if len(set(c)) >= n_tyres]

    # Delete the digits that distinguish repeated compounds. Written so that it
    # works on Python 3 as well; str.translate(None, ...) is Python 2 only.
    def remove_int(x):
        return ''.join(ch for ch in x if ch not in digits).rstrip(" ")

    combis_new = [[remove_int(label) for label in combi] for combi in combis]

    # Pit stop timing of this driver at this race; identical for every combination.
    ptd = pitstop_timings[pitstop_timings['driverRef'] == driver][name].values[0]

    def stint_lengths(combi):
        # Number of consecutive laps spent on each label, in running order.
        return [len(list(group)) for key, group in groupby(combi)]

    def stint_curve(tyre, stint_no):
        # 'coeffs' entry of the first model row for this tyre and stint number.
        rows = model[(model['tyre'] == tyre) & (model['stint'] == stint_no)]
        return rows['coeffs'].reset_index(drop=True)[0]

    # Total race time if the given tyre combination was used
    def calc_time_per_combi(tyres, freq):
        timing = []

        f1 = stint_curve(tyres[freq[0]-1], 1)
        f1 = f1[0:freq[0]+1]
        timing.append(simps(f1, dx=5))

        if len(freq) > 1:
            f2 = stint_curve(tyres[freq[0]+freq[1]-1], 2)
            if f2.any():
                f2 = f2[0:freq[1]+1]
                # Continue from the last value reached in stint 1.
                f2 = list(np.asarray(f2) + f1[freq[0]])
                timing.append(simps(f2, dx=5))

        if len(freq) > 2:
            f3 = stint_curve(tyres[freq[0]+freq[1]+freq[2]-1], 3)
            if f3.any():
                f3 = f3[0:freq[2]+1]
                # Continue from the last value reached in stint 2.
                f3 = list(np.asarray(f3) + f2[freq[1]])
                timing.append(simps(f3, dx=5))

        # Keep the list argument: because of `from numpy import *` at the top of
        # the notebook, `sum` may be numpy's, which must not be fed a generator.
        summed = sum([int(i) for i in timing])

        # Add one pit stop per tyre change
        if len(freq) == 3:
            total = summed + ptd*2
        elif len(freq) == 2:
            total = summed + ptd
        elif len(freq) == 1:
            total = summed
        else:
            # Previously this ended in an UnboundLocalError on `total`.
            raise ValueError("only up to three stints are modelled, got %d" % len(freq))

        return total

    # Score all combinations
    timings = []
    freqs = []
    for combi, tyres in zip(combis, combis_new):
        freq = stint_lengths(combi)
        timings.append(calc_time_per_combi(tyres, freq))
        freqs.append(freq)

    df = pd.DataFrame({"tyre strategy": combis,
                            "tyre freq": freqs,
                            "timings": timings}).sort_values("timings", ascending=True)

    # Lowest timing: optimal laps where pitstops should happen.
    # Select the column by name; relying on it being column 0 only holds when
    # pandas sorts the dict keys alphabetically.
    mini = df['timings'].idxmin()
    optimal = df['tyre freq'][mini]
    strategy = list(df['tyre strategy'][mini])
    timing = df['timings'][mini]

    if show_table == True:
        return df
    else:
        return optimal, strategy, timing

### Calculate all the optimal pitlaps

#### Pirelli Tyre Choices -> What did Pirelli pick for teams to use?

In [None]:
# Read the Pirelli tyre nominations from sheet "Sheet7" of the workbook.
# NOTE(review): the workbook is addressed by an absolute path in a user's Downloads
# folder, so this cell only runs on that machine.
xl = pd.ExcelFile("/Users/dianaow/Downloads/F1_Tyre_data.xlsx")
# Evaluated but not displayed, because it is not the last expression of the cell.
xl.sheet_names
pirelli = xl.parse("Sheet7")

In [None]:
# Keep only the 2016 and 2017 seasons.
pirelli = pirelli[pirelli['year'].isin([2016,2017])]
# Display a sorted view; the result is not assigned, so `pirelli` keeps its row order.
pirelli.sort_values(['name', 'year'], ascending=True)

In [None]:
# 1: SS, S, M
# 2: S, M, H
# 3: US, SS, S
#
# Label each race with the code of its nominated tyre set. A compound counts as
# nominated when the cell in its column holds the compound's own name. np.select
# takes the first matching condition per row, the same precedence as the former
# if/elif chain. Rows matching none get NaN, and the column is always created,
# even if no row matches.
pirelli['mandatory combi'] = np.select(
    [
        (pirelli['Super Soft'] == 'Super Soft') & (pirelli['Soft'] == 'Soft') & (pirelli['Medium'] == 'Medium'),
        (pirelli['Soft'] == 'Soft') & (pirelli['Medium'] == 'Medium') & (pirelli['Hard'] == 'Hard'),
        (pirelli['Super Soft'] == 'Super Soft') & (pirelli['Soft'] == 'Soft') & (pirelli['Ultra Soft'] == 'Ultra Soft'),
    ],
    [1.0, 2.0, 3.0],
    default=np.nan,
)

In [None]:
# Nominations of the 2017 season only. The former sort_values() call was removed:
# its result was neither assigned nor displayed, so it had no effect.
pirelli_2017 = pirelli[pirelli['year'].isin([2017])]

# Race tracks that use US, SS, S in 2017
race_USSSM =  list(pirelli_2017[pirelli_2017['mandatory combi'] == 3]['name'].values)

# Race tracks that use S, M, H in 2017
race_SMH =  list(pirelli_2017[pirelli_2017['mandatory combi'] == 2]['name'].values)

# Race tracks that use SS, S, M in 2017
race_SSSM =  list(pirelli_2017[pirelli_2017['mandatory combi'] == 1]['name'].values)

In [None]:
import itertools
# Tyre sets from which the candidate strategies are built, one list per set of
# compounds nominated by Pirelli. calc_optimal compares these labels (after removing
# digits) with the 'tyre' column of the degradation models; note the spelling
# 'Super soft' here versus the 'Super Soft' column name of the Pirelli sheet.
# SS, S, M combinations
c1 = ['Super soft', 'Soft', 'Medium']

# US, SS, S combinations
c2 = ['Ultra soft', 'Super soft', 'Soft']

# S, M, H combinations
c3 = ['Soft', 'Medium', 'Hard']

#### Generate all possible 1-stop strategies

In [None]:
# One-stop strategies: every ordered pair of compounds from a set, including pairs
# that repeat the same compound.
SSSM = [c1, c1]
combi_1stop_SSSM = list(itertools.product(c1, repeat=2))
USSM = [c2, c2]
combi_1stop_USSM = list(itertools.product(c2, repeat=2))
SMH = [c3, c3]
combi_1stop_SMH = list(itertools.product(c3, repeat=2))

#### Generate all possible 2-stop strategies

In [None]:
# Two-stop strategies: every ordered triple of compounds from a set, including
# triples that repeat a compound.
SSSM = [c1, c1, c1]
combi_2stop_SSSM = list(itertools.product(c1, repeat=3))
USSM = [c2, c2, c2]
combi_2stop_USSM = list(itertools.product(c2, repeat=3))
SMH = [c3, c3, c3]
combi_2stop_SMH = list(itertools.product(c3, repeat=3))

In [None]:
# Per tyre set: the one-stop strategies followed by the two-stop strategies.
combins_SSSM = list(itertools.chain(combi_1stop_SSSM, combi_2stop_SSSM))
combins_USSM = list(itertools.chain(combi_1stop_USSM, combi_2stop_USSM))
combins_SMH = list(itertools.chain(combi_1stop_SMH, combi_2stop_SMH))

In [None]:
from collections import Counter # Counter counts the number of occurrences of each item

def suffix_for_dup(lists):
    """Number the repeated labels of each combination and keep only those combinations.

    For every combination, each label that occurs more than once gets a running
    suffix ('Soft', 'Soft' -> 'Soft1', 'Soft2'), in order of appearance. A
    combination is added to the result once per repeated label, so combinations
    without any repeated label are left out and one with two different repeated
    labels is added twice (the same list object both times).

    NOTE(review): the second part of the condition compares the count with
    len(combination) + 1, which a count can never reach, so it excludes nothing.

    Parameters
    ----------
    lists : iterable of sequences of str

    Returns
    -------
    list of list of str
    """
    combins_new = []
    for combo in lists:
        labelled = list(combo)
        # Counts are taken before any label is renamed.
        for label, occurrences in Counter(labelled).items():
            if occurrences <= 1 or occurrences == len(labelled) + 1:
                continue
            for number in range(1, occurrences + 1):
                # Always renames the first occurrence that is still unsuffixed.
                position = labelled.index(label)
                labelled[position] = label + str(number)
            combins_new.append(labelled)

    return combins_new

In [None]:
# Number the repeated compounds of every strategy, per tyre set.
# Strategies in which no compound is repeated are not part of these results.
combins_SSSM_suffix = suffix_for_dup(combins_SSSM)
combins_USSM_suffix = suffix_for_dup(combins_USSM)
combins_SMH_suffix = suffix_for_dup(combins_SMH)

In [None]:
# Inspect the suffixed SS/S/M strategies.
# Intent: to make the iteration runs faster, combinations that repeat the same tyre
# type throughout should be removed, as they are not realistic: teams have to use
# Pirelli's mandatory tyre set.
# NOTE(review): this cell only displays the list; nothing is removed here.
combins_SSSM_suffix

In [None]:
def calc_optimal_ALL(df, pitstop_timings_f, race_names, driver_names, combins, year=2016):
    """Run ``calc_optimal`` for every race, driver and tyre list and tabulate the results.

    Parameters
    ----------
    df : pandas.DataFrame
        Tyre degradation models, passed on to ``calc_optimal``.
    pitstop_timings_f : pandas.DataFrame
        Pit stop timings, passed on to ``calc_optimal``.
    race_names : iterable of str
    driver_names : iterable of str
    combins : iterable of list of str
        Tyre lists to evaluate. Each is passed to ``calc_optimal`` as
        ``tyre_to_use_list``.
    year : int, optional
        Season passed to ``calc_optimal``. Defaults to 2016, the value that
        used to be hard-coded.

    Returns
    -------
    pandas.DataFrame
        One row per (race, driver, tyre list) with the columns 'name',
        'drivers', 'total time', 'optimal' and 'tyre strategy'. When
        ``calc_optimal`` fails for a combination, the last three hold the
        string "error", so those columns can mix strings with other values.
    """
    races = []
    ds = []
    ts = []
    results = []
    tyres = []

    for name, driver, combi in itertools.product(race_names, driver_names, combins):
        try:
            optimal, strategy, timing = calc_optimal(combi, name, driver, year, df, pitstop_timings_f, show_table = False)
        except Exception:
            # Best effort: record the failure and carry on with the next
            # combination. Unlike a bare `except:`, this lets KeyboardInterrupt
            # and SystemExit through, so a long run can still be aborted.
            optimal = strategy = timing = "error"

        races.append(name)
        ds.append(driver)
        ts.append(timing)
        results.append(optimal)
        tyres.append(strategy)

    summary = pd.DataFrame({'name': races,
                            'drivers': ds,
                            'total time': ts,
                            'optimal': results,
                            'tyre strategy': tyres})

    return summary

In [None]:
# Chinese Grand Prix, all drivers: evaluate the suffixed SS/S/M tyre lists at positions 4-9.
# NOTE(review): which strategies the slice [4:10] selects depends on the order produced
# by suffix_for_dup - confirm it picks the intended ones.
SSM_optimal_chn = calc_optimal_ALL(final, pitstop_timings_f, ['Chinese Grand Prix'], driver_names, combins_SSSM_suffix[4:10])

In [None]:
# Chinese Grand Prix, all drivers: the two-compound lists Super soft + Soft and Super soft + Medium.
SSM_optimal_chn1 = calc_optimal_ALL(final, pitstop_timings_f, ['Chinese Grand Prix'], driver_names, [['Super soft', 'Soft'], ['Super soft', 'Medium']])

In [None]:
# Bahrain Grand Prix, all drivers: the two-compound lists Super soft + Soft and Super soft + Medium.
SSM_optimal_bah1 = calc_optimal_ALL(final, pitstop_timings_f, ['Bahrain Grand Prix'], driver_names, [['Super soft', 'Soft'], ['Super soft', 'Medium']])

In [None]:
# Bahrain Grand Prix, all drivers: evaluate the suffixed SS/S/M tyre lists at positions 4-9.
# NOTE(review): which strategies the slice [4:10] selects depends on the order produced
# by suffix_for_dup - confirm it picks the intended ones.
SSM_optimal_bah2 = calc_optimal_ALL(final, pitstop_timings_f, ['Bahrain Grand Prix'], driver_names, combins_SSSM_suffix[4:10])

In [None]:
# Azerbaijan Grand Prix, all drivers: the two-compound lists Super soft + Soft and Super soft + Medium.
SSM_optimal_aze1 = calc_optimal_ALL(final, pitstop_timings_f, ['Azerbaijan Grand Prix'], driver_names, [['Super soft', 'Soft'], ['Super soft', 'Medium']])

In [None]:
# Azerbaijan Grand Prix, all drivers: evaluate the suffixed SS/S/M tyre lists at positions 4-9.
# NOTE(review): which strategies the slice [4:10] selects depends on the order produced
# by suffix_for_dup - confirm it picks the intended ones.
SSM_optimal_aze2 = calc_optimal_ALL(final, pitstop_timings_f, ['Azerbaijan Grand Prix'], driver_names, combins_SSSM_suffix[4:10])

In [64]:
# Stack both result sets of each race. The original row indices are kept, so index
# values repeat within each combined frame.
# Naming differs between races: for China the frame without a number holds the suffixed
# strategies and `chn1` the two-compound ones, while for Bahrain and Azerbaijan `1` holds
# the two-compound lists and `2` the suffixed strategies.
chn = pd.concat([SSM_optimal_chn, SSM_optimal_chn1])
bah = pd.concat([SSM_optimal_bah1, SSM_optimal_bah2])
aze = pd.concat([SSM_optimal_aze1, SSM_optimal_aze2])

In [67]:
# Per driver, the three rows with the lowest 'total time' for the Chinese Grand Prix.
# Failed runs carry the string "error" in 'total time' (see the rows for button in the
# output), so the sorted column mixes numbers and strings.
chn.groupby(['drivers']).apply(lambda x: (x.sort_values('total time')).head(3))

Unnamed: 0_level_0,Unnamed: 1_level_0,drivers,name,optimal,total time,tyre strategy
drivers,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
alonso,5,alonso,Chinese Grand Prix,"[15, 15, 26]",4.12905e+06,"[Super soft, Super soft, Super soft, Super sof..."
alonso,3,alonso,Chinese Grand Prix,"[15, 16, 25]",4.60096e+06,"[Super soft, Super soft, Super soft, Super sof..."
alonso,1,alonso,Chinese Grand Prix,"[16, 15, 25]",4.68822e+06,"[Super soft1, Super soft1, Super soft1, Super ..."
bottas,11,bottas,Chinese Grand Prix,"[15, 15, 26]",4.12863e+06,"[Super soft, Super soft, Super soft, Super sof..."
bottas,9,bottas,Chinese Grand Prix,"[15, 17, 24]",4.57779e+06,"[Super soft, Super soft, Super soft, Super sof..."
bottas,7,bottas,Chinese Grand Prix,"[16, 15, 25]",4.6878e+06,"[Super soft1, Super soft1, Super soft1, Super ..."
button,12,button,Chinese Grand Prix,error,error,error
button,13,button,Chinese Grand Prix,error,error,error
button,14,button,Chinese Grand Prix,error,error,error
ericsson,23,ericsson,Chinese Grand Prix,"[15, 15, 26]",4.13115e+06,"[Super soft, Super soft, Super soft, Super sof..."


In [68]:
# Per driver, the three rows with the lowest 'total time' for the Azerbaijan Grand Prix.
# Failed runs carry the string "error" in 'total time' (see the rows for button in the
# output), so the sorted column mixes numbers and strings.
aze.groupby(['drivers']).apply(lambda x: (x.sort_values('total time')).head(3))

Unnamed: 0_level_0,Unnamed: 1_level_0,drivers,name,optimal,total time,tyre strategy
drivers,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
alonso,5,alonso,Azerbaijan Grand Prix,"[14, 14, 23]",2.70199e+06,"[Super soft, Super soft, Super soft, Super sof..."
alonso,3,alonso,Azerbaijan Grand Prix,"[13, 16, 22]",2.95034e+06,"[Super soft, Super soft, Super soft, Super sof..."
alonso,1,alonso,Azerbaijan Grand Prix,"[14, 14, 23]",3.20977e+06,"[Super soft1, Super soft1, Super soft1, Super ..."
bottas,11,bottas,Azerbaijan Grand Prix,"[14, 14, 23]",2.70173e+06,"[Super soft, Super soft, Super soft, Super sof..."
bottas,9,bottas,Azerbaijan Grand Prix,"[13, 16, 22]",2.95008e+06,"[Super soft, Super soft, Super soft, Super sof..."
bottas,7,bottas,Azerbaijan Grand Prix,"[14, 14, 23]",3.20951e+06,"[Super soft1, Super soft1, Super soft1, Super ..."
button,4,button,Azerbaijan Grand Prix,error,error,error
button,5,button,Azerbaijan Grand Prix,error,error,error
button,12,button,Azerbaijan Grand Prix,error,error,error
ericsson,23,ericsson,Azerbaijan Grand Prix,"[14, 14, 23]",2.70273e+06,"[Super soft, Super soft, Super soft, Super sof..."


In [66]:
# Store the combined results of each race as pickles in the dataframe directory.
chn.to_pickle(os.path.join(directory,  "chn.pickle"))
bah.to_pickle(os.path.join(directory,  "bah.pickle"))
aze.to_pickle(os.path.join(directory,  "aze.pickle"))