In [1]:
from weekend_functions import *

In [2]:
# Track key for this race weekend: used below to look up the sheet gid and to build
# the free practice column names (e.g. f'fp1_{track_name}').
track_name = 'australia'

**Import the data directly from Google Sheets, following this guide:**

https://skills.ai/blog/import-google-sheets-to-pandas/#:~:text=2%3A%20Reading%20Data%20from%20a%20Public%20Google%20Sheet%20into%20Pandas&text=Pandas%20can%20directly%20read%20CSV,format%3Dcsv%26gid%3D%7Bsheet_id%7D%20.

In [9]:
# Pull this weekend's sheet straight from Google Sheets through its CSV export endpoint;
# sheet_gid is indexed by the track name to select the sheet's gid.
sheet_url = f'https://docs.google.com/spreadsheets/d/14kBO9LAo4-uPrQlH6xm_Fm2OcNB15xUdnzUbIaRFjOU/export?format=csv&gid={sheet_gid[track_name]}'
weekend_df = pd.read_csv(sheet_url)

# Reduce the sheet to the free practice data, then run the sanity check on it.
# NOTE(review): drops_keep_fp / drop_empties come from weekend_functions - presumably they keep
# the Team/Driver/fp columns and drop unfilled ones; confirm against that module.
fp_df = drop_empties(drops_keep_fp(weekend_df))
check_df(fp_df)

fp_df

Column "fp2_australia" has an issue with the finishing positins represented in the data!
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 400]




Unnamed: 0,Team,Driver,fp1_australia,fp2_australia
0,Ferrari,Charles Leclerc,4,1
1,Red Bull Racing-RBPT,Max Verstappen,2,2
2,Ferrari,Carlos Sainz Jr.,8,3
3,Aston Martin Aramco-Mercedes,Lance Stroll,7,4
4,Aston Martin Aramco-Mercedes,Fernando Alonso,18,5
5,Mercedes,George Russell,3,6
6,McLaren-Mercedes,Oscar Piastri,10,7
7,Red Bull Racing-RBPT,Sergio Perez,6,8
8,McLaren-Mercedes,Lando Norris,1,9
9,VisaCashApp RB,Yuki Tsunoda,5,10


In [24]:
def driver_eval_fp(df, track):
    '''
    this function evaluates the drivers' performance progression through the free practice sessions,
    assuming they are trying to drive as quick as possible while they learn the track.

    parameters:
    df: dataframe with a 'Driver' column (one row per driver) and one finishing-position column per
        free practice session; every column whose name contains 'fp' is treated as a session
    track: string, underscore-separated track name (e.g. 'las_vegas'); only used in the printed summary

    returns:
    dataframe with one row per driver, in the row order of df, holding 'Driver', 'FP<n> Results' for
    each session, 'Total Finish', 'Average Finish' and the session-to-session changes (earlier
    position minus later position, so a positive number means the driver moved up the order).
    Columns containing NaN - i.e. the changes for sessions that are not in df - are dropped.

    raises:
    ValueError: if df has no free practice columns

    '''
    sessions = [col for col in df.columns if 'fp' in col]
    if not sessions:
        raise ValueError('No free practice columns (names containing "fp") found in the dataframe.')

    # build the label from the track argument rather than a notebook-level global
    track_label = ' '.join(track.split('_')).title()
    print(f'There are {len(sessions)} practice sessions in this dataframe for {track_label} in 2024.')

    # work on a fresh 0..n-1 index so the result does not depend on how df was filtered or sorted
    results = df[['Driver'] + sessions].reset_index(drop=True)
    positions = results[sessions].astype(int)

    evaluation = pd.DataFrame({'Driver': results['Driver']})
    for number, session in enumerate(sessions, start=1):
        evaluation[f'FP{number} Results'] = results[session]

    evaluation['Total Finish'] = positions.sum(axis=1)
    evaluation['Average Finish'] = evaluation['Total Finish'] / len(sessions)

    def session_column(tag):
        # find the session column by name instead of by list position, so a missing
        # session can never shift another session into its place
        return next((name for name in sessions if tag in name), None)

    for earlier, later in (('fp1', 'fp2'), ('fp2', 'fp3'), ('fp1', 'fp3')):
        label = f'{earlier.upper()}-{later.upper()} Change'
        first, second = session_column(earlier), session_column(later)
        if first is None or second is None:
            # placeholder column; removed again by the dropna below
            evaluation[label] = float('nan')
        else:
            evaluation[label] = positions[first] - positions[second]

    # return the frame built here (not a same-named notebook global), minus the unfilled columns
    return evaluation.dropna(axis=1)

In [25]:
# Build the per-driver free practice summary for this weekend's track.
driver_eval = driver_eval_fp(fp_df, track_name)

There are 2 practice sessions in this dataframe for Australia in 2024.


In [28]:
# Rank drivers by combined finishing position (lowest first); ties are broken by the FP2 result.
driver_eval = driver_eval.sort_values(by=['Total Finish', 'FP2 Results'])

In [29]:
# Show the driver evaluation table.
driver_eval

Unnamed: 0,Driver,FP1 Results,FP2 Results,Total Finish,Average Finish,FP1-FP2 Change
1,Max Verstappen,2,2,4,2.0,0
0,Charles Leclerc,4,1,5,2.5,3
5,George Russell,3,6,9,4.5,-3
8,Lando Norris,1,9,10,5.0,-8
2,Carlos Sainz Jr.,8,3,11,5.5,5
3,Lance Stroll,7,4,11,5.5,3
7,Sergio Perez,6,8,14,7.0,-2
9,Yuki Tsunoda,5,10,15,7.5,-5
6,Oscar Piastri,10,7,17,8.5,3
4,Fernando Alonso,18,5,23,11.5,13


In [30]:
# Narrow the driver evaluation down to a hand-picked set of drivers.
drivers_of_interest = ['Max Verstappen', 'Oscar Piastri', 'Daniel Ricciardo', 'Nico Hulkenberg', 'Pierre Gasly']
driver_eval[driver_eval['Driver'].isin(drivers_of_interest)]

Unnamed: 0,Driver,FP1 Results,FP2 Results,Total Finish,Average Finish,FP1-FP2 Change
1,Max Verstappen,2,2,4,2.0,0
6,Oscar Piastri,10,7,17,8.5,3
11,Daniel Ricciardo,11,12,23,11.5,-1
14,Pierre Gasly,17,15,32,16.0,2
15,Nico Hulkenberg,16,16,32,16.0,0


In [13]:
def team_eval_fp(df, prac):
    '''
    This function evaluates team strength following each free practice session. This is done on several parameters:

    Average Finish: average finishing position of the team based on driver positions. A lower value in this column is better.
    Total Finish: sum of the finishing positions of each driver. A lower value in this column is better.
    Finish Spread: difference between finishing positions of the team's drivers. A lower value in this column means the drivers finished closer together on the grid.

    Parameters:
        df: the full odds df to be evaluated, with a 'Team' column. *Important* make sure the free practice session to be evaluated has been added to the dataframe prior to running
        prac: the free practice session to be evaluated, named following the format <session>_<race> like 'fp1_bahrain' or 'fp2_spa' etc.

    Returns:
        a new dataframe with one row per team and the columns 'Team', 'Average Finish', 'Total Finish' and 'Finish Spread',
        sorted by Average Finish, then by Total Finish, then by Finish Spread, with a fresh 0..n-1 index.

    Raises:
        KeyError: if prac is not a column of df.

    '''
    if prac not in df.columns:
        raise KeyError(f'Session column "{prac}" is not in the dataframe; add its results before evaluating.')

    # one pass over the data per statistic instead of re-filtering the frame four times per team;
    # sort=False keeps teams in order of first appearance, like iterating over drop_duplicates()
    per_team = df.groupby('Team', sort=False)[prac].agg(['mean', 'sum', 'max', 'min'])
    per_team['Finish Spread'] = per_team['max'] - per_team['min']

    team_summary = (
        per_team
        .rename(columns={'mean': 'Average Finish', 'sum': 'Total Finish'})
        [['Average Finish', 'Total Finish', 'Finish Spread']]
        .reset_index()
    )

    return team_summary.sort_values(by=['Average Finish', 'Total Finish', 'Finish Spread']).reset_index(drop=True)

In [14]:
# Team-level summary of the first practice session for this track.
team_eval_fp1 = team_eval_fp(fp_df, f'fp1_{track_name}')
team_eval_fp1

Unnamed: 0,Team,Average Finish,Total Finish,Finish Spread
0,Red Bull Racing-RBPT,4.0,8,4
1,McLaren-Mercedes,5.5,11,9
2,Ferrari,6.0,12,4
3,Mercedes,6.0,12,6
4,VisaCashApp RB,8.0,16,6
5,Aston Martin Aramco-Mercedes,12.5,25,11
6,Williams-Mercedes,13.0,26,2
7,Haas-Ferrari,14.5,29,3
8,Alpine-Renault,16.0,32,2
9,Stake F1 Team Kick Sauber,19.5,39,1


In [15]:
# Team-level summary of the second practice session for this track.
# NOTE(review): check_df flagged a finishing position of 400 in this session's column; it feeds
# straight into the sums below and produces the outlier team row - presumably a placeholder for
# a driver without a classified result; confirm and decide how to treat it.
team_eval_fp2 = team_eval_fp(fp_df, f'fp2_{track_name}')
team_eval_fp2

Unnamed: 0,Team,Average Finish,Total Finish,Finish Spread
0,Ferrari,2.0,4,2
1,Aston Martin Aramco-Mercedes,4.5,9,1
2,Red Bull Racing-RBPT,5.0,10,6
3,McLaren-Mercedes,8.0,16,2
4,VisaCashApp RB,11.0,22,2
5,Mercedes,12.0,24,12
6,Stake F1 Team Kick Sauber,12.5,25,3
7,Alpine-Renault,16.0,32,2
8,Haas-Ferrari,17.5,35,3
9,Williams-Mercedes,206.5,413,387


In [321]:
# Team-level summary of the third practice session. The fp3 column only exists once those
# results have been added to the sheet, so guard the call: without the guard this cell raises
# KeyError on a fresh run while only fp1/fp2 are present.
fp3_col = f'fp3_{track_name}'
if fp3_col in fp_df.columns:
    team_eval_fp3 = team_eval_fp(fp_df, fp3_col)
else:
    team_eval_fp3 = None
    print(f'No "{fp3_col}" column in fp_df yet - skipping the FP3 team evaluation.')

team_eval_fp3

Unnamed: 0,Team,Average Finish,Total Finish,Finish Spread
0,Ferrari,2.5,5,3
1,Red Bull,5.5,11,5
2,McLaren,6.0,12,2
3,Aston Martin,6.0,12,8
4,Mercedes,9.0,18,6
5,Haas,11.5,23,5
6,RB,12.0,24,2
7,Sauber,16.5,33,1
8,Williams,17.0,34,4
9,Alpine,19.0,38,2
