In [1]:
import os

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from selenium import webdriver

In [16]:
def parse_driver_name(row):
    """Return a driver string with its last whitespace-separated token removed.

    The scraped ``Driver`` cells presumably end with the driver's short code
    (e.g. ``'Lewis Hamilton HAM'``) -- verify against the scraped table. The
    remaining tokens are re-joined with single spaces, so runs of whitespace
    are normalised as a side effect.

    Parameters
    ----------
    row : str or any
        A single cell value from the ``Driver`` column.

    Returns
    -------
    str or any
        The value without its final token. A string with zero or one token
        yields ``''``. Non-string values (``None``, NaN from a missing cell)
        are returned unchanged instead of raising ``AttributeError``.
    """
    # Missing cells arrive as float NaN / None and have no .split().
    if not isinstance(row, str):
        return row
    return ' '.join(row.split()[:-1])

In [62]:
def FP_scrape_results(start,end,num):
    """Scrape one free-practice session's classification from formula1.com.

    For every season in ``range(start, end)`` (``end`` itself is excluded) the
    season's race index page is downloaded, its race-result links are
    collected, and each link is rewritten to the matching
    ``practice-{num}.html`` page, whose first HTML table is read with
    ``pd.read_html``.

    Round numbers are not read from the site; they are assigned by counting
    the race links in the order the index page lists them (assumes that order
    is the calendar order -- TODO confirm). A race whose practice table cannot
    be read still consumes a round number as long as its race-result page
    yields a table; a link for which neither page can be read is skipped
    without consuming a round number.

    Parameters
    ----------
    start : int
        First season to scrape.
    end : int
        Season at which to stop (exclusive).
    num : int or str
        Practice session number; used in the URL and in the position column
        name.

    Returns
    -------
    pandas.DataFrame
        Columns ``fp{num}_pos``, ``Driver``, ``season`` and ``round``, one row
        per classified driver per session. An empty, column-less DataFrame is
        returned when nothing could be scraped.
    """
    base_url = 'https://www.formula1.com'
    num = str(num)
    session_frames = []
    for year in range(start, end):
        races_url = f'{base_url}/en/results.html/{year}/races.html'
        # A timeout keeps a stalled connection from hanging the notebook.
        response = requests.get(races_url, timeout=30)
        soup = BeautifulSoup(response.text, 'html.parser')

        race_prefix = f'/en/results.html/{year}/races/'
        filter_links = soup.find_all('a', attrs={'class': 'resultsarchive-filter-item-link FilterTrigger'})
        hrefs = (link.get('href') for link in filter_links)
        # Anchors without an href give None, which cannot be searched with `in`.
        race_links = [href for href in hrefs if href and race_prefix in href]

        round_num = 1
        for race_link in race_links:
            FP_link = race_link.replace('race-result.html', f'practice-{num}.html')
            try:
                df = pd.read_html(f'{base_url}{FP_link}')[0]
                # Copy so the column assignments below write to an independent
                # frame rather than a slice of the scraped table.
                df = df[['Pos', 'Driver']].copy()
            except Exception:
                # No usable practice table. Probe the race page only to decide
                # whether this link is a real round that should be counted.
                try:
                    pd.read_html(f'{base_url}{race_link}')
                except Exception:
                    continue
                round_num += 1
                continue
            df['season'] = year
            df['round'] = round_num
            session_frames.append(df)
            round_num += 1

    if not session_frames:
        return pd.DataFrame()
    # Concatenate once instead of re-copying an accumulator on every iteration.
    FP_results = pd.concat(session_frames, ignore_index=True)
    return FP_results.rename(columns={'Pos': f'fp{num}_pos'})

# print(FP1_results.shape)


In [63]:
# Scrape each of the three practice sessions for the seasons in range(2019, 2021).
FP1_results = FP_scrape_results(start=2019, end=2021, num=1)
FP2_results = FP_scrape_results(start=2019, end=2021, num=2)
FP3_results = FP_scrape_results(start=2019, end=2021, num=3)

In [64]:
# Strip the trailing token from every Driver entry, in place.
# NOTE: not idempotent -- running this cell a second time removes another word.
for session_results in (FP1_results, FP2_results, FP3_results):
    session_results["Driver"] = session_results["Driver"].apply(parse_driver_name)

In [65]:
# Preview the first five rows of each session table, one table after another.
print(
    FP1_results.head(),
    FP2_results.head(),
    FP3_results.head(),
    sep='\n',
)

   fp1_pos            Driver  season  round
0        1    Lewis Hamilton    2019      1
1        2  Sebastian Vettel    2019      1
2        3   Charles Leclerc    2019      1
3        4    Max Verstappen    2019      1
4        5   Valtteri Bottas    2019      1
   fp2_pos            Driver  season  round
0        1    Lewis Hamilton    2019      1
1        2   Valtteri Bottas    2019      1
2        3    Max Verstappen    2019      1
3        4      Pierre Gasly    2019      1
4        5  Sebastian Vettel    2019      1
   fp3_pos            Driver  season  round
0        1    Lewis Hamilton    2019      1
1        2  Sebastian Vettel    2019      1
2        3   Charles Leclerc    2019      1
3        4   Romain Grosjean    2019      1
4        5   Kevin Magnussen    2019      1


In [None]:
# FP1_data.to_csv('/Users/anirudhkrishna/GitHub/FormulaData/FP1_data.csv', index=False)

In [66]:
# Outer-join the three sessions on driver/season/round so a driver who is
# missing from one session still keeps a row (with NaN for that session).
free_practice_results = pd.merge(
    pd.merge(FP1_results, FP2_results, how='outer', on=['Driver', 'season', 'round']),
    FP3_results,
    how='outer',
    on=['Driver', 'season', 'round'],
)


In [67]:
# Plain-text dump of the merged table; pandas abbreviates the middle rows of a
# long frame. NaN in a position column means the outer merge found no row for
# that driver in that session.
print(free_practice_results)

     fp1_pos              Driver  season  round  fp2_pos  fp3_pos
0        1.0      Lewis Hamilton    2019      1      1.0      1.0
1        2.0    Sebastian Vettel    2019      1      5.0      2.0
2        3.0     Charles Leclerc    2019      1      9.0      3.0
3        4.0      Max Verstappen    2019      1      3.0      9.0
4        5.0     Valtteri Bottas    2019      1      2.0      7.0
..       ...                 ...     ...    ...      ...      ...
772      NaN      George Russell    2020      8     20.0     18.0
773      NaN      Kimi Räikkönen    2020     15     17.0     19.0
774      NaN      George Russell    2020     15     20.0     16.0
775      NaN     Kevin Magnussen    2020     17     16.0     18.0
776      NaN  Antonio Giovinazzi    2020     17     17.0     16.0

[777 rows x 6 columns]


In [68]:
# Destination folder for the exported CSV. Defaults to the original location;
# set the FORMULA_DATA_DIR environment variable to write somewhere else
# without editing the notebook.
output_dir = os.environ.get('FORMULA_DATA_DIR', '/Users/anirudhkrishna/GitHub/FormulaData')
free_practice_results.to_csv(os.path.join(output_dir, 'free_practice_results.csv'), index=False)