## F1 DHL Fastest Lap 2020-2022
https://www.formula1.com/en/results.html/2022/fastest-laps.html

In [1056]:
# Import necessary libraries
from bs4 import BeautifulSoup
from csv import writer
import pandas as pd
import requests
from datetime import datetime

In [1057]:
# Download the fastest-lap results page of each season and parse it with Beautiful Soup
def _fetch_results_page(url, timeout=30):
    """Download one results page and fail loudly if the request did not succeed.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up; without a
            timeout ``requests.get`` can block indefinitely.

    Returns:
        The ``requests.Response`` obtained for ``url``.

    Raises:
        requests.HTTPError: If the server answered with a 4xx/5xx status.
        requests.Timeout: If the server did not answer within ``timeout``.
    """
    response = requests.get(url, timeout=timeout)
    # Stop here on an error status instead of parsing an error page as results
    response.raise_for_status()
    return response


F1_2022_webpage = _fetch_results_page("https://www.formula1.com/en/results.html/2022/fastest-laps.html")
# Build a BeautifulSoup object from the raw response bytes with the built-in HTML parser
bs_2022 = BeautifulSoup(F1_2022_webpage.content, "html.parser")

F1_2021_webpage = _fetch_results_page("https://www.formula1.com/en/results.html/2021/fastest-laps.html")
bs_2021 = BeautifulSoup(F1_2021_webpage.content, "html.parser")

F1_2020_webpage = _fetch_results_page("https://www.formula1.com/en/results.html/2020/fastest-laps.html")
bs_2020 = BeautifulSoup(F1_2020_webpage.content, "html.parser")

In [1058]:
# Take the first <table> element found in each season's parsed page
table_2022, table_2021, table_2020 = (
    soup.find("table") for soup in (bs_2022, bs_2021, bs_2020)
)

In [1059]:
# Collect every <tr> element of each season's table, in document order
rows_2022, rows_2021, rows_2020 = (
    table.find_all("tr") for table in (table_2022, table_2021, table_2020)
)

In [1060]:
def _season_records(rows, year):
    """Turn one season's table rows into lists of cell text with the year added.

    Args:
        rows: The ``<tr>`` elements of one season's results table.
        year: Season the rows belong to; inserted at position 1 of every record.

    Returns:
        One list per row that has at least one cell, e.g. the scraped
        ``['', 'Bahrain', 'CharlesLeclercLEC', 'Ferrari', '1:34.570', '']``
        becomes ``['', 2022, 'Bahrain', 'CharlesLeclercLEC', 'Ferrari', '1:34.570', '']``.
    """
    records = []
    for row in rows[1:]:  # the first row is skipped — presumably the header row
        # NOTE(review): get_text(strip=True) joins nested text without a separator,
        # which is why drivers read like 'CharlesLeclercLEC' — confirm that is wanted.
        cells_text = [cell.get_text(strip=True) for cell in row.find_all(["td", "th"])]
        if not cells_text:
            # A row without any cells carries no data, so it is left out
            continue
        cells_text.insert(1, year)
        records.append(cells_text)
    return records


# Seasons are appended newest first: 2022, then 2021, then 2020
final_data = []
for year, rows in ((2022, rows_2022), (2021, rows_2021), (2020, rows_2020)):
    final_data.extend(_season_records(rows, year))

print(final_data)

[['', 2022, 'Bahrain', 'CharlesLeclercLEC', 'Ferrari', '1:34.570', ''], ['', 2022, 'Saudi Arabia', 'CharlesLeclercLEC', 'Ferrari', '1:31.634', ''], ['', 2022, 'Australia', 'CharlesLeclercLEC', 'Ferrari', '1:20.260', ''], ['', 2022, 'Emilia Romagna', 'MaxVerstappenVER', 'Red Bull Racing RBPT', '1:18.446', ''], ['', 2022, 'Miami', 'MaxVerstappenVER', 'Red Bull Racing RBPT', '1:31.361', ''], ['', 2022, 'Spain', 'SergioPerezPER', 'Red Bull Racing RBPT', '1:24.108', ''], ['', 2022, 'Monaco', 'LandoNorrisNOR', 'McLaren Mercedes', '1:14.693', ''], ['', 2022, 'Azerbaijan', 'SergioPerezPER', 'Red Bull Racing RBPT', '1:46.046', ''], ['', 2022, 'Canada', 'CarlosSainzSAI', 'Ferrari', '1:15.749', ''], ['', 2022, 'Great Britain', 'LewisHamiltonHAM', 'Mercedes', '1:30.510', ''], ['', 2022, 'Austria', 'MaxVerstappenVER', 'Red Bull Racing RBPT', '1:07.275', ''], ['', 2022, 'France', 'CarlosSainzSAI', 'Ferrari', '1:35.781', ''], ['', 2022, 'Hungary', 'LewisHamiltonHAM', 'Mercedes', '1:21.386', ''], ['',

In [1061]:
# Every scraped record starts and ends with an empty cell; both get a blank
# header so that a single drop removes the two of them.
column_names = ['', 'Year', 'Grand Prix', 'Driver', 'Car', 'Lap Time', '']
df = pd.DataFrame(final_data, columns=column_names)
df = df.drop(columns='')
df

Unnamed: 0,Year,Grand Prix,Driver,Car,Lap Time
0,2022,Bahrain,CharlesLeclercLEC,Ferrari,1:34.570
1,2022,Saudi Arabia,CharlesLeclercLEC,Ferrari,1:31.634
2,2022,Australia,CharlesLeclercLEC,Ferrari,1:20.260
3,2022,Emilia Romagna,MaxVerstappenVER,Red Bull Racing RBPT,1:18.446
4,2022,Miami,MaxVerstappenVER,Red Bull Racing RBPT,1:31.361
5,2022,Spain,SergioPerezPER,Red Bull Racing RBPT,1:24.108
6,2022,Monaco,LandoNorrisNOR,McLaren Mercedes,1:14.693
7,2022,Azerbaijan,SergioPerezPER,Red Bull Racing RBPT,1:46.046
8,2022,Canada,CarlosSainzSAI,Ferrari,1:15.749
9,2022,Great Britain,LewisHamiltonHAM,Mercedes,1:30.510


In [1062]:
def normalize_laptime(x):
    """Prefix a lap-time string with a leading zero.

    Args:
        x: Lap time as text, e.g. ``'1:34.570'`` or ``'54.123'``.

    Returns:
        ``'0' + x`` when ``x`` contains a colon (``'1:34.570'`` -> ``'01:34.570'``),
        otherwise ``'0:' + x`` (``'54.123'`` -> ``'0:54.123'``).
    """
    prefix = '0' if ":" in x else '0:'
    return prefix + x

In [1063]:
# Run normalize_laptime over every value of the 'Lap Time' column.
# Note: the function always prepends, so running this cell twice adds a second zero.
df['Lap Time'] = df['Lap Time'].map(normalize_laptime)

In [1064]:
# Red Bull changed name from "Red Bull Honda" (2020-2021) to "Red Bull RBPT" (2022); changing all occurrences to updated name
def normalize_carname(x):
    """Map a constructor name onto the single spelling used for every season.

    Args:
        x: Constructor name as scraped, e.g. ``'McLaren Mercedes'``.

    Returns:
        ``x`` with the first matching entry of the rename table replaced;
        ``x`` unchanged when no entry matches.
    """
    # Checked in order; only the first entry found in x is applied
    renames = (
        ("Red Bull Racing Honda", "Red Bull Racing RBPT"),
        ("McLaren Mercedes", "McLaren"),
        ("McLaren Renault", "McLaren"),
        ("Alfa Romeo Ferrari", "Alfa Romeo"),
        ("AlphaTauri Honda", "AlphaTauri"),
    )
    for old_name, new_name in renames:
        if old_name in x:
            return x.replace(old_name, new_name)
    return x

In [1065]:
# Run normalize_carname over every value of the 'Car' column
df['Car'] = df['Car'].map(normalize_carname)

In [1066]:
# Display the frame to check the normalized 'Car' and 'Lap Time' columns
df

Unnamed: 0,Year,Grand Prix,Driver,Car,Lap Time
0,2022,Bahrain,CharlesLeclercLEC,Ferrari,01:34.570
1,2022,Saudi Arabia,CharlesLeclercLEC,Ferrari,01:31.634
2,2022,Australia,CharlesLeclercLEC,Ferrari,01:20.260
3,2022,Emilia Romagna,MaxVerstappenVER,Red Bull Racing RBPT,01:18.446
4,2022,Miami,MaxVerstappenVER,Red Bull Racing RBPT,01:31.361
5,2022,Spain,SergioPerezPER,Red Bull Racing RBPT,01:24.108
6,2022,Monaco,LandoNorrisNOR,McLaren,01:14.693
7,2022,Azerbaijan,SergioPerezPER,Red Bull Racing RBPT,01:46.046
8,2022,Canada,CarlosSainzSAI,Ferrari,01:15.749
9,2022,Great Britain,LewisHamiltonHAM,Mercedes,01:30.510


In [1067]:
# Parse the zero-padded strings as datetimes, then keep only the '%H:%M' text.
# NOTE(review): in the recorded output 1:34.570 leaves this step as "01:34",
# so the thousandths are dropped here — confirm that loss of precision is intended.
lap_time_parsed = pd.to_datetime(df['Lap Time'])
df['Lap Time'] = lap_time_parsed.dt.strftime('%H:%M')

In [1068]:
# Converting Lap Time to dtype: float64
def convert(x):
    """Turn a colon-separated string into a float by swapping the colon for a dot.

    Args:
        x: Text with at least one colon, e.g. ``'01:34'``.

    Returns:
        ``float`` built from the first two colon-separated parts, e.g.
        ``'01:34'`` -> ``1.34`` and ``'01:20'`` -> ``1.2``. The digits are kept
        positionally; the value is not a duration in minutes or seconds.

    Raises:
        IndexError: If ``x`` contains no colon.
        ValueError: If the joined text is not a valid float literal.
    """
    parts = x.split(":")
    return float(f"{parts[0]}.{parts[1]}")

# Replace the 'Lap Time' text with the float produced by convert, then show the frame
df['Lap Time'] = df['Lap Time'].map(convert)
df

Unnamed: 0,Year,Grand Prix,Driver,Car,Lap Time
0,2022,Bahrain,CharlesLeclercLEC,Ferrari,1.34
1,2022,Saudi Arabia,CharlesLeclercLEC,Ferrari,1.31
2,2022,Australia,CharlesLeclercLEC,Ferrari,1.2
3,2022,Emilia Romagna,MaxVerstappenVER,Red Bull Racing RBPT,1.18
4,2022,Miami,MaxVerstappenVER,Red Bull Racing RBPT,1.31
5,2022,Spain,SergioPerezPER,Red Bull Racing RBPT,1.24
6,2022,Monaco,LandoNorrisNOR,McLaren,1.14
7,2022,Azerbaijan,SergioPerezPER,Red Bull Racing RBPT,1.46
8,2022,Canada,CarlosSainzSAI,Ferrari,1.15
9,2022,Great Britain,LewisHamiltonHAM,Mercedes,1.3


In [1069]:
# Summarize column dtypes and non-null counts to confirm 'Lap Time' is now numeric
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 60 entries, 0 to 59
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Year        60 non-null     int64  
 1   Grand Prix  60 non-null     object 
 2   Driver      60 non-null     object 
 3   Car         60 non-null     object 
 4   Lap Time    60 non-null     float64
dtypes: float64(1), int64(1), object(3)
memory usage: 2.5+ KB


In [1070]:
# Write the frame as UTF-8 CSV to a path relative to the notebook's working directory.
# The row index is written too (pandas default), as the first, unnamed column.
df.to_csv(path_or_buf="F1-DHLFastestLap2020-22.csv", encoding="utf-8")