In [1]:
# We first need to import the packages that will be used by the code
from bs4 import BeautifulSoup as soup
import requests
 


In [2]:
# Provide the url to the requests call to fetch the required html
url = 'https://www.formula1.com/en/results.html/2021/races/1073/hungary/race-result.html'
html = requests.get(url, verify=True)

In [3]:
# Use Beautiful Soup to parse the html document
page_soup = soup(html.text, 'html.parser')

In [4]:
# A way to make the code more aesthetic 
pretty_page = page_soup.prettify()

In [5]:
# Find the table that holds the archived result of the f1 race
race_results = page_soup.find("table",{"class":"resultsarchive-table"})

In [6]:
# Access the rows of the table and get wanted information
results_rows = race_results.findChildren(['tr'])
for row in results_rows:
    cells = row.findChildren()
    print("Place: " + cells[1].text + ", Driver: " + cells[3].text)

Place: Pos, Driver: No
Place: 1, Driver: 
Esteban
Ocon
OCO

Place: DQ, Driver: 
Sebastian
Vettel
VET

Place: 2, Driver: 
Lewis
Hamilton
HAM

Place: 3, Driver: 
Carlos
Sainz
SAI

Place: 4, Driver: 
Fernando
Alonso
ALO

Place: 5, Driver: 
Pierre
Gasly
GAS

Place: 6, Driver: 
Yuki
Tsunoda
TSU

Place: 7, Driver: 
Nicholas
Latifi
LAT

Place: 8, Driver: 
George
Russell
RUS

Place: 9, Driver: 
Max
Verstappen
VER

Place: 10, Driver: 
Kimi
RÃ¤ikkÃ¶nen
RAI

Place: 11, Driver: 
Daniel
Ricciardo
RIC

Place: 12, Driver: 
Mick
Schumacher
MSC

Place: 13, Driver: 
Antonio
Giovinazzi
GIO

Place: NC, Driver: 
Nikita
Mazepin
MAZ

Place: NC, Driver: 
Lando
Norris
NOR

Place: NC, Driver: 
Valtteri
Bottas
BOT

Place: NC, Driver: 
Sergio
Perez
PER

Place: NC, Driver: 
Charles
Leclerc
LEC

Place: NC, Driver: 
Lance
Stroll
STR



In [7]:
# The text of the drivers name is not formated nicely, so we will format them
places_array = []
for row in results_rows:
    cells = row.findChildren()
    names = cells[3].c()
    driver = ''
    
    # I am not sure why I can't directly access the names so we create a loop and a counter to know when we are at which child
    # Html element
    i = 0
    for name in names:
        if (i == 2):
            driver = driver + "(" + name.text + ')'
        else:
            driver = driver + name.text + ' ' 
        i = i + 1
    
    print("Place: " + cells[1].text + ", Driver: " + driver)
    places_array.append("Place: " + cells[1].text + ", Driver: " + driver)


Place: Pos, Driver: No 
Place: 1, Driver: Esteban Ocon (OCO)
Place: DQ, Driver: Sebastian Vettel (VET)
Place: 2, Driver: Lewis Hamilton (HAM)
Place: 3, Driver: Carlos Sainz (SAI)
Place: 4, Driver: Fernando Alonso (ALO)
Place: 5, Driver: Pierre Gasly (GAS)
Place: 6, Driver: Yuki Tsunoda (TSU)
Place: 7, Driver: Nicholas Latifi (LAT)
Place: 8, Driver: George Russell (RUS)
Place: 9, Driver: Max Verstappen (VER)
Place: 10, Driver: Kimi RÃ¤ikkÃ¶nen (RAI)
Place: 11, Driver: Daniel Ricciardo (RIC)
Place: 12, Driver: Mick Schumacher (MSC)
Place: 13, Driver: Antonio Giovinazzi (GIO)
Place: NC, Driver: Nikita Mazepin (MAZ)
Place: NC, Driver: Lando Norris (NOR)
Place: NC, Driver: Valtteri Bottas (BOT)
Place: NC, Driver: Sergio Perez (PER)
Place: NC, Driver: Charles Leclerc (LEC)
Place: NC, Driver: Lance Stroll (STR)


In [8]:
# The position string needs to be removed and the dq driver moved to the end so use the find string function
# to check if the word contains the other word
for string in places_array:
    if string.find("Place: Pos") != -1:
        places_array.remove(string)
    if string.find("Place: DQ") != -1:
        places_array.remove(string)
        places_array.append(string)
for string in places_array:
    print(string)

Place: 1, Driver: Esteban Ocon (OCO)
Place: 2, Driver: Lewis Hamilton (HAM)
Place: 3, Driver: Carlos Sainz (SAI)
Place: 4, Driver: Fernando Alonso (ALO)
Place: 5, Driver: Pierre Gasly (GAS)
Place: 6, Driver: Yuki Tsunoda (TSU)
Place: 7, Driver: Nicholas Latifi (LAT)
Place: 8, Driver: George Russell (RUS)
Place: 9, Driver: Max Verstappen (VER)
Place: 10, Driver: Kimi RÃ¤ikkÃ¶nen (RAI)
Place: 11, Driver: Daniel Ricciardo (RIC)
Place: 12, Driver: Mick Schumacher (MSC)
Place: 13, Driver: Antonio Giovinazzi (GIO)
Place: NC, Driver: Nikita Mazepin (MAZ)
Place: NC, Driver: Lando Norris (NOR)
Place: NC, Driver: Valtteri Bottas (BOT)
Place: NC, Driver: Sergio Perez (PER)
Place: NC, Driver: Charles Leclerc (LEC)
Place: NC, Driver: Lance Stroll (STR)
Place: DQ, Driver: Sebastian Vettel (VET)


In [9]:
# The above information does not contain the race name, so lets find that and add it to the begining of our list
race_name = page_soup.find("h1",{"class":"ResultsArchiveTitle"}).text.strip()
places_array.insert(0,race_name)
for string in places_array:
    print(string)

FORMULA 1 ROLEX MAGYAR NAGYDÃJ 2021
         - RACE RESULT
Place: 1, Driver: Esteban Ocon (OCO)
Place: 2, Driver: Lewis Hamilton (HAM)
Place: 3, Driver: Carlos Sainz (SAI)
Place: 4, Driver: Fernando Alonso (ALO)
Place: 5, Driver: Pierre Gasly (GAS)
Place: 6, Driver: Yuki Tsunoda (TSU)
Place: 7, Driver: Nicholas Latifi (LAT)
Place: 8, Driver: George Russell (RUS)
Place: 9, Driver: Max Verstappen (VER)
Place: 10, Driver: Kimi RÃ¤ikkÃ¶nen (RAI)
Place: 11, Driver: Daniel Ricciardo (RIC)
Place: 12, Driver: Mick Schumacher (MSC)
Place: 13, Driver: Antonio Giovinazzi (GIO)
Place: NC, Driver: Nikita Mazepin (MAZ)
Place: NC, Driver: Lando Norris (NOR)
Place: NC, Driver: Valtteri Bottas (BOT)
Place: NC, Driver: Sergio Perez (PER)
Place: NC, Driver: Charles Leclerc (LEC)
Place: NC, Driver: Lance Stroll (STR)
Place: DQ, Driver: Sebastian Vettel (VET)


In [10]:
# The above logic can be replicated bellow, and a different url for a different race can be provided with
# the same formatting result achieved (the above names were appended with a 1)

url1 = 'https://www.formula1.com/en/results.html/2021/races/1064/bahrain/race-result.html'
html1 = requests.get(url1, verify=True)
page_soup1 = soup(html1.text, 'html.parser')
pretty_page1 = page_soup1.prettify()
#print(pretty_page)
race_results1 = page_soup1.find("table",{"class":"resultsarchive-table"})
results_rows1 = race_results1.findChildren(['tr'])
    
# The text of the drivers name is not formated nicely, so we will format them
places_array1 = []
for row in results_rows1:
    cells = row.findChildren()
    names = cells[3].findChildren()
    driver = ''
    i = 0
    for name in names:
        if (i == 2):
            driver = driver + "(" + name.text + ')'
        else:
            driver = driver + name.text + ' ' 
        i = i + 1
    
    #print("Place: " + cells[1].text + ", Driver: " + driver)
    places_array1.append("Place: " + cells[1].text + ", Driver: " + driver)

# The position string needs to be removed and the dq driver moved to the end
for string in places_array1:
    if string.find("Place: Pos") != -1:
        places_array1.remove(string)
    if string.find("Place: DQ") != -1:
        places_array1.remove(string)
        places_array1.append(string)

# The above information does not contain the race name, so lets find that and add it to the begining of our list
race_name1 = page_soup1.find("h1",{"class":"ResultsArchiveTitle"}).text.strip()
places_array1.insert(0,race_name1)
for string in places_array1:
    print(string)

FORMULA 1 GULF AIR BAHRAIN GRAND PRIX 2021
         - RACE RESULT
Place: 1, Driver: Lewis Hamilton (HAM)
Place: 2, Driver: Max Verstappen (VER)
Place: 3, Driver: Valtteri Bottas (BOT)
Place: 4, Driver: Lando Norris (NOR)
Place: 5, Driver: Sergio Perez (PER)
Place: 6, Driver: Charles Leclerc (LEC)
Place: 7, Driver: Daniel Ricciardo (RIC)
Place: 8, Driver: Carlos Sainz (SAI)
Place: 9, Driver: Yuki Tsunoda (TSU)
Place: 10, Driver: Lance Stroll (STR)
Place: 11, Driver: Kimi RÃ¤ikkÃ¶nen (RAI)
Place: 12, Driver: Antonio Giovinazzi (GIO)
Place: 13, Driver: Esteban Ocon (OCO)
Place: 14, Driver: George Russell (RUS)
Place: 15, Driver: Sebastian Vettel (VET)
Place: 16, Driver: Mick Schumacher (MSC)
Place: 17, Driver: Pierre Gasly (GAS)
Place: 18, Driver: Nicholas Latifi (LAT)
Place: NC, Driver: Fernando Alonso (ALO)
Place: NC, Driver: Nikita Mazepin (MAZ)


In [11]:
# We can also turn the above logic into a function so that we can dynamically provide whatever url we want
def drivers_position(url):
    html1 = requests.get(url, verify=True)
    page_soup1 = soup(html1.text, 'html.parser')
    pretty_page1 = page_soup1.prettify()
    #print(pretty_page)
    race_results1 = page_soup1.find("table",{"class":"resultsarchive-table"})
    results_rows1 = race_results1.findChildren(['tr'])

    # The text of the drivers name is not formated nicely, so we will format them
    places_array1 = []
    for row in results_rows1:
        cells = row.findChildren()
        names = cells[3].findChildren()
        driver = ''
        i = 0
        for name in names:
            if (i == 2):
                driver = driver + "(" + name.text + ')'
            else:
                driver = driver + name.text + ' ' 
            i = i + 1

        #print("Place: " + cells[1].text + ", Driver: " + driver)
        places_array1.append("Place: " + cells[1].text + ", Driver: " + driver)

    # The position string needs to be removed and the dq driver moved to the end
    for string in places_array1:
        if string.find("Place: Pos") != -1:
            places_array1.remove(string)
        if string.find("Place: DQ") != -1:
            places_array1.remove(string)
            places_array1.append(string)

    # The above information does not contain the race name, so lets find that and add it to the begining of our list
    race_name1 = page_soup1.find("h1",{"class":"ResultsArchiveTitle"}).text.strip()
    places_array1.insert(0,race_name1)
    for string in places_array1:
        print(string)

# We can now provide any url we like (even urls that dont work so we need to be careful)
Urls = []
Urls.append("https://www.formula1.com/en/results.html/2021/races/1064/bahrain/race-result.html")
Urls.append("https://www.formula1.com/en/results.html/2021/races/1066/portugal/race-result.html")
Urls.append("https://www.formula1.com/en/results.html/2021/races/1086/spain/race-result.html")
Urls.append("https://www.formula1.com/en/results.html/2021/races/1067/monaco/race-result.html")
Urls.append("https://www.formula1.com/en/results.html/2021/races/1068/azerbaijan/race-result.html")
Urls.append("https://www.formula1.com/en/results.html/2021/races/1070/france/race-result.html")
Urls.append("https://www.formula1.com/en/results.html/2021/races/1092/austria/race-result.html")

# now we can loop through the urls calling the function each time
for url in Urls:
    drivers_position(url)
    print("")

FORMULA 1 GULF AIR BAHRAIN GRAND PRIX 2021
         - RACE RESULT
Place: 1, Driver: Lewis Hamilton (HAM)
Place: 2, Driver: Max Verstappen (VER)
Place: 3, Driver: Valtteri Bottas (BOT)
Place: 4, Driver: Lando Norris (NOR)
Place: 5, Driver: Sergio Perez (PER)
Place: 6, Driver: Charles Leclerc (LEC)
Place: 7, Driver: Daniel Ricciardo (RIC)
Place: 8, Driver: Carlos Sainz (SAI)
Place: 9, Driver: Yuki Tsunoda (TSU)
Place: 10, Driver: Lance Stroll (STR)
Place: 11, Driver: Kimi RÃ¤ikkÃ¶nen (RAI)
Place: 12, Driver: Antonio Giovinazzi (GIO)
Place: 13, Driver: Esteban Ocon (OCO)
Place: 14, Driver: George Russell (RUS)
Place: 15, Driver: Sebastian Vettel (VET)
Place: 16, Driver: Mick Schumacher (MSC)
Place: 17, Driver: Pierre Gasly (GAS)
Place: 18, Driver: Nicholas Latifi (LAT)
Place: NC, Driver: Fernando Alonso (ALO)
Place: NC, Driver: Nikita Mazepin (MAZ)

FORMULA 1 HEINEKEN GRANDE PRÃMIO DE PORTUGAL 2021
         - RACE RESULT
Place: 1, Driver: Lewis Hamilton (HAM)
Place: 2, Driver: Max Versta

In [12]:
# Now we want to navigate to the pitstop information on the f1 page
# instead of finding the url's ourselves, we will find and replace race result with pitstop
new_urls = []
for url in Urls:
    new_url = url.replace("race-result.html", "pit-stop-summary.html")
    new_urls.append(new_url)
    print(new_url)

https://www.formula1.com/en/results.html/2021/races/1064/bahrain/pit-stop-summary.html
https://www.formula1.com/en/results.html/2021/races/1066/portugal/pit-stop-summary.html
https://www.formula1.com/en/results.html/2021/races/1086/spain/pit-stop-summary.html
https://www.formula1.com/en/results.html/2021/races/1067/monaco/pit-stop-summary.html
https://www.formula1.com/en/results.html/2021/races/1068/azerbaijan/pit-stop-summary.html
https://www.formula1.com/en/results.html/2021/races/1070/france/pit-stop-summary.html
https://www.formula1.com/en/results.html/2021/races/1092/austria/pit-stop-summary.html


In [18]:
# Now we will create the function that lets us pass in a url, and outputs the driver who spent the most time in the pit lane
# First we need to access each drivers total time spent in the pitlane

# Note i got stuck here and could not acheive this, due to the intricacies involved in drivers appearing multiple times

url2 = new_urls[0]
html2 = requests.get(url2, verify=True)
page_soup2 = soup(html2.text, 'html.parser')
race_results2 = page_soup2.find("table",{"class":"resultsarchive-table"})
results_rows2 = race_results2.findChildren(['tr'])
for row in results_rows2:
    cells = row.findChildren()
    print("Driver: " + cells[2].text + ", Time spent in pitlane: " + cells[10].text)

Driver: No, Time spent in pitlane: 
Driver: 11, Time spent in pitlane: 23.993
Driver: 10, Time spent in pitlane: 38.338
Driver: 14, Time spent in pitlane: 24.373
Driver: 4, Time spent in pitlane: 24.899
Driver: 16, Time spent in pitlane: 24.925
Driver: 18, Time spent in pitlane: 24.884
Driver: 99, Time spent in pitlane: 31.998
Driver: 44, Time spent in pitlane: 24.839
Driver: 3, Time spent in pitlane: 24.688
Driver: 7, Time spent in pitlane: 24.107
Driver: 31, Time spent in pitlane: 25.226
Driver: 63, Time spent in pitlane: 24.621
Driver: 6, Time spent in pitlane: 26.046
Driver: 47, Time spent in pitlane: 25.798
Driver: 55, Time spent in pitlane: 24.353
Driver: 22, Time spent in pitlane: 25.046
Driver: 77, Time spent in pitlane: 24.262
Driver: 33, Time spent in pitlane: 24.767
Driver: 11, Time spent in pitlane: 24.105
Driver: 10, Time spent in pitlane: 24.317
Driver: 5, Time spent in pitlane: 24.626
Driver: 44, Time spent in pitlane: 24.076
Driver: 18, Time spent in pitlane: 25.525
Dri