# Web Scraping

In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
from datetime import datetime
from meteostat import Hourly

In [2]:

# Seasons to scrape; each year is substituted into the statistics URL below
years = [
    "2012", "2013", "2014", "2015", "2016", "2017",
    "2018", "2019", "2020", "2021", "2022", "2023"
]

# Seconds to wait on the server before abandoning a request
REQUEST_TIMEOUT = 30

# Dictionary to store DataFrames for each year (keyed by the year string)
dataframes_by_year = {}

# Fetch and parse the tyre-statistics page of every season
for year in years:
    url = f"https://www.f1cfa.com/f1-tyres-statistics.asp?t={year}&gpn=All&tipo=All&driver=All"

    # Send an HTTP GET request. Without a timeout a stalled connection would
    # block the notebook forever; raise_for_status() keeps an HTTP error page
    # from being parsed as if it were the statistics page.
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as exc:
        # Same best-effort policy as a missing table: report and move on.
        print(f"Request for year {year} failed: {exc}")
        continue
    html = response.content

    # Create a BeautifulSoup object to parse the HTML
    soup = BeautifulSoup(html, 'html.parser')

    # Find the table with the specified id
    table = soup.find('table', {'id': 'circuitos'})
    if table is None:
        print(f"Table with id 'circuitos' not found for year {year}.")
        continue

    # Extract the text of every header/data cell, one list per table row
    table_data = [
        [cell.get_text(strip=True) for cell in row.find_all(['th', 'td'])]
        for row in table.find_all('tr')
    ]
    # Rows without any cells carry no data; drop them
    table_data = [row_data for row_data in table_data if row_data]

    # A table without rows has no header to build the DataFrame from
    if not table_data:
        print(f"Table with id 'circuitos' is empty for year {year}.")
        continue

    # First row holds the column names, the remaining rows hold the records
    headers = table_data[0]
    data = table_data[1:]
    df = pd.DataFrame(data, columns=headers)

    # Store the DataFrame in the dictionary
    dataframes_by_year[year] = df

# Preview one season; guarded so a failed scrape of that season reports the
# problem instead of raising KeyError
preview_year = "2012"
if preview_year in dataframes_by_year:
    print(f"DataFrame for year {preview_year}:")
    print(dataframes_by_year[preview_year])
else:
    print(f"No data was scraped for year {preview_year}.")

DataFrame for year 2012:
                Driver         GP        Tyres From  To #Laps
0        Jenson Button  Australia    Soft Used    1  16    16
1        Jenson Button  Australia   Medium New   17  36    20
2        Jenson Button  Australia   Medium New   37  58    22
3     Sebastian Vettel  Australia    Soft Used    1  16    16
4     Sebastian Vettel  Australia    Soft Used   17  37    21
...                ...        ...          ...  ...  ..   ...
1392    Lewis Hamilton     Brazil     Hard New   19  54    36
1393   Romain Grosjean     Brazil     Hard New    1   5     5
1394  Pastor Maldonado     Brazil  Medium Used    1   2     2
1395       Bruno Senna     Brazil   Medium New    1   1     1
1396      Sergio Perez     Brazil   Medium New    1   1     1

[1397 rows x 6 columns]


In [3]:
print("Info for year 2012:")
# info() writes its report to stdout itself and returns None, so wrapping it
# in print() only appended a stray "None" line to the output.
dataframes_by_year["2012"].info()

Info for year 2012:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1397 entries, 0 to 1396
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Driver  1397 non-null   object
 1   GP      1397 non-null   object
 2   Tyres   1397 non-null   object
 3   From    1397 non-null   object
 4   To      1397 non-null   object
 5   #Laps   1397 non-null   object
dtypes: object(6)
memory usage: 65.6+ KB
None


In [None]:
# info() prints its own report and returns None; calling it directly avoids
# the extra "None" line that print(...info()) would emit.
dataframes_by_year["2012"].info()

``````
2012 Formula 1 season calendar

Round  Race                      Circuit                         Date
1      Australian Grand Prix     Albert Park                     March 16 – 18
2      Malaysian Grand Prix      Sepang International Circuit    March 23 – 25
3      Chinese Grand Prix        Shanghai International Circuit  April 13 – 15
4      Bahrain Grand Prix        Bahrain International Circuit   April 20 – 22
5      Spanish Grand Prix        Circuit de Catalunya            May 11 – 13
6      Monaco Grand Prix         Monte-Carlo                     May 24 – 27
7      Canadian Grand Prix       Circuit Gilles Villeneuve       June 8 – 10
8      European Grand Prix       Valencia Street Circuit         June 22 – 24
9      British Grand Prix        Silverstone                     July 6 – 8
10     German Grand Prix         Hockenheimring                  July 20 – 22
11     Hungarian Grand Prix      Hungaroring                     July 27 – 29
12     Belgian Grand Prix        Spa-Francorchamps               August 31 – September 2
13     Italian Grand Prix        Monza                           September 7 – 9
14     Singapore Grand Prix      Singapore                       September 21 – 23
15     Japanese Grand Prix       Suzuka                          October 5 – 7
16     Korean Grand Prix         Korean International Circuit    October 12 – 14
17     Indian Grand Prix         Buddh International Circuit     October 26 – 28
18     Abu Dhabi Grand Prix      Yas Marina                      November 2 – 4
19     United States Grand Prix  Circuit of the Americas         November 16 – 18
20     Brazilian Grand Prix      Interlagos                      November 23 – 25
``````

In [4]:
# Leave the frame as the cell's last expression so Jupyter renders it with
# its rich table display instead of print()'s plain-text dump.
dataframes_by_year["2013"]

               Driver         GP           Tyres From  To #Laps
0     Fernando Alonso  Australia  Supersoft Used    1   9     9
1     Fernando Alonso  Australia      Medium New   10  20    11
2     Fernando Alonso  Australia      Medium New   21  39    19
3     Fernando Alonso  Australia      Medium New   40  58    19
4      Kimi Räikkönen  Australia  Supersoft Used    1   9     9
...               ...        ...             ...  ...  ..   ...
1352      Charles Pic     Brazil        Hard New   26  58    33
1353  Valtteri Bottas     Brazil      Medium New    1  17    17
1354  Valtteri Bottas     Brazil        Hard New   18  41    24
1355  Valtteri Bottas     Brazil      Medium New   42  45     4
1356  Romain Grosjean     Brazil      Medium New    1   3     3

[1357 rows x 6 columns]


In [5]:
print("Info for year 2013:")
# info() writes its report to stdout itself and returns None, so wrapping it
# in print() only appended a stray "None" line to the output.
dataframes_by_year["2013"].info()

Info for year 2013:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1357 entries, 0 to 1356
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Driver  1357 non-null   object
 1   GP      1357 non-null   object
 2   Tyres   1357 non-null   object
 3   From    1357 non-null   object
 4   To      1357 non-null   object
 5   #Laps   1357 non-null   object
dtypes: object(6)
memory usage: 63.7+ KB
None


``````
2013 Formula 1 season calendar

Round  Race                      Circuit                         Date
1      Australian Grand Prix     Albert Park                     March 15 – 17
2      Malaysian Grand Prix      Sepang International Circuit    March 22 – 24
3      Chinese Grand Prix        Shanghai International Circuit  April 12 – 14
4      Bahrain Grand Prix        Bahrain International Circuit   April 19 – 21
5      Spanish Grand Prix        Circuit de Catalunya            May 10 – 12
6      Monaco Grand Prix         Monte-Carlo                     May 23 – 26
7      Canadian Grand Prix       Circuit Gilles Villeneuve       June 7 – 9
8      British Grand Prix        Silverstone                     June 28 – 30
9      German Grand Prix         Nürburgring                     July 5 – 7
10     Hungarian Grand Prix      Hungaroring                     July 26 – 28
11     Belgian Grand Prix        Spa-Francorchamps               August 23 – 25
12     Italian Grand Prix        Monza                           September 6 – 8
13     Singapore Grand Prix      Singapore                       September 20 – 22
14     Korean Grand Prix         Korean International Circuit    October 4 – 6
15     Japanese Grand Prix       Suzuka                          October 11 – 13
16     Indian Grand Prix         Buddh International Circuit     October 25 – 27
17     Abu Dhabi Grand Prix      Yas Marina                      November 1 – 3
18     United States Grand Prix  Circuit of the Americas         November 15 – 17
19     Brazilian Grand Prix      Interlagos                      November 22 – 24
``````

In [6]:
# Leave the frame as the cell's last expression so Jupyter renders it with
# its rich table display instead of print()'s plain-text dump.
dataframes_by_year["2014"]

                Driver         GP          Tyres From  To #Laps
0         Nico Rosberg  Australia       Soft New    1  12    12
1         Nico Rosberg  Australia       Soft New   13  38    26
2         Nico Rosberg  Australia     Medium New   39  57    19
3     Daniel Ricciardo  Australia       Soft New    1  12    12
4     Daniel Ricciardo  Australia       Soft New   13  36    24
...                ...        ...            ...  ...  ..   ...
1212     Jenson Button  Abu Dhabi       Soft New    7  28    22
1213     Jenson Button  Abu Dhabi       Soft New   29  55    27
1214   Nico Hulkenberg  Abu Dhabi       Soft New   16  37    22
1215   Nico Hulkenberg  Abu Dhabi  Supersoft New   38  55    18
1216    Lewis Hamilton     Russia     Medium New   28  53    26

[1217 rows x 6 columns]


In [7]:
print("Info for year 2014:")
# info() writes its report to stdout itself and returns None, so wrapping it
# in print() only appended a stray "None" line to the output.
dataframes_by_year["2014"].info()

Info for year 2014:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1217 entries, 0 to 1216
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Driver  1217 non-null   object
 1   GP      1217 non-null   object
 2   Tyres   1217 non-null   object
 3   From    1217 non-null   object
 4   To      1217 non-null   object
 5   #Laps   1217 non-null   object
dtypes: object(6)
memory usage: 57.2+ KB
None


In [8]:
# Leave the frame as the cell's last expression so Jupyter renders it with
# its rich table display instead of print()'s plain-text dump.
dataframes_by_year["2015"]

                Driver             GP           Tyres From  To #Laps
0       Lewis Hamilton      Australia       Soft Used    1  25    25
1       Lewis Hamilton      Australia      Medium New   26  58    33
2         Nico Rosberg      Australia       Soft Used    1  26    26
3         Nico Rosberg      Australia      Medium New   27  58    32
4     Sebastian Vettel      Australia       Soft Used    1  24    24
...                ...            ...             ...  ...  ..   ...
1058   Valtteri Bottas        Austria  Supersoft Used    1  26    26
1059   Valtteri Bottas        Austria        Soft New   27  71    45
1060   Marcus Ericsson  Great Britain     Medium Used   42  42     1
1061   Valtteri Bottas      Abu Dhabi        Soft New    9  10     2
1062   Valtteri Bottas      Abu Dhabi        Soft New   11  29    19

[1063 rows x 6 columns]


In [9]:
print("Info for year 2015:")
# info() writes its report to stdout itself and returns None, so wrapping it
# in print() only appended a stray "None" line to the output.
dataframes_by_year["2015"].info()

Info for year 2015:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1063 entries, 0 to 1062
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Driver  1063 non-null   object
 1   GP      1063 non-null   object
 2   Tyres   1063 non-null   object
 3   From    1063 non-null   object
 4   To      1063 non-null   object
 5   #Laps   1063 non-null   object
dtypes: object(6)
memory usage: 50.0+ KB
None


In [10]:
# Leave the frame as the cell's last expression so Jupyter renders it with
# its rich table display instead of print()'s plain-text dump.
dataframes_by_year["2016"]

                 Driver         GP           Tyres From  To #Laps
0          Nico Rosberg  Australia  Supersoft Used    1  12    12
1          Nico Rosberg  Australia        Soft New   13  18     6
2          Nico Rosberg  Australia      Medium New   19  57    39
3        Lewis Hamilton  Australia  Supersoft Used    1  16    16
4        Lewis Hamilton  Australia      Medium New   17  57    41
...                 ...        ...             ...  ...  ..   ...
1360      Jenson Button  Abu Dhabi        Soft New    1  12    12
1361    Valtteri Bottas  Abu Dhabi   Ultrasoft New    1   6     6
1362    Kevin Magnussen  Abu Dhabi        Soft New    1   1     1
1363    Kevin Magnussen  Abu Dhabi        Soft New    2   5     4
1364  Esteban Gutierrez  Abu Dhabi        Soft New    9  28    20

[1365 rows x 6 columns]


In [11]:
print("Info for year 2016:")
# info() writes its report to stdout itself and returns None, so wrapping it
# in print() only appended a stray "None" line to the output.
dataframes_by_year["2016"].info()

Info for year 2016:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1365 entries, 0 to 1364
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Driver  1365 non-null   object
 1   GP      1365 non-null   object
 2   Tyres   1365 non-null   object
 3   From    1365 non-null   object
 4   To      1365 non-null   object
 5   #Laps   1365 non-null   object
dtypes: object(6)
memory usage: 64.1+ KB
None


In [12]:
# Leave the frame as the cell's last expression so Jupyter renders it with
# its rich table display instead of print()'s plain-text dump.
dataframes_by_year["2017"]

               Driver         GP           Tyres From  To #Laps
0    Sebastian Vettel  Australia  Ultrasoft Used    1  23    23
1    Sebastian Vettel  Australia        Soft New   24  57    34
2      Lewis Hamilton  Australia  Ultrasoft Used    1  17    17
3      Lewis Hamilton  Australia        Soft New   18  57    40
4     Valtteri Bottas  Australia  Ultrasoft Used    1  25    25
..                ...        ...             ...  ...  ..   ...
913      Carlos Sainz  Abu Dhabi   Ultrasoft New    1  31    31
914      Carlos Sainz  Abu Dhabi   Supersoft New   32  32     1
915  Daniel Ricciardo  Abu Dhabi  Ultrasoft Used    1  19    19
916  Daniel Ricciardo  Abu Dhabi   Supersoft New   20  20     1
917    Lewis Hamilton  Abu Dhabi   Supersoft New   25  55    31

[918 rows x 6 columns]


In [13]:
print("Info for year 2017:")
# info() writes its report to stdout itself and returns None, so wrapping it
# in print() only appended a stray "None" line to the output.
dataframes_by_year["2017"].info()

Info for year 2017:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 918 entries, 0 to 917
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Driver  918 non-null    object
 1   GP      918 non-null    object
 2   Tyres   918 non-null    object
 3   From    918 non-null    object
 4   To      918 non-null    object
 5   #Laps   918 non-null    object
dtypes: object(6)
memory usage: 43.2+ KB
None


In [14]:
# Leave the frame as the cell's last expression so Jupyter renders it with
# its rich table display instead of print()'s plain-text dump.
dataframes_by_year["2018"]

               Driver         GP               Tyres From  To #Laps
0    Sebastian Vettel  Australia      Ultrasoft Used    1  26    26
1    Sebastian Vettel  Australia            Soft New   27  58    32
2      Lewis Hamilton  Australia      Ultrasoft Used    1  19    19
3      Lewis Hamilton  Australia            Soft New   20  58    39
4      Kimi Räikkönen  Australia      Ultrasoft Used    1  18    18
..                ...        ...                 ...  ...  ..   ...
930      Esteban Ocon  Abu Dhabi  Hiper Blando Usado    1  18    18
931      Esteban Ocon  Abu Dhabi       Supersoft New   19  44    26
932   Marcus Ericsson  Abu Dhabi       Supersoft New    1  24    24
933    Kimi Räikkönen  Abu Dhabi      Ultrasoft Used    1   6     6
934   Nico Hulkenberg  Abu Dhabi  Hiper Blando Usado    1   1     1

[935 rows x 6 columns]


In [15]:
print("Info for year 2018:")
# info() writes its report to stdout itself and returns None, so wrapping it
# in print() only appended a stray "None" line to the output.
dataframes_by_year["2018"].info()

Info for year 2018:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 935 entries, 0 to 934
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Driver  935 non-null    object
 1   GP      935 non-null    object
 2   Tyres   935 non-null    object
 3   From    935 non-null    object
 4   To      935 non-null    object
 5   #Laps   935 non-null    object
dtypes: object(6)
memory usage: 44.0+ KB
None


In [16]:
# Leave the frame as the cell's last expression so Jupyter renders it with
# its rich table display instead of print()'s plain-text dump.
dataframes_by_year["2019"]

               Driver         GP       Tyres From  To #Laps
0     Valtteri Bottas  Australia   Soft Used    1  23    23
1     Valtteri Bottas  Australia  Medium New   24  58    35
2      Lewis Hamilton  Australia   Soft Used    1  15    15
3      Lewis Hamilton  Australia  Medium New   16  58    43
4      Max Verstappen  Australia   Soft Used    1  25    25
...               ...        ...         ...  ...  ..   ...
1060    Robert Kubica  Abu Dhabi  Medium New   25  53    29
1061     Lance Stroll  Abu Dhabi  Medium New    1   5     5
1062     Lance Stroll  Abu Dhabi    Hard New    6  23    18
1063     Lance Stroll  Abu Dhabi    Soft New   24  45    22
1064    Robert Kubica  Abu Dhabi    Hard New    1  24    24

[1065 rows x 6 columns]


In [17]:
print("Info for year 2019:")
# info() writes its report to stdout itself and returns None, so wrapping it
# in print() only appended a stray "None" line to the output.
dataframes_by_year["2019"].info()

Info for year 2019:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1065 entries, 0 to 1064
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Driver  1065 non-null   object
 1   GP      1065 non-null   object
 2   Tyres   1065 non-null   object
 3   From    1065 non-null   object
 4   To      1065 non-null   object
 5   #Laps   1065 non-null   object
dtypes: object(6)
memory usage: 50.1+ KB
None


In [18]:
# Leave the frame as the cell's last expression so Jupyter renders it with
# its rich table display instead of print()'s plain-text dump.
dataframes_by_year["2020"]

                Driver         GP       Tyres From  To #Laps
0      Valtteri Bottas    Austria   Soft Used    1  26    26
1      Valtteri Bottas    Austria    Hard New   27  56    30
2      Valtteri Bottas    Austria    Hard New   57  71    15
3      Charles Leclerc    Austria   Soft Used    1  26    26
4      Charles Leclerc    Austria    Hard New   27  51    25
..                 ...        ...         ...  ...  ..   ...
907  Pietro Fittipaldi  Abu Dhabi  Medium New    1  10    10
908  Pietro Fittipaldi  Abu Dhabi    Hard New   11  34    24
909  Pietro Fittipaldi  Abu Dhabi  Medium New   35  48    14
910  Pietro Fittipaldi  Abu Dhabi   Soft Used   49  53     5
911       Sergio Perez  Abu Dhabi    Hard New    1   8     8

[912 rows x 6 columns]


In [19]:
print("Info for year 2020:")
# info() writes its report to stdout itself and returns None, so wrapping it
# in print() only appended a stray "None" line to the output.
dataframes_by_year["2020"].info()

Info for year 2020:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 912 entries, 0 to 911
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Driver  912 non-null    object
 1   GP      912 non-null    object
 2   Tyres   912 non-null    object
 3   From    912 non-null    object
 4   To      912 non-null    object
 5   #Laps   912 non-null    object
dtypes: object(6)
memory usage: 42.9+ KB
None


In [20]:
# Leave the frame as the cell's last expression so Jupyter renders it with
# its rich table display instead of print()'s plain-text dump.
dataframes_by_year["2021"]

               Driver         GP        Tyres From  To #Laps
0      Lewis Hamilton    Bahrain  Medium Used    1  13    13
1      Lewis Hamilton    Bahrain     Hard New   14  28    15
2      Lewis Hamilton    Bahrain     Hard New   29  56    28
3      Max Verstappen    Bahrain  Medium Used    1  17    17
4      Max Verstappen    Bahrain   Medium New   18  39    22
...               ...        ...          ...  ...  ..   ...
1087     Pierre Gasly  Abu Dhabi  Medium Used   37  54    18
1088     Carlos Sainz  Abu Dhabi    Soft Used    1  19    19
1089   Lewis Hamilton  Abu Dhabi    Hard Used   15  58    44
1090  Valtteri Bottas  Abu Dhabi    Hard Used   31  58    28
1091     Esteban Ocon  Abu Dhabi    Hard Used   16  58    43

[1092 rows x 6 columns]


In [21]:
print("Info for year 2021:")
# info() writes its report to stdout itself and returns None, so wrapping it
# in print() only appended a stray "None" line to the output.
dataframes_by_year["2021"].info()

Info for year 2021:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1092 entries, 0 to 1091
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Driver  1092 non-null   object
 1   GP      1092 non-null   object
 2   Tyres   1092 non-null   object
 3   From    1092 non-null   object
 4   To      1092 non-null   object
 5   #Laps   1092 non-null   object
dtypes: object(6)
memory usage: 51.3+ KB
None


In [22]:
# Leave the frame as the cell's last expression so Jupyter renders it with
# its rich table display instead of print()'s plain-text dump.
dataframes_by_year["2022"]

               Driver         GP       Tyres From  To #Laps
0     Charles Leclerc    Bahrain    Soft New    1  15    15
1     Charles Leclerc    Bahrain   Soft Used   16  31    16
2     Charles Leclerc    Bahrain  Medium New   32  46    15
3     Charles Leclerc    Bahrain   Soft Used   47  57    11
4        Carlos Sainz    Bahrain   Soft Used    1  14    14
...               ...        ...         ...  ...  ..   ...
1208  Nicholas Latifi  Abu Dhabi  Medium New    1  15    15
1209  Nicholas Latifi  Abu Dhabi    Hard New   16  38    23
1210  Nicholas Latifi  Abu Dhabi  Medium New   39  55    17
1211  Fernando Alonso  Abu Dhabi  Medium New    1  19    19
1212  Fernando Alonso  Abu Dhabi    Hard New   20  27     8

[1213 rows x 6 columns]


In [23]:
print("Info for year 2022:")
# info() writes its report to stdout itself and returns None, so wrapping it
# in print() only appended a stray "None" line to the output.
dataframes_by_year["2022"].info()

Info for year 2022:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1213 entries, 0 to 1212
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Driver  1213 non-null   object
 1   GP      1213 non-null   object
 2   Tyres   1213 non-null   object
 3   From    1213 non-null   object
 4   To      1213 non-null   object
 5   #Laps   1213 non-null   object
dtypes: object(6)
memory usage: 57.0+ KB
None


In [24]:
# Leave the frame as the cell's last expression so Jupyter renders it with
# its rich table display instead of print()'s plain-text dump.
dataframes_by_year["2023"]

              Driver       GP       Tyres From  To #Laps
0     Max Verstappen  Bahrain   Soft Used    1  14    14
1     Max Verstappen  Bahrain   Soft Used   15  36    22
2     Max Verstappen  Bahrain    Hard New   37  57    21
3       Sergio Perez  Bahrain   Soft Used    1  17    17
4       Sergio Perez  Bahrain   Soft Used   18  34    17
..               ...      ...         ...  ...  ..   ...
654     Carlos Sainz  Belgium  Medium New    8  23    16
655    Oscar Piastri  Belgium  Medium New    1   1     1
656  Charles Leclerc  Belgium    Soft New    1  13    13
657  Charles Leclerc  Belgium  Medium New   14  28    15
658  Charles Leclerc  Belgium    Soft New   29  44    16

[659 rows x 6 columns]


In [25]:
print("Info for year 2023:")
# info() writes its report to stdout itself and returns None, so wrapping it
# in print() only appended a stray "None" line to the output.
dataframes_by_year["2023"].info()

Info for year 2023:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 659 entries, 0 to 658
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Driver  659 non-null    object
 1   GP      659 non-null    object
 2   Tyres   659 non-null    object
 3   From    659 non-null    object
 4   To      659 non-null    object
 5   #Laps   659 non-null    object
dtypes: object(6)
memory usage: 31.0+ KB
None
