In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import time
import random

In [2]:
# League name -> URL of that league's season-history page on fbref.com.
# Insertion order is the order in which the leagues are scraped.
urls = {
    'Premier League':
        'https://fbref.com/en/comps/9/history/Premier-League-Seasons',
    'La Liga':
        'https://fbref.com/en/comps/12/history/La-Liga-Seasons',
    'Serie A':
        'https://fbref.com/en/comps/11/history/Serie-A-Seasons',
    'Bundesliga':
        'https://fbref.com/en/comps/20/history/Bundesliga-Seasons',
    'Ligue 1':
        'https://fbref.com/en/comps/13/history/Ligue-1-Seasons',
}

def get_urls(history_url, season_number=15, timeout=30):
    """Return the season page URLs linked from a league history page.

    Downloads ``history_url``, takes the first ``<table>`` in the page and
    collects the link held in the header cell (``<th>``) of each body row.
    Rows whose header cell has no link are skipped.

    Args:
        history_url: URL of the history page to download.
        season_number: Maximum number of URLs to return, counted from the
            top of the table.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block indefinitely.

    Returns:
        A list of absolute URLs (``https://fbref.com`` followed by each
        link's ``href``), in table order, truncated to ``season_number``.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        ValueError: If the page contains no table body to read.
    """
    page = requests.get(history_url, timeout=timeout)
    # Fail here with the real HTTP error rather than later, while trying to
    # parse an error page that contains no table.
    page.raise_for_status()

    soup = BeautifulSoup(page.content, 'html.parser')
    table = soup.find('table')
    body = table.find('tbody') if table is not None else None
    if body is None:
        raise ValueError(f"No seasons table found at {history_url}")

    season_urls = []
    for row in body.find_all('tr'):
        first_cell = row.find('th')
        anchor = first_cell.find('a') if first_cell is not None else None
        href = anchor.get('href') if anchor is not None else None
        if href:
            season_urls.append('https://fbref.com' + href)

    return season_urls[:season_number]

# Collects one tidy DataFrame per successfully scraped season
all_data = []

# Label given to the first season URL of every league; each following URL
# is labelled one year earlier
starting_year = 2025

# Column name in the scraped table -> column name in the output
COLUMN_NAMES = {
    'Rk': 'Position',
    'Squad': 'Team',
    'Pts': 'Points',
    'MP': 'Matches Played',
    'Pts/MP': 'Points per Match',
    'W': 'Wins',
    'D': 'Draws',
    'L': 'Losses',
}
OUTPUT_COLUMNS = ['League', 'Season', *COLUMN_NAMES.values()]

for league, history_url in urls.items():
    print(f"\n webscraping {league}")

    # Deliberately not named `urls`: rebinding that name here would replace
    # the league dictionary with a list and break any re-run of this code.
    try:
        season_urls = get_urls(history_url)
    except Exception as e:
        # One unreachable history page should not discard the leagues
        # already scraped; report it and move on.
        print(f"Error fetching season list for {league}: {e}")
        continue

    for offset, season_url in enumerate(season_urls):
        # Derive the label from the URL's position so that a failed season
        # does not shift the labels of the seasons that follow it.
        year = starting_year - offset

        try:
            # The league table is taken to be the first table on the page
            league_table = pd.read_html(season_url)[0]

            # Each step returns a new frame, so nothing is assigned onto a
            # slice of the scraped table.
            league_table2 = (
                league_table[list(COLUMN_NAMES)]
                .rename(columns=COLUMN_NAMES)
                .dropna()
                .assign(League=league, Season=year)
            )[OUTPUT_COLUMNS]

            all_data.append(league_table2)
            print(f"Scraped {league} {year}")

        except Exception as e:
            print(f"Error in {league} {year}: {e}")

        finally:
            # Pause after every attempt, failed ones included, so errors do
            # not turn into rapid back-to-back requests.
            time.sleep(random.randint(4, 8))


if not all_data:
    raise ValueError("No league tables were scraped; nothing to save")

# Put into one dataframe
combined_df = pd.concat(all_data, ignore_index=True)

# Create CSV
combined_df.to_csv('data.csv', index=False)

print("\n Data saved")


 webscraping Premier League
Scraped Premier League 2025
Scraped Premier League 2024
Scraped Premier League 2023
Scraped Premier League 2022
Scraped Premier League 2021
Scraped Premier League 2020
Scraped Premier League 2019
Scraped Premier League 2018
Scraped Premier League 2017
Scraped Premier League 2016
Scraped Premier League 2015
Scraped Premier League 2014
Scraped Premier League 2013
Scraped Premier League 2012
Scraped Premier League 2011

 webscraping La Liga
Scraped La Liga 2025
Scraped La Liga 2024
Scraped La Liga 2023
Scraped La Liga 2022
Scraped La Liga 2021
Scraped La Liga 2020
Scraped La Liga 2019
Scraped La Liga 2018
Scraped La Liga 2017
Scraped La Liga 2016
Scraped La Liga 2015
Scraped La Liga 2014
Scraped La Liga 2013
Scraped La Liga 2012
Scraped La Liga 2011

 webscraping Serie A
Scraped Serie A 2025
Scraped Serie A 2024
Scraped Serie A 2023
Scraped Serie A 2022
Scraped Serie A 2021
Scraped Serie A 2020
Scraped Serie A 2019
Scraped Serie A 2018
Scraped Serie A 2017
Scr