# Scrape Defensive Team Statistics from FBREF

In [1]:
import os
from io import StringIO

import pandas as pd
import requests

# Base URL for fetching data; {season} is substituted in two places
base_url = "https://fbref.com/en/comps/Big5/{season}/defense/squads/{season}-Big-5-European-Leagues-Stats"

# Seasons to fetch data for
seasons = ["2023-2024", "2022-2023", "2021-2022"]

# The API key is read from the environment so the secret never lives in the
# notebook. Export FBREF_API_KEY before starting the kernel. Any key that was
# previously committed in this cell should be treated as leaked and rotated.
api_key = os.environ.get("FBREF_API_KEY", "")

# Seconds to wait on the server before giving up on a single request
request_timeout = 30

# List to store the DataFrames
dfs = []

# Headers including the API key for authentication (left empty when no key is set)
headers = {"X-API-Key": api_key} if api_key else {}

# Iterate over the seasons to fetch data
for season in seasons:
    url = base_url.format(season=season)

    try:
        # The request sits inside the try so that connection errors and
        # timeouts are reported per season instead of aborting the whole loop.
        response = requests.get(url, headers=headers, timeout=request_timeout)

        # Ensure the request was successful
        response.raise_for_status()

        # Wrap the markup in StringIO: passing literal HTML to read_html is deprecated
        tables = pd.read_html(StringIO(response.text))

        # Assuming the first table is the one we need
        df = tables[0]

        # Flatten a two-level header by dropping the top (group) level;
        # a single-level header is left as it is.
        if isinstance(df.columns, pd.MultiIndex):
            df.columns = df.columns.droplevel(0)

        # Blank out auto-generated "Unnamed..." labels; str() keeps the check
        # safe if a label is not a string.
        df = df.rename(columns=lambda x: " " if str(x).startswith("Unnamed") else x)
        df = df.reset_index(drop=True)  # Reset the index

        # NOTE(review): rows are not tagged with their season, so seasons cannot
        # be told apart after the concat below — consider adding a season column.
        dfs.append(df)
        print(f"✅ Successfully fetched data for {season}")

    except requests.exceptions.RequestException as e:
        print(f"❌ Failed to fetch data for {season}: {e}")
    except Exception as e:
        # Anything else is a parsing/cleaning problem; report it and move on
        print(f"❌ Failed to process data for {season}: {e}")

# Ensure dfs is not empty before attempting to concatenate
if dfs:
    final_df = pd.concat(dfs, ignore_index=True)
    print("✅ Successfully combined all data.")
else:
    # Still bind final_df so later cells get an empty frame rather than a NameError
    final_df = pd.DataFrame()
    print("⚠ No data was fetched. Exiting gracefully.")


  tables = pd.read_html(response.text)


✅ Successfully fetched data for 2023-2024


  tables = pd.read_html(response.text)


✅ Successfully fetched data for 2022-2023
✅ Successfully fetched data for 2021-2022
✅ Successfully combined all data.


  tables = pd.read_html(response.text)


In [2]:
# Display the list of per-season DataFrames collected by the scraping loop
dfs

[    Rk          Squad                Comp  # Pl   90s  Tkl  TklW  Def 3rd  \
 0    1         Alavés          es La Liga    30  38.0  578   347      284   
 1    2        Almería          es La Liga    35  38.0  633   372      354   
 2    3        Arsenal  eng Premier League    25  38.0  610   348      257   
 3    4    Aston Villa  eng Premier League    31  38.0  547   311      269   
 4    5       Atalanta          it Serie A    29  38.0  613   369      273   
 ..  ..            ...                 ...   ...   ...  ...   ...      ...   
 91  92     Villarreal          es La Liga    32  38.0  576   348      288   
 92  93  Werder Bremen       de Bundesliga    30  34.0  604   351      303   
 93  94       West Ham  eng Premier League    26  38.0  684   426      350   
 94  95      Wolfsburg       de Bundesliga    28  34.0  550   303      272   
 95  96         Wolves  eng Premier League    27  38.0  739   430      415   
 
     Mid 3rd  Att 3rd  ...  Att  Tkl%  Lost  Blocks   Sh  Pass

# Scrape League Table and Expected Goals (xG / xGA) Statistics from FBREF

In [3]:
import pandas as pd

# URL template for the Big-5 league overview page; {season} is substituted twice
base_url_xga = "https://fbref.com/en/comps/Big5/{season}/{season}-Big-5-European-Leagues-Stats"

# Per-season tables collected by the loop below
xga_dfs = []

# `seasons` comes from the first scraping cell, which must have been run already
for season in seasons:
    url = base_url_xga.format(season=season)

    try:
        # read_html fetches the page itself when it is given a URL
        tables = pd.read_html(url)

        # Assuming the first table is the one we need
        df = tables[0]

        # Blank out auto-generated "Unnamed..." labels; str() keeps the check
        # safe if a label is not a string.
        df = df.rename(columns=lambda x: " " if str(x).startswith("Unnamed") else x)
        df = df.reset_index(drop=True)  # Reset index

        xga_dfs.append(df)  # Store the cleaned DataFrame

        print(f"Successfully fetched data for {season}")

    except Exception as e:
        # Download and parsing both happen inside read_html, so a single broad
        # handler reports either kind of failure and lets the loop continue.
        print(f"Failed to fetch data for {season}: {e}")

# pd.concat raises ValueError on an empty list, so guard the all-failed case
if xga_dfs:
    final_df_xga = pd.concat(xga_dfs, ignore_index=True)
else:
    final_df_xga = pd.DataFrame()
    print("No data was fetched for any season.")

Successfully fetched data for 2023-2024
Successfully fetched data for 2022-2023
Successfully fetched data for 2021-2022


Unnamed: 0,Rk,Squad,Country,LgRk,MP,W,D,L,GF,GA,GD,Pts,Pts/MP,xG,xGA,xGD,xGD/90,Attendance,Top Team Scorer,Goalkeeper
0,1,Leverkusen,de GER,1,34,28,6,0,89,24,65,90,2.65,73.6,29.9,43.7,1.29,29994,Victor Boniface - 14,Lukáš Hrádecký
1,2,Real Madrid,es ESP,1,38,29,8,1,87,26,61,95,2.50,68.8,35.4,33.4,0.88,72061,Jude Bellingham - 19,Andriy Lunin
2,3,Inter,it ITA,1,38,29,7,2,89,22,67,94,2.47,78.2,31.2,47.1,1.24,72838,Lautaro Martínez - 24,Yann Sommer
3,4,Manchester City,eng ENG,1,38,28,7,3,96,34,62,91,2.39,80.5,35.6,44.9,1.18,53012,Erling Haaland - 27,Ederson
4,5,Arsenal,eng ENG,2,38,28,5,5,91,29,62,89,2.34,76.1,27.9,48.2,1.27,60236,Bukayo Saka - 16,David Raya
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
287,94,Genoa,it ITA,19,38,4,16,18,27,60,-33,28,0.74,37.2,54.1,-16.9,-0.44,12326,Mattia Destro - 9,Salvatore Sirigu
288,95,Venezia,it ITA,20,38,6,9,23,34,69,-35,27,0.71,36.5,71.8,-35.3,-0.93,6648,Thomas Henry - 9,Niki Mäenpää
289,96,Watford,eng ENG,19,38,6,5,27,34,77,-43,23,0.61,40.0,64.5,-24.5,-0.64,20614,Emmanuel Dennis - 10,Ben Foster
290,97,Norwich City,eng ENG,20,38,5,7,26,23,84,-61,22,0.58,32.3,75.5,-43.2,-1.14,26836,Teemu Pukki - 11,Tim Krul
