# **Scraping FC Barcelona Stats from the FBref Website**

In [1]:
!pip install playwright

Collecting playwright
  Downloading playwright-1.52.0-py3-none-manylinux1_x86_64.whl.metadata (3.5 kB)
Collecting pyee<14,>=13 (from playwright)
  Downloading pyee-13.0.0-py3-none-any.whl.metadata (2.9 kB)
Downloading playwright-1.52.0-py3-none-manylinux1_x86_64.whl (45.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.1/45.1 MB[0m [31m14.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyee-13.0.0-py3-none-any.whl (15 kB)
Installing collected packages: pyee, playwright
Successfully installed playwright-1.52.0 pyee-13.0.0


In [2]:
!playwright install

Downloading Chromium 136.0.7103.25 (playwright build v1169)[2m from https://cdn.playwright.dev/dbazure/download/playwright/builds/chromium/1169/chromium-linux.zip[22m
[1G167.7 MiB [] 0% 0.0s[0K[1G167.7 MiB [] 0% 50.3s[0K[1G167.7 MiB [] 0% 28.3s[0K[1G167.7 MiB [] 0% 16.8s[0K[1G167.7 MiB [] 0% 8.9s[0K[1G167.7 MiB [] 1% 5.7s[0K[1G167.7 MiB [] 2% 4.4s[0K[1G167.7 MiB [] 3% 3.6s[0K[1G167.7 MiB [] 4% 3.0s[0K[1G167.7 MiB [] 5% 2.8s[0K[1G167.7 MiB [] 5% 2.6s[0K[1G167.7 MiB [] 6% 2.8s[0K[1G167.7 MiB [] 6% 2.7s[0K[1G167.7 MiB [] 7% 2.7s[0K[1G167.7 MiB [] 8% 2.6s[0K[1G167.7 MiB [] 9% 2.4s[0K[1G167.7 MiB [] 10% 2.4s[0K[1G167.7 MiB [] 11% 2.3s[0K[1G167.7 MiB [] 12% 2.2s[0K[1G167.7 MiB [] 13% 2.2s[0K[1G167.7 MiB [] 14% 2.0s[0K[1G167.7 MiB [] 15% 2.0s[0K[1G167.7 MiB [] 16% 1.9s[0K[1G167.7 MiB [] 18% 1.8s[0K[1G167.7 MiB [] 19% 1.7s[0K[1G167.7 MiB [] 20% 1.7s[0K[1G167.7 MiB [] 21% 1.6s[0K[1G167.7 MiB [] 22% 1.6s[0K[1G167.7 MiB [] 23% 1.5s[0K[1

In [3]:
!pip install fake_useragent

Collecting fake_useragent
  Downloading fake_useragent-2.2.0-py3-none-any.whl.metadata (17 kB)
Downloading fake_useragent-2.2.0-py3-none-any.whl (161 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m161.7/161.7 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: fake_useragent
Successfully installed fake_useragent-2.2.0


In [4]:
import asyncio
from playwright.async_api import async_playwright
from fake_useragent import UserAgent
import pandas as pd
import time, random
import os

In [None]:
# Shared generator used to pick a random User-Agent string for each scrape.
ua = UserAgent()


async def _read_headers(table):
    """Return the column labels of ``table`` as a list of stripped strings.

    The second header row is preferred; if it has no ``th`` cells the first
    header row is used instead. A cell's ``aria-label`` attribute wins over
    its visible text when present.
    """
    header_elems = await table.query_selector_all("thead tr:nth-child(2) th")
    if not header_elems:
        header_elems = await table.query_selector_all("thead tr:nth-child(1) th")

    headers = []
    for h in header_elems:
        aria_label = await h.get_attribute("aria-label")
        text = await h.inner_text()
        headers.append(aria_label.strip() if aria_label else text.strip())
    return headers


async def _read_rows(table):
    """Return the text of every ``th``/``td`` cell of each ``tbody`` row of ``table``."""
    rows = []
    for row in await table.query_selector_all("tbody tr"):
        cell_elements = await row.query_selector_all("th, td")
        rows.append([await cell.inner_text() for cell in cell_elements])
    return rows


def _build_frame(headers, rows):
    """Build a DataFrame from scraped ``rows`` (non-empty), labelled with ``headers``.

    The frame is as wide as the widest row, so body rows of differing length
    (presumably spacer or repeated-header rows -- not verified here) no longer
    make the constructor fail; shorter rows are padded by pandas. Surplus
    headers are dropped, and missing ones are filled with ``col_<index>``
    placeholders.
    """
    width = max(len(r) for r in rows)
    columns = headers[:width] + [f"col_{k}" for k in range(len(headers), width)]
    return pd.DataFrame(rows, columns=columns)


async def scrape_fbref():
    """Save every table on Barcelona's 2024-2025 Champions League fbref page as a CSV.

    Opens the squad page in headless Chromium with a random User-Agent header,
    waits for the ``.table_wrapper`` elements, and writes one CSV per wrapper
    into ``fbref_tables/``. Each file is named after the wrapper's ``h2``
    title, or ``table_<index>`` when there is none.

    Navigation and parsing errors are printed rather than raised, and a
    failure in one table does not stop the remaining tables. The browser is
    always closed, even if the coroutine is cancelled or interrupted.

    Returns:
        None. Results are written to disk and progress is printed.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            page = await browser.new_page()
            await page.set_extra_http_headers({"User-Agent": ua.random})
            try:
                print("Scraping fbref")
                await page.goto("https://fbref.com/en/squads/206d90db/2024-2025/c8/Barcelona-Stats-Champions-League", timeout=30000)
                await page.wait_for_selector(".table_wrapper", timeout=30000)
                tables = await page.query_selector_all(".table_wrapper")
                # Folder for CSVs
                os.makedirs("fbref_tables", exist_ok=True)

                for i, wrapper in enumerate(tables):
                    try:
                        # Table title for CSV filename.
                        # NOTE(review): characters such as ':' are kept so existing
                        # file names stay stable, but ':' is not valid in Windows
                        # file names.
                        title_elem = await wrapper.query_selector("h2")
                        title = await title_elem.inner_text() if title_elem else f"table_{i}"
                        title = title.strip().replace(" ", "_").replace("/", "-")

                        # Table element
                        table = await wrapper.query_selector("table")
                        if not table:
                            continue

                        headers = await _read_headers(table)
                        rows = await _read_rows(table)
                        if not rows:
                            # Nothing to write; report it instead of failing on rows[0].
                            print(f"Skipping table {i}: no body rows")
                            continue

                        # Save to CSV
                        df = _build_frame(headers, rows)
                        filename = f"fbref_tables/{title}.csv"
                        df.to_csv(filename, index=False)
                        print(f"Saved table: {filename}")
                    except Exception as inner_e:
                        print(f"Error parsing table {i}: {inner_e}")

            except Exception as e:
                print(f"Error: {e}")
        finally:
            # Release the browser even on cancellation or KeyboardInterrupt,
            # which the `except Exception` above does not catch.
            await browser.close()

# Run the scraper. Top-level `await` is used here as a notebook cell; in a
# plain Python script this would need asyncio.run(scrape_fbref()) instead.
await scrape_fbref()

Scraping fbref
Saved table: fbref_tables/Standard_Stats_2024-2025_Barcelona:_Champions_League.csv
Saved table: fbref_tables/Scores_&_Fixtures_2024-2025_Barcelona:_Champions_League.csv
Saved table: fbref_tables/Goalkeeping_2024-2025_Barcelona:_Champions_League.csv
Saved table: fbref_tables/Advanced_Goalkeeping_2024-2025_Barcelona:_Champions_League.csv
Saved table: fbref_tables/Shooting_2024-2025_Barcelona:_Champions_League.csv
Saved table: fbref_tables/Passing_2024-2025_Barcelona:_Champions_League.csv
Saved table: fbref_tables/Pass_Types_2024-2025_Barcelona:_Champions_League.csv
Saved table: fbref_tables/Goal_and_Shot_Creation_2024-2025_Barcelona:_Champions_League.csv
Saved table: fbref_tables/Defensive_Actions_2024-2025_Barcelona:_Champions_League.csv
Saved table: fbref_tables/Possession_2024-2025_Barcelona:_Champions_League.csv
Saved table: fbref_tables/Playing_Time_2024-2025_Barcelona:_Champions_League.csv
Saved table: fbref_tables/Miscellaneous_Stats_2024-2025_Barcelona:_Champions_L