In [8]:
import requests
import time
from bs4 import BeautifulSoup
import pandas as pd

In [9]:
# Initialze a database that we will add to.
all_matches = []

# Starting URL for webscrapping.
euros_url = "https://fbref.com/en/comps/676/European-Championship-Stats"

In [10]:
# Group A - Group F (therefore, 6 groups to go through.)
for i in range(6):
  # Grab table.
  data = requests.get(euros_url)
  soup = BeautifulSoup(data.text)
  # This is a CSS selector; selects any table element that has the tag 'table' and class 'stats_table'.
  euros_table = soup.select('table.stats_table')[i]

  # Grab each team from the table. 
  # The link we need has an 'a' tag with an href and must have '/squads' in the link. 
  links = [l.get("href") for l in euros_table.find_all('a')]
  links = [l for l in links if '/squads' in l]
  team_urls = [f"https://fbref.com{l}" for l in links]

  # Prevent site from blocking requests.
  time.sleep(20)

  # Go through each team.
  for team_url in team_urls:
    # Grab team name; get rid of excess words.
    team_name = team_url.split("/")[-1].replace("-Stats", "").replace("-", " ").replace(" Men", "")

    # Iterate through five 'seasons' of data.
    for j in range(5):
      # Grab data of each team's scores & fixtures.
      data = requests.get(team_url)
      matches = pd.read_html(data.text, match = "Scores & Fixtures")[0]

      # Grab data of each team's shooting stats.
      # The link we need has an 'a' tag with an href and must have 'all_comps/shooting/' in the link. 
      # In this case, there were multiple of the same shooting links on the same page; choose first one. 
      soup = BeautifulSoup(data.text)
      links = [l.get("href") for l in soup.find_all('a')]
      links = [l for l in links if l and 'all_comps/shooting/' in l]
      data = requests.get(f"https://fbref.com{links[0]}")
      shooting = pd.read_html(data.text, match = "Shooting")[0]
      shooting.columns = shooting.columns.droplevel()

      # Merge scores & fixtures and shooting stats.
      try:
        team_data = matches.merge(shooting[["Date", "Sh", "SoT", "Dist", "PK", "PKatt"]], on = "Date")
      except ValueError:
        continue

      # Friendly matches don't have advanced stats, so don't include.
      team_data = team_data[team_data["Comp"] != "Friendlies (M)"]

      # Set team name and add team data to our database.
      team_data["Team"] = team_name
      all_matches.append(team_data)

      # Previous 'season' for team; to update the for loop.
      previous_season = soup.select("div.prevnext a")[0].get("href")
      team_url = f"https://fbref.com{previous_season}"

      # Prevent site from blocking requests.
      time.sleep(20)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  team_data["Team"] = team_name


In [11]:
# Combine all data and put into a csv.
match_df = pd.concat(all_matches)
match_df.to_csv("matches.csv")