In [None]:
"""multiple teams for multiple seasons"""
import requests
import time
from io import StringIO

import pandas as pd
from bs4 import BeautifulSoup

# Entry point of the crawl: the Premier League standings page. The scraping
# loop reads this name and rebinds it as it walks back through seasons.
standings_url = "https://fbref.com/en/comps/9/Premier-League-Stats"

# Season labels to collect, newest first -> [2023, 2022].
years = [2023 - seasons_back for seasons_back in range(2)]

# Seconds to wait between requests so the server's rate limiter does not
# block the scraper.
pause_time = 20

# Accumulator: one DataFrame of match logs per team per season.
all_matches = []

# Seconds to wait for a single HTTP response before giving up, so a stalled
# connection cannot hang the whole scrape.
REQUEST_TIMEOUT = 30


def _fetch(url):
    """Return the response for *url*; raise on a timeout or an HTTP error status."""
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Fail loudly on 4xx/5xx (e.g. a rate-limit response) instead of trying to
    # parse an error page as if it were a stats page.
    response.raise_for_status()
    return response


def _scrape_team_season(team_url, team_name, year):
    """Build the Premier League match log for one team page.

    Downloads the team page, reads its "Scores & Fixtures" table, follows the
    first link containing ``all_comps/shooting`` to read the "Shooting" table,
    and merges selected shooting columns into the match log on ``Date``.

    Args:
        team_url: Absolute URL of the team's page.
        team_name: Value stored in the ``Team`` column.
        year: Value stored in the ``Season`` column.

    Returns:
        A DataFrame restricted to rows whose ``Comp`` is "Premier League", or
        ``None`` when the shooting page is missing or the merge fails.

    Raises:
        requests.RequestException: On a timeout, connection error or HTTP
            error status.
    """
    data_team = _fetch(team_url)
    matches = pd.read_html(StringIO(data_team.text), match="Scores & Fixtures")

    soup_team = BeautifulSoup(data_team.text, "html.parser")
    hrefs = [anchor.get("href") for anchor in soup_team.find_all("a")]
    # Anchors without an href yield None, hence the truthiness guard.
    shooting_links = [href for href in hrefs if href and "all_comps/shooting" in href]
    if not shooting_links:
        print(f"No shooting stats link found for {team_name} ({year}); skipping.")
        return None

    data_shooting = _fetch(f"https://fbref.com{shooting_links[0]}")
    shooting = pd.read_html(StringIO(data_shooting.text), match="Shooting")[0]
    # Drop the outer header level so columns can be selected by plain names.
    shooting.columns = shooting.columns.droplevel()

    try:
        team_data = matches[0].merge(
            shooting[["Date", "Sh", "SoT", "Dist", "FK", "PK", "PKatt"]],
            on="Date",
        )
    except ValueError:
        print(f"Could not merge shooting stats for {team_name} ({year}); skipping.")
        return None

    # filter: get only premier league matches. The explicit copy makes the
    # column assignments below act on an independent frame rather than on a
    # slice of the merged one (avoids SettingWithCopyWarning).
    team_data = team_data[team_data["Comp"] == "Premier League"].copy()
    team_data["Team"] = team_name
    team_data["Season"] = year
    return team_data


# Walk the standings pages from the starting URL backwards, one page per entry
# in `years`, and collect a match-log frame for every team linked on each page.
# NOTE(review): `year` is only a label; this assumes the initial standings_url
# is the page for the season that years[0] names - confirm.
for year in years:
    data = _fetch(standings_url)
    soup = BeautifulSoup(data.text, "html.parser")
    standings_table = soup.select("table.stats_table")[0]

    # Team pages are the standings-table links whose path contains /squads/.
    links = [anchor.get("href") for anchor in standings_table.find_all("a")]
    links = [href for href in links if href and "/squads/" in href]
    team_urls = [f"https://fbref.com{href}" for href in links]

    # Point standings_url at the previous season for the next iteration.
    previous_season = soup.select("a.prev")[0].get("href")
    standings_url = f"https://fbref.com{previous_season}"

    for team_url in team_urls:
        # Last URL segment looks like "<Team-Name>-Stats"; turn it into "Team Name".
        team_name = team_url.split("/")[-1].replace("-Stats", "").replace("-", " ")

        team_data = _scrape_team_season(team_url, team_name, year)
        if team_data is not None:
            all_matches.append(team_data)

        # Pause after every team, including skipped ones, so a failed merge
        # never leads to back-to-back requests against the rate limiter.
        print(f"Pause for {pause_time} seconds...")
        time.sleep(pause_time)
        print("Pause completed.")

# Stack every collected per-team, per-season frame into a single table, then
# normalise all column headers to lower case.
match_df = pd.concat(all_matches)
match_df.columns = list(map(str.lower, match_df.columns))
match_df  # notebook-style echo; it is not the last expression, so nothing is displayed

# Write the combined table to disk as CSV, leaving out the row index.
match_df.to_csv("fbref_data.csv", index=False)