In [1]:
import requests
from pathlib import Path
import os

import shelve

from bs4 import BeautifulSoup
from bs4 import Comment
import pandas as pd

from tqdm.auto import tqdm

import math

In [2]:
from diskcache import Cache

# On-disk cache (diskcache) that backs the memoized get_page() defined below, so
# pages that were already downloaded are not requested again on later runs.
# NOTE(review): the ".shelve" suffix looks like a leftover from an earlier
# shelve-based cache; here the string is simply the storage location handed to
# diskcache.Cache — confirm before renaming, since renaming would drop the cache.
cache = Cache("matches_report.shelve")

@cache.memoize()
def get_page(link):
    """Download ``link`` and return the raw response body as bytes.

    The result is memoized in ``cache``, so every URL is requested at most
    once; the URL is printed only when a real download (cache miss) happens.

    Args:
        link: Absolute URL of the page to fetch.

    Returns:
        The response body (``bytes``).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
            Raising instead of returning the error page keeps failed
            downloads (rate limiting, server errors) out of the cache, so
            they are retried on the next run rather than parsed as a report.
        requests.RequestException: On connection problems or timeout.
    """
    print(link)
    # A timeout prevents a single stalled connection from hanging the whole scrape.
    response = requests.get(link, timeout=30)
    response.raise_for_status()
    return response.content

In [3]:
# Number of entries currently stored in the cache (iterating the cache yields
# its keys), i.e. a quick check of how much has already been downloaded.
len(list(cache))

1704

In [4]:
def build_column_name_prefixes(spans, prefixes):
    """Expand header-group names so that there is one name per column.

    ``spans[k]`` is the number of columns covered by ``prefixes[k]``. Each
    prefix is repeated that many times, preserving order, so the result has
    ``sum(spans)`` entries.

    Args:
        spans: Column span of every header group.
        prefixes: Header-group names, same length as ``spans``.

    Returns:
        A flat list with one prefix per underlying column.
    """
    assert len(spans) == len(prefixes)
    return [
        group_name
        for width, group_name in zip(spans, prefixes)
        for _ in range(width)
    ]
            

In [5]:
def join_prefixes_suffixes(prefixes, suffixes):
    """Combine per-column prefixes and suffixes into final column names.

    A column whose prefix is the empty string keeps its suffix unchanged;
    every other column is named ``"<prefix>_<suffix>"``.

    Args:
        prefixes: One prefix per column (``''`` for "no prefix").
        suffixes: One suffix per column, same length as ``prefixes``.

    Returns:
        The list of combined names, in column order.
    """
    assert len(prefixes) == len(suffixes)
    return [
        tail if head == '' else head + '_' + tail
        for head, tail in zip(prefixes, suffixes)
    ]

In [6]:
# League identifiers; each one is also the name of its folder under dfs/
# (fixtures are read from dfs/<league>/matches).
leagues = ['EPL', 'Ligue1', 'Bundesliga', 'SerieA', 'LaLiga']
# Names of the per-team stats tables of a match report. The scraping loop pairs
# them with the tables purely by position, and each name becomes the file name
# of one output CSV, so the order here matters.
report_types = ['summary', 'passing', 'pass_types', 'defensive_actions', 'possession', 'misc_stats']

In [7]:
# Seasons whose match reports are scraped; fixture files of other seasons are skipped.
SEASONS_TO_SCRAPE = ['2020-2021', '2019-2020', '2018-2019', '2017-2018']
# Metadata columns prepended to every scraped player row.
META_COLUMNS = ['Season', 'League', 'Match', 'Team', 'Date']
# Cell text that marks the row of a squad player who did not take part in the match.
UNUSED_SUB_MARKER = 'On matchday squad, but did not play'


def _stats_table_columns(table):
    """Return the flat column names of one stats table.

    The table header has two rows: the first holds group names (with
    ``colspan``), the second the individual column names. Both are combined
    into ``<group>_<column>`` names and prefixed with ``META_COLUMNS``.
    """
    header_rows = table.find('thead').find_all('tr')
    group_cells = header_rows[0].find_all('th')
    spans = [1 if cell.get('colspan') is None else int(cell.get('colspan')) for cell in group_cells]
    group_names = [cell.text.strip() for cell in group_cells]
    prefixes = build_column_name_prefixes(spans, group_names)
    leaf_names = [cell.text.strip() for cell in header_rows[1].find_all('th')]
    return META_COLUMNS + join_prefixes_suffixes(prefixes, leaf_names)


def _stats_table_rows(table, leading_values, n_columns):
    """Return the body rows of one stats table as lists of cell texts.

    Every row starts with ``leading_values`` (the match metadata) followed by
    the stripped text of its ``th`` and then its ``td`` cells. Rows whose
    scraped cells are all empty are dropped.
    """
    rows = []
    for tr in table.find('tbody').find_all('tr'):
        cells = [cell.text.strip() for cell in tr.find_all('th')]
        cells += [cell.text.strip() for cell in tr.find_all('td')]
        n_empty = cells.count('')
        record = list(leading_values) + cells

        if record[-2] == UNUSED_SUB_MARKER:
            # Keep the marker in place, blank the cell after it, pad with None
            # up to the table width and move the original last cell to the
            # final column so the row lines up with the header.
            trailing = record[-1]
            record[-1] = None
            record.extend([None] * (n_columns - len(record) - 1))
            record.append(trailing)

        if n_empty != len(record) - len(leading_values):
            rows.append(record)
    return rows


for league in leagues:
    print(league)
    matches_dir = f'dfs/{league}/matches'
    season_files = {
        file_name.split('.')[0]: os.path.join(matches_dir, file_name)
        for file_name in os.listdir(matches_dir)
        if os.path.isfile(os.path.join(matches_dir, file_name))
    }
    # Process the files in reverse path order.
    season_files = dict(sorted(season_files.items(), key=lambda kv: (kv[1], kv[0]), reverse=True))
    season_dfs = [pd.read_csv(csv_path) for csv_path in season_files.values()]

    # Column names and collected rows per report type, both addressed by the
    # position of the table inside a team's block.
    # NOTE(review): report_columns keeps the header of the most recently parsed
    # table of each type; this assumes every page uses the same layout for a
    # given report type — TODO confirm for older seasons.
    report_columns = {}
    report_rows = [[] for _ in report_types]

    for season_df in tqdm(season_dfs, desc=league):
        if season_df.empty:
            continue
        season_label = season_df['Season'].iloc[0]
        if season_label not in SEASONS_TO_SCRAPE:
            continue
        print(season_label)

        for _, match in tqdm(season_df.iterrows(), total=len(season_df), desc=str(season_label)):
            link = match['Match Report']
            if not (isinstance(link, str) and link.startswith('http')):
                continue

            match_league = match['League']
            season = match['Season']
            home_team = match['HomeTeam']
            away_team = match['AwayTeam']
            match_date = match['Date']
            match_name = home_team.replace(' ', '_').strip() + '_vs_' + away_team.replace(' ', '_').strip()

            soup = BeautifulSoup(get_page(link), 'html.parser')
            team_divs = soup.find_all("div", id=lambda value: value and value.startswith("all_player_stats_"))

            # Team blocks are matched to the fixture's teams by position:
            # first block -> home team, second block -> away team.
            team_names = [home_team, away_team]
            for team_idx, team_div in enumerate(team_divs):
                # A dedicated index variable is used here: the previous code
                # reused `i` for the row padding loop, which corrupted the team
                # index for the following team.
                team_name = team_names[team_idx]
                leading_values = [season, match_league, match_name, team_name, match_date]

                tables = team_div.find_all("div", id=lambda value: value and value.startswith("div_stats_"))
                for report_idx, table in enumerate(tables):
                    column_names = _stats_table_columns(table)
                    report_columns[report_idx] = column_names
                    report_rows[report_idx].extend(
                        _stats_table_rows(table, leading_values, len(column_names))
                    )

    out_dir = Path(f'dfs/{league}/match_reports/')
    out_dir.mkdir(parents=True, exist_ok=True)

    for report_idx, report_type in enumerate(report_types):
        if report_idx not in report_columns:
            # Nothing of this type was scraped for the league; previously this
            # ended in a NameError/KeyError via the leaked `row_league`.
            print(f'{league}: no "{report_type}" tables scraped, skipping')
            continue

        report_df = pd.DataFrame(report_rows[report_idx], columns=report_columns[report_idx])
        report_df.to_csv(os.path.join(out_dir, f'{report_type}.csv'), index=False)


EPL


0it [00:00, ?it/s]

2020-2021


0it [00:00, ?it/s]

https://fbref.com/en/matches/a223dd70/West-Ham-United-Wolverhampton-Wanderers-September-27-2020-Premier-League
https://fbref.com/en/matches/6b258be0/Fulham-Aston-Villa-September-28-2020-Premier-League
https://fbref.com/en/matches/ebe4e309/Liverpool-Arsenal-September-28-2020-Premier-League
https://fbref.com/en/matches/d97aa1b5/Chelsea-Crystal-Palace-October-3-2020-Premier-League
https://fbref.com/en/matches/3cbb397e/Everton-Brighton-and-Hove-Albion-October-3-2020-Premier-League
https://fbref.com/en/matches/5ce15b58/Leeds-United-Manchester-City-October-3-2020-Premier-League
https://fbref.com/en/matches/34ba607f/Newcastle-United-Burnley-October-3-2020-Premier-League
https://fbref.com/en/matches/a29866ec/Leicester-City-West-Ham-United-October-4-2020-Premier-League
https://fbref.com/en/matches/39864083/Southampton-West-Bromwich-Albion-October-4-2020-Premier-League
https://fbref.com/en/matches/09725cb3/Arsenal-Sheffield-United-October-4-2020-Premier-League
https://fbref.com/en/matches/8f9896

https://fbref.com/en/matches/4a2a6f14/Newcastle-United-West-Bromwich-Albion-December-12-2020-Premier-League
https://fbref.com/en/matches/85cc70bb/Manchester-Derby-Manchester-United-Manchester-City-December-12-2020-Premier-League
https://fbref.com/en/matches/4847e3c3/Everton-Chelsea-December-12-2020-Premier-League
https://fbref.com/en/matches/9bec56c6/Southampton-Sheffield-United-December-13-2020-Premier-League
https://fbref.com/en/matches/9b0e267e/Crystal-Palace-Tottenham-Hotspur-December-13-2020-Premier-League
https://fbref.com/en/matches/5abb0bd1/Fulham-Liverpool-December-13-2020-Premier-League
https://fbref.com/en/matches/e4dc55d0/Arsenal-Burnley-December-13-2020-Premier-League
https://fbref.com/en/matches/964e6470/Leicester-City-Brighton-and-Hove-Albion-December-13-2020-Premier-League
https://fbref.com/en/matches/20bd0665/Wolverhampton-Wanderers-Chelsea-December-15-2020-Premier-League
https://fbref.com/en/matches/40b42ca1/Manchester-City-West-Bromwich-Albion-December-15-2020-Premie

https://fbref.com/en/matches/6e963555/Manchester-United-Sheffield-United-January-27-2021-Premier-League
https://fbref.com/en/matches/3e4ff1e0/Everton-Leicester-City-January-27-2021-Premier-League
https://fbref.com/en/matches/17bac3d1/Tottenham-Hotspur-Liverpool-January-28-2021-Premier-League
https://fbref.com/en/matches/88053fb0/Everton-Newcastle-United-January-30-2021-Premier-League
https://fbref.com/en/matches/150c80d7/Manchester-City-Sheffield-United-January-30-2021-Premier-League
https://fbref.com/en/matches/d1517c30/West-Bromwich-Albion-Fulham-January-30-2021-Premier-League
https://fbref.com/en/matches/111bd050/Crystal-Palace-Wolverhampton-Wanderers-January-30-2021-Premier-League
https://fbref.com/en/matches/0fcaaa5f/Arsenal-Manchester-United-January-30-2021-Premier-League
https://fbref.com/en/matches/10d13e5b/Southampton-Aston-Villa-January-30-2021-Premier-League
https://fbref.com/en/matches/3372f3f3/Chelsea-Burnley-January-31-2021-Premier-League
https://fbref.com/en/matches/e5e4

https://fbref.com/en/matches/5ac8e391/Newcastle-United-Aston-Villa-March-12-2021-Premier-League
https://fbref.com/en/matches/a711769f/Leeds-United-Chelsea-March-13-2021-Premier-League
https://fbref.com/en/matches/29954372/Crystal-Palace-West-Bromwich-Albion-March-13-2021-Premier-League
https://fbref.com/en/matches/91cce376/Everton-Burnley-March-13-2021-Premier-League
https://fbref.com/en/matches/279b0eb5/Fulham-Manchester-City-March-13-2021-Premier-League
https://fbref.com/en/matches/b40eb9c6/Southampton-Brighton-and-Hove-Albion-March-14-2021-Premier-League
https://fbref.com/en/matches/d3bc5214/Leicester-City-Sheffield-United-March-14-2021-Premier-League
https://fbref.com/en/matches/127b8258/North-London-Derby-Arsenal-Tottenham-Hotspur-March-14-2021-Premier-League
https://fbref.com/en/matches/05c95367/Manchester-United-West-Ham-United-March-14-2021-Premier-League
https://fbref.com/en/matches/ad3063c2/Wolverhampton-Wanderers-Liverpool-March-15-2021-Premier-League
https://fbref.com/en/ma

0it [00:00, ?it/s]

https://fbref.com/en/matches/928467bd/Liverpool-Norwich-City-August-9-2019-Premier-League
https://fbref.com/en/matches/71c8a43e/West-Ham-United-Manchester-City-August-10-2019-Premier-League
https://fbref.com/en/matches/34b99058/Burnley-Southampton-August-10-2019-Premier-League
https://fbref.com/en/matches/38111659/Watford-Brighton-and-Hove-Albion-August-10-2019-Premier-League
https://fbref.com/en/matches/d402cacd/Bournemouth-Sheffield-United-August-10-2019-Premier-League
https://fbref.com/en/matches/a802f51e/Crystal-Palace-Everton-August-10-2019-Premier-League
https://fbref.com/en/matches/404ee5d3/Tottenham-Hotspur-Aston-Villa-August-10-2019-Premier-League
https://fbref.com/en/matches/1405a610/Newcastle-United-Arsenal-August-11-2019-Premier-League
https://fbref.com/en/matches/bf4afd61/Leicester-City-Wolverhampton-Wanderers-August-11-2019-Premier-League
https://fbref.com/en/matches/d0583d0d/Manchester-United-Chelsea-August-11-2019-Premier-League
https://fbref.com/en/matches/ff7eda21/Ars

https://fbref.com/en/matches/4df24be7/Tottenham-Hotspur-Watford-October-19-2019-Premier-League
https://fbref.com/en/matches/3529c097/Leicester-City-Burnley-October-19-2019-Premier-League
https://fbref.com/en/matches/f334b1dc/Crystal-Palace-Manchester-City-October-19-2019-Premier-League
https://fbref.com/en/matches/95c3f0c8/North-West-Derby-Manchester-United-Liverpool-October-20-2019-Premier-League
https://fbref.com/en/matches/f63044fd/Sheffield-United-Arsenal-October-21-2019-Premier-League
https://fbref.com/en/matches/3e9712d7/Southampton-Leicester-City-October-25-2019-Premier-League
https://fbref.com/en/matches/e0a4db2d/Manchester-City-Aston-Villa-October-26-2019-Premier-League
https://fbref.com/en/matches/43aa7711/West-Ham-United-Sheffield-United-October-26-2019-Premier-League
https://fbref.com/en/matches/f728ceea/Brighton-and-Hove-Albion-Everton-October-26-2019-Premier-League
https://fbref.com/en/matches/1ef8e186/Watford-Bournemouth-October-26-2019-Premier-League
https://fbref.com/e

https://fbref.com/en/matches/1163ec4a/Crystal-Palace-Brighton-and-Hove-Albion-December-16-2019-Premier-League
https://fbref.com/en/matches/b9d1d1a7/Everton-Arsenal-December-21-2019-Premier-League
https://fbref.com/en/matches/5e599d06/Newcastle-United-Crystal-Palace-December-21-2019-Premier-League
https://fbref.com/en/matches/d50390a8/Aston-Villa-Southampton-December-21-2019-Premier-League
https://fbref.com/en/matches/473db5a3/Brighton-and-Hove-Albion-Sheffield-United-December-21-2019-Premier-League
https://fbref.com/en/matches/e8b74ca0/Bournemouth-Burnley-December-21-2019-Premier-League
https://fbref.com/en/matches/223124ce/Norwich-City-Wolverhampton-Wanderers-December-21-2019-Premier-League
https://fbref.com/en/matches/733c0243/Manchester-City-Leicester-City-December-21-2019-Premier-League
https://fbref.com/en/matches/a58df282/Watford-Manchester-United-December-22-2019-Premier-League
https://fbref.com/en/matches/a1a20337/Tottenham-Hotspur-Chelsea-December-22-2019-Premier-League
https:

https://fbref.com/en/matches/a11273b7/Wolverhampton-Wanderers-Leicester-City-February-14-2020-Premier-League
https://fbref.com/en/matches/738ade70/Southampton-Burnley-February-15-2020-Premier-League
https://fbref.com/en/matches/9c238122/Norwich-City-Liverpool-February-15-2020-Premier-League
https://fbref.com/en/matches/ebfe971d/Aston-Villa-Tottenham-Hotspur-February-16-2020-Premier-League
https://fbref.com/en/matches/6ce374b0/Arsenal-Newcastle-United-February-16-2020-Premier-League
https://fbref.com/en/matches/2619bcb9/Chelsea-Manchester-United-February-17-2020-Premier-League
https://fbref.com/en/matches/bc091e86/Manchester-City-West-Ham-United-February-19-2020-Premier-League
https://fbref.com/en/matches/fdd364a6/Chelsea-Tottenham-Hotspur-February-22-2020-Premier-League
https://fbref.com/en/matches/66823ac4/Southampton-Aston-Villa-February-22-2020-Premier-League
https://fbref.com/en/matches/c24b02bd/Sheffield-United-Brighton-and-Hove-Albion-February-22-2020-Premier-League
https://fbref

https://fbref.com/en/matches/ae59ff28/Watford-Newcastle-United-July-11-2020-Premier-League
https://fbref.com/en/matches/1f633005/Norwich-City-West-Ham-United-July-11-2020-Premier-League
https://fbref.com/en/matches/267d7e78/Liverpool-Burnley-July-11-2020-Premier-League
https://fbref.com/en/matches/3f83499d/Sheffield-United-Chelsea-July-11-2020-Premier-League
https://fbref.com/en/matches/a24d3d6b/Brighton-and-Hove-Albion-Manchester-City-July-11-2020-Premier-League
https://fbref.com/en/matches/476e8583/Wolverhampton-Wanderers-Everton-July-12-2020-Premier-League
https://fbref.com/en/matches/b30bf2e8/Aston-Villa-Crystal-Palace-July-12-2020-Premier-League
https://fbref.com/en/matches/6630c721/North-London-Derby-Tottenham-Hotspur-Arsenal-July-12-2020-Premier-League
https://fbref.com/en/matches/d2adf574/Bournemouth-Leicester-City-July-12-2020-Premier-League
https://fbref.com/en/matches/668c9423/Manchester-United-Southampton-July-13-2020-Premier-League
https://fbref.com/en/matches/88d08b7b/Che

0it [00:00, ?it/s]

https://fbref.com/en/matches/3ae83896/Manchester-United-Leicester-City-August-10-2018-Premier-League
https://fbref.com/en/matches/dce15e01/Newcastle-United-Tottenham-Hotspur-August-11-2018-Premier-League
https://fbref.com/en/matches/b3a7c703/Watford-Brighton-and-Hove-Albion-August-11-2018-Premier-League
https://fbref.com/en/matches/d82a37a7/Huddersfield-Town-Chelsea-August-11-2018-Premier-League
https://fbref.com/en/matches/352f5574/Bournemouth-Cardiff-City-August-11-2018-Premier-League
https://fbref.com/en/matches/30875601/Fulham-Crystal-Palace-August-11-2018-Premier-League
https://fbref.com/en/matches/8dc8eaeb/Wolverhampton-Wanderers-Everton-August-11-2018-Premier-League
https://fbref.com/en/matches/aadb88b9/Southampton-Burnley-August-12-2018-Premier-League
https://fbref.com/en/matches/fc2c1788/Liverpool-West-Ham-United-August-12-2018-Premier-League
https://fbref.com/en/matches/478e9dab/Arsenal-Manchester-City-August-12-2018-Premier-League
https://fbref.com/en/matches/8b9218b2/Cardif

https://fbref.com/en/matches/1027bc75/Bournemouth-Southampton-October-20-2018-Premier-League
https://fbref.com/en/matches/fe1817e3/Cardiff-City-Fulham-October-20-2018-Premier-League
https://fbref.com/en/matches/356d83b4/Huddersfield-Town-Liverpool-October-20-2018-Premier-League
https://fbref.com/en/matches/2a861cc1/Everton-Crystal-Palace-October-21-2018-Premier-League
https://fbref.com/en/matches/9ba76788/Arsenal-Leicester-City-October-22-2018-Premier-League
https://fbref.com/en/matches/51c18906/Fulham-Bournemouth-October-27-2018-Premier-League
https://fbref.com/en/matches/8df7e453/Liverpool-Cardiff-City-October-27-2018-Premier-League
https://fbref.com/en/matches/1b2a6c82/Brighton-and-Hove-Albion-Wolverhampton-Wanderers-October-27-2018-Premier-League
https://fbref.com/en/matches/4e9ca23b/Southampton-Newcastle-United-October-27-2018-Premier-League
https://fbref.com/en/matches/af65625b/Watford-Huddersfield-Town-October-27-2018-Premier-League
https://fbref.com/en/matches/c55a55c7/Leiceste

https://fbref.com/en/matches/c9705ea1/Wolverhampton-Wanderers-Liverpool-December-21-2018-Premier-League
https://fbref.com/en/matches/ffe874ee/Arsenal-Burnley-December-22-2018-Premier-League
https://fbref.com/en/matches/1a9ca1ae/West-Ham-United-Watford-December-22-2018-Premier-League
https://fbref.com/en/matches/88800548/Chelsea-Leicester-City-December-22-2018-Premier-League
https://fbref.com/en/matches/056fa8cd/Manchester-City-Crystal-Palace-December-22-2018-Premier-League
https://fbref.com/en/matches/ee7f6109/Newcastle-United-Fulham-December-22-2018-Premier-League
https://fbref.com/en/matches/0233399d/Bournemouth-Brighton-and-Hove-Albion-December-22-2018-Premier-League
https://fbref.com/en/matches/7dfcd16a/Huddersfield-Town-Southampton-December-22-2018-Premier-League
https://fbref.com/en/matches/7d45c66e/Cardiff-City-Manchester-United-December-22-2018-Premier-League
https://fbref.com/en/matches/31157c7c/Everton-Tottenham-Hotspur-December-23-2018-Premier-League
https://fbref.com/en/mat

https://fbref.com/en/matches/0a085d29/Liverpool-Bournemouth-February-9-2019-Premier-League
https://fbref.com/en/matches/0194fea4/Crystal-Palace-West-Ham-United-February-9-2019-Premier-League
https://fbref.com/en/matches/2572aa9b/Brighton-and-Hove-Albion-Burnley-February-9-2019-Premier-League
https://fbref.com/en/matches/b53789c9/Tottenham-Hotspur-Leicester-City-February-10-2019-Premier-League
https://fbref.com/en/matches/5ec57dbe/Manchester-City-Chelsea-February-10-2019-Premier-League
https://fbref.com/en/matches/c3bbcee6/Wolverhampton-Wanderers-Newcastle-United-February-11-2019-Premier-League
https://fbref.com/en/matches/a87bd77b/Cardiff-City-Watford-February-22-2019-Premier-League
https://fbref.com/en/matches/698db309/West-Ham-United-Fulham-February-22-2019-Premier-League
https://fbref.com/en/matches/a4f29e38/Burnley-Tottenham-Hotspur-February-23-2019-Premier-League
https://fbref.com/en/matches/fc5c6132/Newcastle-United-Huddersfield-Town-February-23-2019-Premier-League
https://fbref.

https://fbref.com/en/matches/deab42c4/Newcastle-United-Southampton-April-20-2019-Premier-League
https://fbref.com/en/matches/225a548e/Everton-Manchester-United-April-21-2019-Premier-League
https://fbref.com/en/matches/e379285b/Arsenal-Crystal-Palace-April-21-2019-Premier-League
https://fbref.com/en/matches/ea0a6f68/Cardiff-City-Liverpool-April-21-2019-Premier-League
https://fbref.com/en/matches/9c5949d8/Chelsea-Burnley-April-22-2019-Premier-League
https://fbref.com/en/matches/704df4ec/Watford-Southampton-April-23-2019-Premier-League
https://fbref.com/en/matches/407914e9/Tottenham-Hotspur-Brighton-and-Hove-Albion-April-23-2019-Premier-League
https://fbref.com/en/matches/199f42bd/Wolverhampton-Wanderers-Arsenal-April-24-2019-Premier-League
https://fbref.com/en/matches/9bb5c776/Manchester-Derby-Manchester-United-Manchester-City-April-24-2019-Premier-League
https://fbref.com/en/matches/89b55cfd/Liverpool-Huddersfield-Town-April-26-2019-Premier-League
https://fbref.com/en/matches/787eb54d/T

0it [00:00, ?it/s]

https://fbref.com/en/matches/e3c3ddf0/Arsenal-Leicester-City-August-11-2017-Premier-League
https://fbref.com/en/matches/60f6cc1d/Watford-Liverpool-August-12-2017-Premier-League
https://fbref.com/en/matches/684f704a/West-Bromwich-Albion-Bournemouth-August-12-2017-Premier-League
https://fbref.com/en/matches/7c834541/Everton-Stoke-City-August-12-2017-Premier-League
https://fbref.com/en/matches/e782371e/Southampton-Swansea-City-August-12-2017-Premier-League
https://fbref.com/en/matches/71b00bca/Chelsea-Burnley-August-12-2017-Premier-League
https://fbref.com/en/matches/2d369d17/Crystal-Palace-Huddersfield-Town-August-12-2017-Premier-League
https://fbref.com/en/matches/072bfc99/Brighton-and-Hove-Albion-Manchester-City-August-12-2017-Premier-League
https://fbref.com/en/matches/d8a995d7/Newcastle-United-Tottenham-Hotspur-August-13-2017-Premier-League
https://fbref.com/en/matches/f5d1f6f4/Manchester-United-West-Ham-United-August-13-2017-Premier-League
https://fbref.com/en/matches/9c09e079/Swans

https://fbref.com/en/matches/11c702a4/Manchester-City-Burnley-October-21-2017-Premier-League
https://fbref.com/en/matches/7da1fc42/Stoke-City-Bournemouth-October-21-2017-Premier-League
https://fbref.com/en/matches/155ed776/Huddersfield-Town-Manchester-United-October-21-2017-Premier-League
https://fbref.com/en/matches/424a0e74/Southampton-West-Bromwich-Albion-October-21-2017-Premier-League
https://fbref.com/en/matches/73da64ff/Everton-Arsenal-October-22-2017-Premier-League
https://fbref.com/en/matches/b43525f5/Tottenham-Hotspur-Liverpool-October-22-2017-Premier-League
https://fbref.com/en/matches/11f75bcb/Manchester-United-Tottenham-Hotspur-October-28-2017-Premier-League
https://fbref.com/en/matches/9e2f900f/Liverpool-Huddersfield-Town-October-28-2017-Premier-League
https://fbref.com/en/matches/4ee00470/Crystal-Palace-West-Ham-United-October-28-2017-Premier-League
https://fbref.com/en/matches/fdbbce69/West-Bromwich-Albion-Manchester-City-October-28-2017-Premier-League
https://fbref.com/

https://fbref.com/en/matches/eaf5939d/Manchester-United-Bournemouth-December-13-2017-Premier-League
https://fbref.com/en/matches/8ca205ce/Tottenham-Hotspur-Brighton-and-Hove-Albion-December-13-2017-Premier-League
https://fbref.com/en/matches/029fd911/Leicester-City-Crystal-Palace-December-16-2017-Premier-League
https://fbref.com/en/matches/2d7430b9/Arsenal-Newcastle-United-December-16-2017-Premier-League
https://fbref.com/en/matches/178a7528/Watford-Huddersfield-Town-December-16-2017-Premier-League
https://fbref.com/en/matches/cd98063b/Chelsea-Southampton-December-16-2017-Premier-League
https://fbref.com/en/matches/4365b452/Brighton-and-Hove-Albion-Burnley-December-16-2017-Premier-League
https://fbref.com/en/matches/17329e19/Stoke-City-West-Ham-United-December-16-2017-Premier-League
https://fbref.com/en/matches/6b423cc3/Manchester-City-Tottenham-Hotspur-December-16-2017-Premier-League
https://fbref.com/en/matches/097569a9/West-Bromwich-Albion-Manchester-United-December-17-2017-Premier-

https://fbref.com/en/matches/e85b8a76/Leicester-City-Swansea-City-February-3-2018-Premier-League
https://fbref.com/en/matches/398ae957/West-Bromwich-Albion-Southampton-February-3-2018-Premier-League
https://fbref.com/en/matches/a424d390/Brighton-and-Hove-Albion-West-Ham-United-February-3-2018-Premier-League
https://fbref.com/en/matches/3ad5f648/Bournemouth-Stoke-City-February-3-2018-Premier-League
https://fbref.com/en/matches/ac69ef1b/Arsenal-Everton-February-3-2018-Premier-League
https://fbref.com/en/matches/8849f87b/Crystal-Palace-Newcastle-United-February-4-2018-Premier-League
https://fbref.com/en/matches/4825ca2d/Liverpool-Tottenham-Hotspur-February-4-2018-Premier-League
https://fbref.com/en/matches/92aff98f/Watford-Chelsea-February-5-2018-Premier-League
https://fbref.com/en/matches/9be2b4a6/North-London-Derby-Tottenham-Hotspur-Arsenal-February-10-2018-Premier-League
https://fbref.com/en/matches/15ad0bb7/Everton-Crystal-Palace-February-10-2018-Premier-League
https://fbref.com/en/ma

https://fbref.com/en/matches/a1c7d96c/West-Bromwich-Albion-Liverpool-April-21-2018-Premier-League
https://fbref.com/en/matches/abaea3fc/Watford-Crystal-Palace-April-21-2018-Premier-League
https://fbref.com/en/matches/183dc99a/Arsenal-West-Ham-United-April-22-2018-Premier-League
https://fbref.com/en/matches/4b1d1fa6/Stoke-City-Burnley-April-22-2018-Premier-League
https://fbref.com/en/matches/8beedd1d/Manchester-City-Swansea-City-April-22-2018-Premier-League
https://fbref.com/en/matches/1e1e3394/Everton-Newcastle-United-April-23-2018-Premier-League
https://fbref.com/en/matches/b6b246c1/Liverpool-Stoke-City-April-28-2018-Premier-League
https://fbref.com/en/matches/229ca929/Newcastle-United-West-Bromwich-Albion-April-28-2018-Premier-League
https://fbref.com/en/matches/4d38129c/Southampton-Bournemouth-April-28-2018-Premier-League
https://fbref.com/en/matches/611476e6/Huddersfield-Town-Everton-April-28-2018-Premier-League
https://fbref.com/en/matches/c1a0d4ce/Burnley-Brighton-and-Hove-Albion

0it [00:00, ?it/s]

2020-2021


0it [00:00, ?it/s]

https://fbref.com/en/matches/c0996cac/Bordeaux-Nantes-August-21-2020-Ligue-1
https://fbref.com/en/matches/9cbccb37/Dijon-Angers-August-22-2020-Ligue-1
https://fbref.com/en/matches/50ec2fcc/Lille-Rennes-August-22-2020-Ligue-1
https://fbref.com/en/matches/b3bddcc0/Monaco-Reims-August-23-2020-Ligue-1
https://fbref.com/en/matches/da8136ae/Nimes-Brest-August-23-2020-Ligue-1
https://fbref.com/en/matches/f96cd5a0/Lorient-Strasbourg-August-23-2020-Ligue-1
https://fbref.com/en/matches/b08ae3b2/Nice-Lens-August-23-2020-Ligue-1
https://fbref.com/en/matches/7512406f/Lyon-Dijon-August-28-2020-Ligue-1
https://fbref.com/en/matches/1ec60fe6/Rennes-Montpellier-August-29-2020-Ligue-1
https://fbref.com/en/matches/a271affc/Strasbourg-Nice-August-29-2020-Ligue-1
https://fbref.com/en/matches/c25931b4/Reims-Lille-August-30-2020-Ligue-1
https://fbref.com/en/matches/db69f933/Angers-Bordeaux-August-30-2020-Ligue-1
https://fbref.com/en/matches/9b7fa4e1/Saint-Etienne-Lorient-August-30-2020-Ligue-1
https://fbref.c

https://fbref.com/en/matches/1ee3fd0c/Reims-Nimes-November-22-2020-Ligue-1
https://fbref.com/en/matches/7015e72d/Angers-Lyon-November-22-2020-Ligue-1
https://fbref.com/en/matches/89a4df99/Lille-Lorient-November-22-2020-Ligue-1
https://fbref.com/en/matches/fa32e765/Lens-Nantes-November-25-2020-Ligue-1
https://fbref.com/en/matches/3b498bf8/Strasbourg-Rennes-November-27-2020-Ligue-1
https://fbref.com/en/matches/9ade19b5/Marseille-Nantes-November-28-2020-Ligue-1
https://fbref.com/en/matches/35fddbd7/Paris-Saint-Germain-Bordeaux-November-28-2020-Ligue-1
https://fbref.com/en/matches/4301f4b9/Lyon-Reims-November-29-2020-Ligue-1
https://fbref.com/en/matches/50ca8cbf/Monaco-Nimes-November-29-2020-Ligue-1
https://fbref.com/en/matches/034e6d97/Lens-Angers-November-29-2020-Ligue-1
https://fbref.com/en/matches/57eeea74/Metz-Brest-November-29-2020-Ligue-1
https://fbref.com/en/matches/5ede6719/Lorient-Montpellier-November-29-2020-Ligue-1
https://fbref.com/en/matches/01d18727/Nice-Dijon-November-29-20

https://fbref.com/en/matches/93d96e5c/Montpellier-Lens-January-30-2021-Ligue-1
https://fbref.com/en/matches/35e70807/Nice-Saint-Etienne-January-31-2021-Ligue-1
https://fbref.com/en/matches/d0799ee9/Angers-Nimes-January-31-2021-Ligue-1
https://fbref.com/en/matches/0439be7e/Lorient-Paris-Saint-Germain-January-31-2021-Ligue-1
https://fbref.com/en/matches/7d380a9a/Strasbourg-Reims-January-31-2021-Ligue-1
https://fbref.com/en/matches/dedabe17/Brest-Metz-January-31-2021-Ligue-1
https://fbref.com/en/matches/46396f0f/Lille-Dijon-January-31-2021-Ligue-1
https://fbref.com/en/matches/e1e4471a/Nantes-Monaco-January-31-2021-Ligue-1
https://fbref.com/en/matches/5834a902/Rennes-Lorient-February-3-2021-Ligue-1
https://fbref.com/en/matches/d53b40e0/Metz-Montpellier-February-3-2021-Ligue-1
https://fbref.com/en/matches/ce4a0c8e/Strasbourg-Brest-February-3-2021-Ligue-1
https://fbref.com/en/matches/cbaa8ed9/Bordeaux-Lille-February-3-2021-Ligue-1
https://fbref.com/en/matches/00f3abdc/Reims-Angers-February-3

https://fbref.com/en/matches/f07ab6e3/Brest-Nimes-April-11-2021-Ligue-1
https://fbref.com/en/matches/8bd928a1/Lens-Lorient-April-11-2021-Ligue-1
https://fbref.com/en/matches/fe622347/Nice-Reims-April-11-2021-Ligue-1
https://fbref.com/en/matches/24d1a5c7/Monaco-Dijon-April-11-2021-Ligue-1
https://fbref.com/en/matches/4d4dc0b9/Lyon-Angers-April-11-2021-Ligue-1
https://fbref.com/en/matches/283e5426/Lille-Montpellier-April-16-2021-Ligue-1
https://fbref.com/en/matches/f3267962/Angers-Rennes-April-17-2021-Ligue-1
https://fbref.com/en/matches/fd8c8405/Marseille-Lorient-April-17-2021-Ligue-1
https://fbref.com/en/matches/9ae8425f/Paris-Saint-Germain-Saint-Etienne-April-18-2021-Ligue-1
https://fbref.com/en/matches/9a9ec43b/Nimes-Strasbourg-April-18-2021-Ligue-1
https://fbref.com/en/matches/0e79fe5c/Brest-Lens-April-18-2021-Ligue-1
https://fbref.com/en/matches/b7a01ae1/Reims-Metz-April-18-2021-Ligue-1
https://fbref.com/en/matches/47537610/Dijon-Nice-April-18-2021-Ligue-1
https://fbref.com/en/matc

0it [00:00, ?it/s]

https://fbref.com/en/matches/eb594ee1/Monaco-Lyon-August-9-2019-Ligue-1
https://fbref.com/en/matches/d7e26ebe/Marseille-Reims-August-10-2019-Ligue-1
https://fbref.com/en/matches/f7af8742/Brest-Toulouse-August-10-2019-Ligue-1
https://fbref.com/en/matches/bfb2e34e/Angers-Bordeaux-August-10-2019-Ligue-1
https://fbref.com/en/matches/39bc30d4/Dijon-Saint-Etienne-August-10-2019-Ligue-1
https://fbref.com/en/matches/14a49720/Montpellier-Rennes-August-10-2019-Ligue-1
https://fbref.com/en/matches/fe79e1f7/Nice-Amiens-August-10-2019-Ligue-1
https://fbref.com/en/matches/aa2721a9/Lille-Nantes-August-11-2019-Ligue-1
https://fbref.com/en/matches/1bf2a0be/Strasbourg-Metz-August-11-2019-Ligue-1
https://fbref.com/en/matches/df55c5ac/Paris-Saint-Germain-Nimes-August-11-2019-Ligue-1
https://fbref.com/en/matches/7e480f83/Lyon-Angers-August-16-2019-Ligue-1
https://fbref.com/en/matches/94f0a534/Nantes-Marseille-August-17-2019-Ligue-1
https://fbref.com/en/matches/1b36cd8c/Nimes-Nice-August-17-2019-Ligue-1
htt

https://fbref.com/en/matches/85daa283/Reims-Nimes-October-26-2019-Ligue-1
https://fbref.com/en/matches/d2906fab/Brest-Dijon-October-26-2019-Ligue-1
https://fbref.com/en/matches/72816286/Strasbourg-Nice-October-26-2019-Ligue-1
https://fbref.com/en/matches/7fbe4c70/Rennes-Toulouse-October-27-2019-Ligue-1
https://fbref.com/en/matches/296bee41/Saint-Etienne-Amiens-October-27-2019-Ligue-1
https://fbref.com/en/matches/cc3d6ed7/Paris-Saint-Germain-Marseille-October-27-2019-Ligue-1
https://fbref.com/en/matches/e402000b/Dijon-Paris-Saint-Germain-November-1-2019-Ligue-1
https://fbref.com/en/matches/8a98eb80/Marseille-Lille-November-2-2019-Ligue-1
https://fbref.com/en/matches/fa414126/Angers-Strasbourg-November-2-2019-Ligue-1
https://fbref.com/en/matches/732b8271/Metz-Montpellier-November-2-2019-Ligue-1
https://fbref.com/en/matches/60b4a92b/Amiens-Brest-November-2-2019-Ligue-1
https://fbref.com/en/matches/4bd6a637/Toulouse-Lyon-November-2-2019-Ligue-1
https://fbref.com/en/matches/84c26b31/Bordeau

https://fbref.com/en/matches/9d9d651a/Nantes-Bordeaux-January-26-2020-Ligue-1
https://fbref.com/en/matches/54632cb4/Lille-Paris-Saint-Germain-January-26-2020-Ligue-1
https://fbref.com/en/matches/9c4c2caa/Rennes-Nantes-January-31-2020-Ligue-1
https://fbref.com/en/matches/1a826b08/Paris-Saint-Germain-Montpellier-February-1-2020-Ligue-1
https://fbref.com/en/matches/12de06b5/Angers-Reims-February-1-2020-Ligue-1
https://fbref.com/en/matches/49b45502/Nimes-Monaco-February-1-2020-Ligue-1
https://fbref.com/en/matches/959ca3f4/Dijon-Brest-February-1-2020-Ligue-1
https://fbref.com/en/matches/933c09df/Amiens-Toulouse-February-1-2020-Ligue-1
https://fbref.com/en/matches/20e22348/Strasbourg-Lille-February-1-2020-Ligue-1
https://fbref.com/en/matches/8ced3663/Nice-Lyon-February-2-2020-Ligue-1
https://fbref.com/en/matches/7d4b9547/Metz-Saint-Etienne-February-2-2020-Ligue-1
https://fbref.com/en/matches/f250868e/Bordeaux-Marseille-February-2-2020-Ligue-1
https://fbref.com/en/matches/7903d407/Monaco-Ange

https://fbref.com/en/matches/a6220762/Saint-Etienne-Rennes-April-11-2020-Ligue-1
https://fbref.com/en/matches/7024e6a9/Nantes-Lyon-April-11-2020-Ligue-1
https://fbref.com/en/matches/a9a4fa42/Metz-Lille-April-11-2020-Ligue-1
https://fbref.com/en/matches/4539502a/Angers-Paris-Saint-Germain-April-11-2020-Ligue-1
https://fbref.com/en/matches/1b327b14/Brest-Monaco-April-11-2020-Ligue-1
https://fbref.com/en/matches/10b7bd4b/Nimes-Montpellier-April-11-2020-Ligue-1
https://fbref.com/en/matches/a151d71c/Strasbourg-Brest-April-18-2020-Ligue-1
https://fbref.com/en/matches/385c7fa5/Choc-des-Olympiques-Lyon-Marseille-April-18-2020-Ligue-1
https://fbref.com/en/matches/d29312ea/Lille-Nice-April-18-2020-Ligue-1
https://fbref.com/en/matches/c9cadfbb/Montpellier-Reims-April-18-2020-Ligue-1
https://fbref.com/en/matches/d9f87fb2/Amiens-Nimes-April-18-2020-Ligue-1
https://fbref.com/en/matches/25b47edc/Monaco-Bordeaux-April-18-2020-Ligue-1
https://fbref.com/en/matches/3cbe315e/Paris-Saint-Germain-Saint-Etie

0it [00:00, ?it/s]

https://fbref.com/en/matches/8818bccb/Marseille-Toulouse-August-10-2018-Ligue-1
https://fbref.com/en/matches/cfa44f15/Nantes-Monaco-August-11-2018-Ligue-1
https://fbref.com/en/matches/002eb19a/Montpellier-Dijon-August-11-2018-Ligue-1
https://fbref.com/en/matches/f18b4a9b/Angers-Nimes-August-11-2018-Ligue-1
https://fbref.com/en/matches/c19cba06/Lille-Rennes-August-11-2018-Ligue-1
https://fbref.com/en/matches/0446fd28/Nice-Reims-August-11-2018-Ligue-1
https://fbref.com/en/matches/74211c45/Saint-Etienne-Guingamp-August-11-2018-Ligue-1
https://fbref.com/en/matches/c17d091d/Lyon-Amiens-August-12-2018-Ligue-1
https://fbref.com/en/matches/f134cf1e/Bordeaux-Strasbourg-August-12-2018-Ligue-1
https://fbref.com/en/matches/ed566f34/Paris-Saint-Germain-Caen-August-12-2018-Ligue-1
https://fbref.com/en/matches/ff766777/Reims-Lyon-August-17-2018-Ligue-1
https://fbref.com/en/matches/6f2eb060/Guingamp-Paris-Saint-Germain-August-18-2018-Ligue-1
https://fbref.com/en/matches/8e87e498/Caen-Nice-August-18-20

https://fbref.com/en/matches/3af7f923/Guingamp-Strasbourg-October-27-2018-Ligue-1
https://fbref.com/en/matches/18916f9c/Monaco-Dijon-October-27-2018-Ligue-1
https://fbref.com/en/matches/091274cf/Toulouse-Montpellier-October-27-2018-Ligue-1
https://fbref.com/en/matches/cc86cdf3/Rennes-Reims-October-28-2018-Ligue-1
https://fbref.com/en/matches/cc5aa1d0/Bordeaux-Nice-October-28-2018-Ligue-1
https://fbref.com/en/matches/12a14761/Marseille-Paris-Saint-Germain-October-28-2018-Ligue-1
https://fbref.com/en/matches/5da755cd/Paris-Saint-Germain-Lille-November-2-2018-Ligue-1
https://fbref.com/en/matches/c4e74b0c/Lyon-Bordeaux-November-3-2018-Ligue-1
https://fbref.com/en/matches/3d70a26a/Nice-Amiens-November-3-2018-Ligue-1
https://fbref.com/en/matches/31c0adcf/Dijon-Nimes-November-3-2018-Ligue-1
https://fbref.com/en/matches/e426ced1/Strasbourg-Toulouse-November-3-2018-Ligue-1
https://fbref.com/en/matches/73090e41/Reims-Monaco-November-3-2018-Ligue-1
https://fbref.com/en/matches/63276e1b/Caen-Renne

https://fbref.com/en/matches/038b6a22/Dijon-Monaco-January-26-2019-Ligue-1
https://fbref.com/en/matches/31a6ef67/Guingamp-Reims-January-26-2019-Ligue-1
https://fbref.com/en/matches/d16cad7d/Strasbourg-Bordeaux-January-26-2019-Ligue-1
https://fbref.com/en/matches/03acdfb8/Nice-Nimes-January-26-2019-Ligue-1
https://fbref.com/en/matches/7baec3a3/Montpellier-Caen-January-27-2019-Ligue-1
https://fbref.com/en/matches/c2610f32/Amiens-Lyon-January-27-2019-Ligue-1
https://fbref.com/en/matches/306c9177/Toulouse-Angers-January-27-2019-Ligue-1
https://fbref.com/en/matches/f0a113f8/Paris-Saint-Germain-Rennes-January-27-2019-Ligue-1
https://fbref.com/en/matches/6e3a1e22/Nantes-Saint-Etienne-January-30-2019-Ligue-1
https://fbref.com/en/matches/dd30a3c2/Lille-Nice-February-1-2019-Ligue-1
https://fbref.com/en/matches/34d963b4/Angers-Dijon-February-2-2019-Ligue-1
https://fbref.com/en/matches/0b44ddfa/Rennes-Amiens-February-2-2019-Ligue-1
https://fbref.com/en/matches/07d2b627/Monaco-Toulouse-February-2-2

https://fbref.com/en/matches/1f3d473c/Caen-Angers-April-13-2019-Ligue-1
https://fbref.com/en/matches/e7ff1ff0/Montpellier-Toulouse-April-14-2019-Ligue-1
https://fbref.com/en/matches/ab1e5c68/Rennes-Nice-April-14-2019-Ligue-1
https://fbref.com/en/matches/6a8b98ba/Saint-Etienne-Bordeaux-April-14-2019-Ligue-1
https://fbref.com/en/matches/f0d500da/Lille-Paris-Saint-Germain-April-14-2019-Ligue-1
https://fbref.com/en/matches/9b494820/Nantes-Paris-Saint-Germain-April-17-2019-Ligue-1
https://fbref.com/en/matches/050367b9/Dijon-Rennes-April-19-2019-Ligue-1
https://fbref.com/en/matches/6eea3ae0/Lyon-Angers-April-19-2019-Ligue-1
https://fbref.com/en/matches/43aeb747/Guingamp-Marseille-April-20-2019-Ligue-1
https://fbref.com/en/matches/f2d6766b/Nimes-Bordeaux-April-20-2019-Ligue-1
https://fbref.com/en/matches/3f44f10d/Strasbourg-Montpellier-April-20-2019-Ligue-1
https://fbref.com/en/matches/a0a12160/Nice-Caen-April-20-2019-Ligue-1
https://fbref.com/en/matches/d386d6d6/Toulouse-Lille-April-21-2019-

0it [00:00, ?it/s]

https://fbref.com/en/matches/a68e623d/Monaco-Toulouse-August-4-2017-Ligue-1
https://fbref.com/en/matches/37f2c25f/Paris-Saint-Germain-Amiens-August-5-2017-Ligue-1
https://fbref.com/en/matches/68b9eea2/Saint-Etienne-Nice-August-5-2017-Ligue-1
https://fbref.com/en/matches/b2829d08/Montpellier-Caen-August-5-2017-Ligue-1
https://fbref.com/en/matches/7d2eb66d/Metz-Guingamp-August-5-2017-Ligue-1
https://fbref.com/en/matches/4d28b63b/Lyon-Strasbourg-August-5-2017-Ligue-1
https://fbref.com/en/matches/bf730e30/Troyes-Rennes-August-5-2017-Ligue-1
https://fbref.com/en/matches/a5686dbd/Lille-Nantes-August-6-2017-Ligue-1
https://fbref.com/en/matches/f85b1b75/Angers-Bordeaux-August-6-2017-Ligue-1
https://fbref.com/en/matches/ab13ffd4/Marseille-Dijon-August-6-2017-Ligue-1
https://fbref.com/en/matches/7faa9279/Nice-Troyes-August-11-2017-Ligue-1
https://fbref.com/en/matches/52194605/Rennes-Lyon-August-11-2017-Ligue-1
https://fbref.com/en/matches/1ad6b969/Nantes-Marseille-August-12-2017-Ligue-1
https://

KeyboardInterrupt: 

In [10]:
# Peek at the first key stored in the disk cache. Entries are written by the
# @cache.memoize()-decorated get_page(link); index 1 of the key is displayed
# here and shows up as a match-report URL in the cell output.
list(cache)[0][1]

'https://fbref.com/en/matches/6c2be382/Atalanta-Chievo-February-4-2018-Serie-A'

In [25]:
# Build the column documentation (column name -> human-readable meaning) from
# the header rows of the player-stats tables in one cached match report.
link = list(cache)[0][1]
page = get_page(link)

soup = BeautifulSoup(page, 'html.parser')

teams = soup.find_all("div", id=lambda value: value and value.startswith("all_player_stats_"))

# Identifier columns put in front of every table's own columns; they serve as
# their own documentation text.
id_columns = ['Season', 'League', 'Team', 'Date']

doc_columns = []
doc_meanings = []

# NOTE(review): not read anywhere in this cell; kept only in case another cell
# relies on it -- confirm and remove.
team_names = ['Atalanta', 'Chievo']

# Only the first team's tables are inspected (the original loop ended with an
# unconditional `break`).
for team in teams[:1]:
    tables = team.find_all("div", id=lambda value: value and value.startswith("div_stats_"))

    for table in tables:
        thead_trs = table.find('thead').find_all('tr')
        group_cells = thead_trs[0].find_all('th')   # first header row: column groups
        column_cells = thead_trs[1].find_all('th')  # second header row: actual columns

        # Repeat each group label once per column it spans so it can be used
        # as a per-column prefix.
        col_spans = [int(el.get('colspan', 1)) for el in group_cells]
        group_names = [el.text.strip() for el in group_cells]
        prefixes = build_column_name_prefixes(col_spans, group_names)

        suffixes = [el.text.strip() for el in column_cells]
        # Prefer the `data-tip` tooltip (HTML, with <br> turned into "; ") as
        # the meaning; fall back to the visible header text.
        meanings = id_columns + [
            BeautifulSoup(el.get('data-tip').replace('<br>', '; '), "lxml").text
            if el.has_attr('data-tip') else el.text.strip()
            for el in column_cells
        ]
        column = id_columns + join_prefixes_suffixes(prefixes, suffixes)

        for col, meaning in zip(column, meanings):
            # Normalise BEFORE the membership test: names are stored with
            # underscores, so testing the raw name would never match a column
            # containing spaces and would append it again for every table.
            doc_name = col.replace(" ", '_')
            if doc_name not in doc_columns:
                doc_columns.append(doc_name)
                doc_meanings.append(meaning)

        print(len(meanings), len(column))

36 36
31 31
35 35
33 33
34 34
26 26


In [28]:
import json

# Serialise the column -> meaning mapping and write it next to the data frames.
# The result is bound to its own name: assigning it to `json` would replace the
# module with a string and make this cell fail when run a second time.
column_docs_json = json.dumps(dict(zip(doc_columns, doc_meanings)))

# The context manager closes the file even if the write raises.
with open("dfs/player_match_reports_column_documentation.json", "w") as f:
    f.write(column_docs_json)

In [29]:
# Preview the column-name -> meaning mapping collected above in this notebook.
{column_name: meaning for column_name, meaning in zip(doc_columns, doc_meanings)}

{'Season': 'Season',
 'League': 'League',
 'Team': 'Team',
 'Date': 'Date',
 'Player': 'Player',
 '#': 'Shirt Number',
 'Nation': 'Nationality of the player.; First, we check our records in international play at senior level.; Then youth level.; Then citizenship presented on wikipedia.; Finally, we use their birthplace when available.',
 'Pos': 'GK - Goalkeepers; DF - Defenders; MF - Midfielders; FW - Forwards; FB - Fullbacks; LB - Left Backs; RB - Right Backs; CB - Center Backs; DM - Defensive Midfielders; CM - Central Midfielders; LM - Left Midfielders; RM - Right Midfielders; WM - Wide Midfielders; LW - Left Wingers; RW - Right Wingers; AM - Attacking Midfielders; ',
 'Age': "Age on date of match; Age is shown in YY-DDD format; Note: Ages are based on the player's age relative to UTC when the web page is first created. This may cause some issues where a player's age differs by one day from what you might expect",
 'Min': 'Minutes',
 'Performance_Gls': 'Goals; Goals scored or allowed