# Data Collection

Collecting data about the **`Copa America 2021`** participants from **`FBref`**.

## Import libraries

In [1]:
from pathlib import Path

import pandas as pd

## Prepare data

In [80]:
# Country name -> squad code. Both are interpolated into the FBref squad URL
# by the scraping loop, and the lowercased name prefixes each output CSV.
countries = dict(
    Argentina='f9fddd6e',
    Bolivia='1bd2760c',
    Brazil='304635c3',
    Chile='7fd9c2a2',
    Colombia='ab73cfe5',
    Ecuador='123acaf8',
    Paraguay='d2043442',
    Peru='f711c854',
    Uruguay='870e020f',
    Venezuela='df384984',
)

In [56]:
# Position of a table in the list returned by pd.read_html -> name used in the
# output file. Positions that are not listed here are skipped by the loop.
tables_order = dict([
    (0, 'standard'),
    (3, 'advance_goalkeeping'),
    (4, 'shooting'),
    (5, 'passing'),
    (6, 'pass_types'),
    (7, 'goal_and_shot_creation'),
    (8, 'defensive_actions'),
    (9, 'possession'),
    (11, 'misc'),
])

## Read and process tables

In [82]:
def get_column_names(table):
    """Build flat, lowercase column names from a table's column labels.

    For a two-level label ``(top, sub)``:

    * if ``top`` contains ``'Unnamed'``, the name is ``sub`` lowercased;
    * otherwise the name is ``top`` with spaces removed, joined to ``sub``
      with an underscore, and lowercased.

    A single-level label is simply lowercased, instead of being indexed
    character by character as if it were a tuple.

    Parameters
    ----------
    table : pandas.DataFrame
        Table whose ``columns`` are read; it is not modified here.

    Returns
    -------
    list of str
        One name per column, in column order.
    """
    new_cols = []
    for label in table.columns:
        if not isinstance(label, tuple):
            # Flat header: label[0] / label[1] would be single characters.
            new_cols.append(str(label).lower())
            continue
        # Coerce to str so non-string level values cannot break `in` / `.replace`.
        top, sub = str(label[0]), str(label[1])
        if 'Unnamed' in top:
            new_cols.append(sub.lower())
        else:
            prefix = top.replace(' ', '')
            new_cols.append(f"{prefix}_{sub}".lower())
    return new_cols

In [83]:
# Make sure the output folder exists so the first to_csv call cannot fail
# on a fresh checkout; a no-op when the folder is already there.
output_dir = Path("data")
output_dir.mkdir(parents=True, exist_ok=True)

for country, code in countries.items():
    slug = country.lower()  # used in both the URL and the output file names
    # read tables
    print(f"Reading tables of {country}...")
    tables = pd.read_html(f"https://fbref.com/en/squads/{code}/2021/{slug}-Stats")
    # process tables
    print(f"Processing tables of {country}...")
    # NOTE(review): tables are selected purely by position in the parsed page;
    # this assumes every squad page lists them in the same order - confirm.
    for idx, table in enumerate(tables):
        table_name = tables_order.get(idx)
        if table_name is None:
            # Position not listed in tables_order: nothing to save.
            continue
        # Flatten the column labels before writing.
        table.columns = get_column_names(table)
        print(f"Saving table {table_name}...")
        table.to_csv(output_dir / f"{slug}_{table_name}.csv", index=False)

Reading tables of Argentina...
Processing tables of Argentina...
Saving table standard...
Saving table advance_goalkeeping...
Saving table shooting...
Saving table passing...
Saving table pass_types...
Saving table goal_and_shot_creation...
Saving table defensive_actions...
Saving table possession...
Saving table misc...
Reading tables of Bolivia...
Processing tables of Bolivia...
Saving table standard...
Saving table advance_goalkeeping...
Saving table shooting...
Saving table passing...
Saving table pass_types...
Saving table goal_and_shot_creation...
Saving table defensive_actions...
Saving table possession...
Saving table misc...
Reading tables of Brazil...
Processing tables of Brazil...
Saving table standard...
Saving table advance_goalkeeping...
Saving table shooting...
Saving table passing...
Saving table pass_types...
Saving table goal_and_shot_creation...
Saving table defensive_actions...
Saving table possession...
Saving table misc...
Reading tables of Chile...
Processing tab