In [1]:
import re

import numpy as np
import pandas as pd

In [1]:
# Team-name lookup table: one tuple per club, holding the club's spelling
# in each of the three data sources, in this order:
#
#     (FBREF, SoccerPrime, FIFA22)
#
# The SoccerPrime spelling (middle element) is the naming standard that the
# other two spellings are converted to.
#
# Rows that are commented out have no SoccerPrime spelling filled in.
la_liga_teams = [
    ('Real Madrid', 'Real Madrid', 'Real Madrid CF'),
    ('Atlético Madrid', 'Atletico Madrid', 'Atlético de Madrid'),
    ('Real Sociedad', 'Real Sociedad', 'Real Sociedad'),
    ('Sevilla', 'Sevilla FC', 'Sevilla FC'),
    ('Betis', 'Real Betis', 'Real Betis Balompié'),
    # ('Rayo Vallecano', , 'Rayo Vallecano'),
    ('Barcelona', 'FC Barcelona', 'FC Barcelona'),
    ('Athletic Club', 'Athletic Bilbao', 'Athletic Club de Bilbao'),
    # ('Espanyol', , 'RCD Espanyol de Barcelona'),
    ('Osasuna', 'CA Osasuna', 'CA Osasuna'),
    ('Valencia', 'Valencia CF', 'Valencia CF'),
    ('Villarreal', 'Villarreal CF', 'Villarreal CF'),
    ('Celta Vigo', 'Celta Vigo', 'RC Celta de Vigo'),
    # ('Mallorca', , 'RCD Mallorca'),
    ('Alavés', 'Deportivo Alaves', 'Deportivo Alavés'),
    ('Granada', 'Granada CF', 'Granada CF'),
    ('Elche', 'Elche CF', 'Elche CF'),
    ('Cádiz', 'Cádiz CF', 'Cádiz CF'),
    ('Getafe', 'Getafe CF', 'Getafe CF'),
    ('Levante', 'Levante UD', 'Levante Unión Deportiva'),
]

In [2]:
def read_and_clean_data():
    """Load the three CSV data sets and normalise ages and team names.

    Reads 'players-2.csv', 'keepers-2.csv' and 'players_22.csv' from the
    current working directory.

    Cleaning steps:
      * the field-player frame is reduced to the columns used downstream;
      * ``age`` is reduced to its leading whole number (the days part is
        dropped);
      * team names are rewritten to the SoccerPrime spelling using the
        module-level ``la_liga_teams`` table -- the FBREF spelling in the
        ``squad`` column of the players frame and the FIFA22 spelling in the
        ``club_name`` column of the FIFA frame. Only those two columns are
        rewritten.

    Returns:
        tuple: ``(df_players, df_keepers, df_fifa)``. ``df_keepers`` is
        returned exactly as read from disk.
    """
    df_players = pd.read_csv('players-2.csv')  # data about the field players
    df_keepers = pd.read_csv('keepers-2.csv')  # data about the keepers
    df_fifa = pd.read_csv('players_22.csv')    # statistics from FIFA22

    # desired values from the field players; .copy() makes this an independent
    # frame so the column assignments below do not write into a slice
    df_players = df_players[[
        'season', 'player', 'position', 'squad', 'age',
        'games', 'minutes', 'goals', 'assists',
        'cards_yellow', 'cards_red'
    ]].copy()

    # removing the days in age of the players: keep only the leading digits.
    # Going through str handles string ages as well as columns that pandas
    # already parsed as integers or floats; missing ages simply do not match.
    leading_years = (
        df_players['age'].astype(str).str.extract(r'^\s*(\d+)', expand=False)
    )
    ages = pd.to_numeric(leading_years, errors='coerce')
    # integer column when every age is known, otherwise float with NaN
    df_players['age'] = ages.astype(int) if ages.notna().all() else ages

    # for consistency of the team names: one mapping per source, applied in a
    # single pass to the one column that holds the team name
    fbref_to_standard = {
        fbref: standard
        for fbref, standard, _ in la_liga_teams
        if fbref != standard
    }
    fifa_to_standard = {
        fifa: standard
        for _, standard, fifa in la_liga_teams
        if fifa != standard
    }
    df_players['squad'] = df_players['squad'].replace(fbref_to_standard)
    df_fifa['club_name'] = df_fifa['club_name'].replace(fifa_to_standard)

    return df_players, df_keepers, df_fifa

In [3]:
def get_fifa_data(df_fifa, df_team, team_name):
    """Attach FIFA rating columns to every player of a team.

    For each value in ``df_team.player`` the FIFA rows whose ``club_name``
    equals ``team_name`` are searched for a ``long_name`` that contains every
    whitespace-separated word of the player's name (in any order). The first
    matching row supplies the ratings; if nothing matches, NaN is stored.

    Args:
        df_fifa: frame with at least the columns ``long_name``, ``club_name``
            and the eight rating columns listed in ``rating_columns`` below.
        df_team: frame with a ``player`` column. It is modified in place:
            the eight rating columns are added (or overwritten).
        team_name: club name compared for equality against ``club_name``.

    Returns:
        The same ``df_team`` object, with the rating columns added.
    """
    rating_columns = [
        "overall", "potential",
        "pace", "shooting", "passing", "dribbling", "defending", "physic",
    ]

    # The club filter does not depend on the player, so apply it once.
    club_rows = df_fifa[df_fifa.club_name == team_name]

    collected = {column: [] for column in rating_columns}

    for name in np.array(df_team.player):
        if isinstance(name, str) and name.strip():
            # One look-ahead per word: every word must occur somewhere in
            # long_name. re.escape keeps characters such as '(' or '.' in a
            # name from being interpreted as regex syntax.
            pattern = '^' + ''.join(
                '(?=.*{})'.format(re.escape(word)) for word in name.split()
            )
            # na=False: FIFA rows without a long_name never match.
            matches = club_rows[
                club_rows.long_name.str.contains(pattern, na=False)
            ]
        else:
            # missing or blank player name -> no FIFA row
            matches = club_rows.iloc[:0]

        for column in rating_columns:
            values = np.array(matches[column])
            collected[column].append(values[0] if len(values) > 0 else np.nan)

    for column in rating_columns:
        df_team[column] = collected[column]

    return df_team

In [4]:
def get_team(season, team_name):
    """Build the player table of one team for one season.

    Loads and cleans the data via ``read_and_clean_data``, keeps the field
    players whose ``squad`` contains ``team_name`` and whose ``season`` equals
    ``season``, then adds the FIFA rating columns via ``get_fifa_data``.

    Args:
        season: value compared for equality against the ``season`` column.
        team_name: team name in the SoccerPrime spelling. It is passed to
            ``Series.str.contains`` (so it is treated as a regular expression
            and matches as a substring) for the players frame, and compared
            for equality against ``club_name`` in the FIFA frame.

    Returns:
        A new DataFrame with the selected players and their FIFA ratings.
    """
    df_players, _df_keepers, df_fifa = read_and_clean_data()

    # na=False: a row without a squad belongs to no team (na=True would add
    # every such row to whichever team is requested).
    in_squad = df_players["squad"].str.contains(team_name, na=False)
    in_season = df_players["season"] == season

    # .copy(): get_fifa_data adds columns to the frame it receives, so hand it
    # an independent frame rather than a slice of df_players.
    df_team = df_players[in_squad & in_season].copy()

    return get_fifa_data(df_fifa, df_team, team_name)

In [10]:
# Make 'iframe_connected' the default plotly renderer for this notebook.
import plotly.io as pio
pio.renderers.default = 'iframe_connected'
# Build the 2022 'Real Betis' player table; as the last expression of the
# cell, the resulting value is what the notebook displays.
# NOTE(review): the output recorded for this cell is
# "NameError: name 'pd' is not defined" -- the import cell had not been run in
# that kernel session. Run the notebook top to bottom before this cell.
get_team(2022, 'Real Betis')


NameError: name 'pd' is not defined

### JSON maker

In [10]:
import json

# makes JSON file for our MongoDB database
def make_json(df_team, team_name, file_name):
    """Write a team and its players to a JSON file (for the MongoDB database).

    The file contains a single object::

        {"team": <team_name>, "players": [<one object per row of df_team>]}

    Each player object maps column name to cell value. Missing values
    (NaN / None / NA) are written as JSON ``null``, because the bare ``NaN``
    token that ``json.dump`` would otherwise emit is not valid JSON.

    Args:
        df_team: DataFrame with one row per player.
        team_name: value stored under the ``"team"`` key.
        file_name: path of the file to create or overwrite.
    """
    # astype(object) lets a cell hold None; where(...) then swaps every
    # missing cell for None. orient="records" yields one dict per row and,
    # unlike going through df.T.to_dict(), does not drop rows that share an
    # index label.
    players = (
        df_team.astype(object)
        .where(df_team.notna(), None)
        .to_dict(orient="records")
    )

    final_object = {
        'team': team_name,
        'players': players
    }

    with open(file_name, "w", encoding="utf-8") as outfile:
        json.dump(final_object, outfile)