### Notebook responsável por concentrar os métodos de transformações para as análises

In [1]:
import pandas as pd

In [2]:
def vencedor(diff):
    """Classify a match result from its goal difference.

    Args:
        diff: Goal difference of the match (the caller in this file passes
            home goals minus visitor goals).

    Returns:
        'CASA' when ``diff`` is positive, 'EMPATE' when it is zero and
        'VISITANTE' for anything else.
    """
    if diff > 0:
        return 'CASA'
    return 'EMPATE' if diff == 0 else 'VISITANTE'

In [3]:
def is_home(home_id, id_club):
    """Flag whether a club is the home side of a match.

    Args:
        home_id: Identifier of the home club.
        id_club: Identifier of the club being checked.

    Returns:
        1 when both identifiers compare equal, otherwise 0.
    """
    return 1 if home_id == id_club else 0

In [4]:
def time_adversario(time_casa, time_visitante,time):
    """Return the opponent of ``time`` in a match.

    Args:
        time_casa: Home team of the match.
        time_visitante: Visiting team of the match.
        time: Team whose opponent is wanted.

    Returns:
        ``time_visitante`` when ``time`` equals ``time_casa``; in every other
        case ``time_casa`` is returned.
    """
    return time_visitante if time == time_casa else time_casa

In [5]:
def aplica_transformacoes_dataframe_partidas(df_match):
    """Add the derived match columns to ``df_match``.

    The frame is modified in place and also returned. Three columns are
    written:

    * ``diff_goals``: ``home_goal`` minus ``visitor_goal``.
    * ``vencedor``: result label produced by ``vencedor`` for each goal
      difference.
    * ``datetime``: ``date`` parsed with ``pd.to_datetime``.

    Args:
        df_match: Matches frame with ``home_goal``, ``visitor_goal`` and
            ``date`` columns.

    Returns:
        The same ``df_match`` object with the new columns.
    """
    goal_diff = df_match['home_goal'] - df_match['visitor_goal']
    df_match['diff_goals'] = goal_diff
    df_match['vencedor'] = goal_diff.map(vencedor)
    df_match['datetime'] = pd.to_datetime(df_match['date'])
    return df_match

In [6]:
def medida_casa_empate_fora(df_match):
    """Compute the share of home wins, draws and visitor wins.

    Args:
        df_match: Matches frame whose ``vencedor`` column holds 'CASA',
            'EMPATE' or 'VISITANTE'.

    Returns:
        A dict with the keys "Percentual de vitoria CASA",
        "Percentual de EMPATE" and "Percentual de vitoria FORA". Each value
        is the fraction (0.0 to 1.0) of rows carrying that label. When
        ``df_match`` has no rows every fraction is 0.0 instead of raising
        ``ZeroDivisionError``.
    """
    numero_jogos = len(df_match)
    if numero_jogos == 0:
        # No matches: the fractions are undefined, report zeros rather than
        # dividing by zero.
        return {
            "Percentual de vitoria CASA": 0.0,
            "Percentual de EMPATE": 0.0,
            "Percentual de vitoria FORA": 0.0,
        }

    # One pass over the column instead of building three filtered frames.
    contagem = df_match['vencedor'].value_counts()

    def fracao(rotulo):
        # Labels that never occur are absent from value_counts().
        return int(contagem.get(rotulo, 0)) / numero_jogos

    return {
        "Percentual de vitoria CASA": fracao('CASA'),
        "Percentual de EMPATE": fracao('EMPATE'),
        "Percentual de vitoria FORA": fracao('VISITANTE'),
    }

In [7]:
def __adiciona_coluna_visitor_partida__(df_match, df_scouts):
    """Left-join match data onto scouts, keyed on the match's third column.

    The columns of ``df_match`` are relabelled by position, so the frame must
    have exactly 14 columns in the order listed below. The third column is
    labelled ``id_club`` (presumably the home club id, confirm against the
    raw matches schema) and the fourth ``visitor_id``, so scout rows whose
    ``(round, id_club)`` match receive the ``visitor_id`` of that match.

    The relabelling is applied to a copy: the caller's ``df_match`` keeps its
    original column labels.

    Args:
        df_match: Matches frame with 14 columns in the expected order.
        df_scouts: Scouts frame with ``round`` and ``id_club`` columns.

    Returns:
        A new frame with every row of ``df_scouts`` plus the match columns
        (NaN where no match row was found).
    """
    # Work on a copy so the caller's column labels are not overwritten.
    partidas = df_match.copy()
    partidas.columns = [
        'match_id', 'round', 'id_club', 'visitor_id', 'date', 'timestamp',
        'local', 'valid', 'home_goal', 'visitor_goal', 'year', 'diff_goals',
        'vencedor', 'datetime',
    ]
    return pd.merge(df_scouts, partidas, how='left', on=['round', 'id_club'])

In [8]:
def __adiciona_coluna_home_partida__(df_match, df_scouts):
    """Left-join match data onto scouts, keyed on the match's fourth column.

    The columns of ``df_match`` are relabelled by position, so the frame must
    have exactly 14 columns in the order listed below. The third column is
    labelled ``home_id`` and the fourth ``id_club`` (presumably the visiting
    club id, confirm against the raw matches schema), so scout rows whose
    ``(round, id_club)`` match receive the ``home_id`` of that match.

    The relabelling is applied to a copy: the caller's ``df_match`` keeps its
    original column labels.

    Args:
        df_match: Matches frame with 14 columns in the expected order.
        df_scouts: Scouts frame with ``round`` and ``id_club`` columns.

    Returns:
        A new frame with every row of ``df_scouts`` plus the match columns
        (NaN where no match row was found). Columns present on both sides
        other than the join keys get pandas' default ``_x``/``_y`` suffixes.
    """
    # Work on a copy so the caller's column labels are not overwritten.
    partidas = df_match.copy()
    partidas.columns = [
        'match_id', 'round', 'home_id', 'id_club', 'date', 'timestamp',
        'local', 'valid', 'home_goal', 'visitor_goal', 'year', 'diff_goals',
        'vencedor', 'datetime',
    ]
    return pd.merge(df_scouts, partidas, how='left', on=['round', 'id_club'])

In [9]:
def __organizacao_colunas_apos_criacao_visitor_home__(df_scouts):
    """Coalesce the columns left by the two match merges and trim the frame.

    Missing values are filled as follows:

    * ``home_id`` and ``visitor_id`` from ``id_club``;
    * ``home_goal_x`` from ``home_goal_y``;
    * ``visitor_goal_x`` from ``visitor_goal_y``.

    ``home_goal`` and ``visitor_goal`` are then copied from the filled ``_x``
    columns. These writes happen on the frame passed in.

    Each fill is written back with an explicit column assignment rather than
    ``df[col].fillna(..., inplace=True)``: the latter is chained assignment,
    which does not update the parent frame when pandas Copy-on-Write is
    active.

    Args:
        df_scouts: Scouts frame after both match merges. It must contain the
            columns named above plus every column in the final selection.

    Returns:
        A new frame restricted to the player, scout, round, club and goal
        columns, in a fixed order.
    """
    preenchimentos = (
        ('home_id', 'id_club'),
        ('visitor_id', 'id_club'),
        ('home_goal_x', 'home_goal_y'),
        ('visitor_goal_x', 'visitor_goal_y'),
    )
    for alvo, reserva in preenchimentos:
        df_scouts[alvo] = df_scouts[alvo].fillna(df_scouts[reserva])

    df_scouts['home_goal'] = df_scouts['home_goal_x']
    df_scouts['visitor_goal'] = df_scouts['visitor_goal_x']

    colunas_finais = [
        'player_name', 'points', 'id_position', 'id_club', 'has_played',
        'id_player', 'scout_fc', 'scout_fs', 'scout_pi', 'scout_ff', 'scout_a',
        'scout_ds', 'scout_de', 'scout_gs', 'scout_fd', 'scout_sg', 'scout_ca',
        'scout_ft', 'scout_g', 'scout_pc', 'scout_ps', 'scout_pp', 'scout_i',
        'scout_dp', 'scout_gc', 'scout_cv', 'round', 'visitor_id',
        'home_id', 'home_goal', 'visitor_goal',
    ]
    return df_scouts[colunas_finais]

In [10]:
def __adiciona_coluna_casa__(df_scouts):
    """Add the ``CASA`` flag column to ``df_scouts``.

    ``CASA`` is 1 on rows where ``home_id`` equals ``id_club`` and 0
    elsewhere (a missing ``home_id`` never compares equal, so it yields 0).
    The flag is computed with a single vectorised comparison instead of a
    row-by-row ``apply``, which also works when the frame has no rows.

    Args:
        df_scouts: Scouts frame with ``home_id`` and ``id_club`` columns.
            It is modified in place.

    Returns:
        The same ``df_scouts`` object with the ``CASA`` column added.
    """
    joga_em_casa = df_scouts['home_id'] == df_scouts['id_club']
    df_scouts['CASA'] = joga_em_casa.astype(int)
    return df_scouts

In [11]:
def __adiciona_coluna_home_clube__(df_scouts, df_club):
    """Attach the home club's name to each scout row as ``time_casa``.

    Args:
        df_scouts: Scouts frame with a ``home_id`` column.
        df_club: Clubs frame with ``id`` and ``name`` columns.

    Returns:
        A new frame: ``df_scouts`` left-joined on ``home_id`` with the club
        names, which arrive in the ``time_casa`` column (NaN when the id is
        not found in ``df_club``).
    """
    clubes_casa = df_club[['id', 'name']].rename(
        columns={'id': 'home_id', 'name': 'time_casa'}
    )
    return pd.merge(df_scouts, clubes_casa, how='left', on=['home_id'])

In [12]:
def __adiciona_coluna_visitor_clube__(df_scouts, df_club):
    """Attach the visiting club's name and the row's own club name.

    Two left joins against the ``id``/``name`` pairs of ``df_club`` are
    performed, in this order:

    1. on ``visitor_id``, adding ``time_visitante``;
    2. on ``id_club``, adding ``time``.

    Args:
        df_scouts: Scouts frame with ``visitor_id`` and ``id_club`` columns.
        df_club: Clubs frame with ``id`` and ``name`` columns.

    Returns:
        A new frame with the ``time_visitante`` and ``time`` columns added
        (NaN where the id is not found in ``df_club``).
    """
    nomes = df_club[['id', 'name']]
    visitantes = nomes.rename(columns={'id': 'visitor_id', 'name': 'time_visitante'})
    proprio_clube = nomes.rename(columns={'id': 'id_club', 'name': 'time'})

    com_visitante = pd.merge(df_scouts, visitantes, how='left', on=['visitor_id'])
    return pd.merge(com_visitante, proprio_clube, how='left', on=['id_club'])

In [13]:
def __adiciona_coluna_adversario__(df_scouts):
    """Add the ``adversario`` (opponent) column to ``df_scouts``.

    For each row the opponent is ``time_visitante`` when ``time`` equals
    ``time_casa``; otherwise it is ``time_casa``. The column is computed with
    a vectorised ``Series.where`` instead of a row-by-row ``apply``, which
    also works when the frame has no rows.

    Args:
        df_scouts: Scouts frame with ``time_casa``, ``time_visitante`` and
            ``time`` columns. It is modified in place.

    Returns:
        The same ``df_scouts`` object with the ``adversario`` column added.
    """
    joga_em_casa = df_scouts['time'] == df_scouts['time_casa']
    # where() keeps time_visitante on home rows and falls back to time_casa.
    df_scouts['adversario'] = df_scouts['time_visitante'].where(
        joga_em_casa, df_scouts['time_casa']
    )
    return df_scouts

In [14]:
def realiza_merge_entre_partidas_scouts_clube(df_match, df_scouts, df_club):
    """Build the enriched scouts frame from matches, scouts and clubs.

    The helpers run in a fixed order, each receiving the previous result:

    1. merge match data keyed for the visitor column, then for the home
       column;
    2. coalesce and trim the merged columns;
    3. add the ``CASA`` flag;
    4. add the club-name columns from ``df_club`` (home first, then
       visitor/own club);
    5. add the ``adversario`` column.

    Args:
        df_match: Matches frame passed to both match-merge helpers.
        df_scouts: Scouts frame to enrich.
        df_club: Clubs frame passed to both club-name helpers.

    Returns:
        The frame returned by the last helper.
    """
    # Steps that need the matches frame.
    enriquecido = __adiciona_coluna_visitor_partida__(df_match, df_scouts)
    enriquecido = __adiciona_coluna_home_partida__(df_match, enriquecido)

    # Steps that only need the scouts frame built so far.
    enriquecido = __organizacao_colunas_apos_criacao_visitor_home__(enriquecido)
    enriquecido = __adiciona_coluna_casa__(enriquecido)

    # Steps that need the clubs frame.
    for etapa_clube in (__adiciona_coluna_home_clube__, __adiciona_coluna_visitor_clube__):
        enriquecido = etapa_clube(enriquecido, df_club)

    return __adiciona_coluna_adversario__(enriquecido)

In [15]:
def cria_dataframe_pontuacao(df_scouts):
    """Aggregate each player's points and appearances, split by home/away.

    Rows with ``CASA == 1`` are summed into ``points_casa`` and
    ``has_played_casa``; rows with ``CASA == 0`` into ``points_fora`` and
    ``has_played_fora``. Both sides are combined with an outer merge on
    ``id_player``, ``player_name``, ``id_club`` and ``id_position``, missing
    values become 0, and the totals ``total_pontos`` and ``total_jogos`` are
    added.

    The away aggregation also carries the team name, so a player who only
    has away rows keeps the real ``time`` value instead of ending up with 0
    after the fill.

    Args:
        df_scouts: Scouts frame with ``CASA``, ``id_player``, ``time``,
            ``player_name``, ``id_club``, ``id_position``, ``points`` and
            ``has_played`` columns.

    Returns:
        A frame with the columns ``id_player``, ``time``, ``player_name``,
        ``id_club``, ``id_position``, ``points_casa``, ``has_played_casa``,
        ``points_fora``, ``has_played_fora``, ``total_pontos`` and
        ``total_jogos``.
    """
    chaves = ['id_player', 'player_name', 'id_club', 'id_position']

    df_pontuacao_casa = (
        df_scouts[df_scouts['CASA'] == 1]
        .groupby(['id_player', 'time', 'player_name', 'id_club', 'id_position'])
        .agg({'points': 'sum', 'has_played': 'sum'})
        .reset_index()
    )
    df_pontuacao_casa.columns = [
        'id_player', 'time', 'player_name', 'id_club', 'id_position',
        'points_casa', 'has_played_casa',
    ]

    # 'time' is aggregated, not grouped on, so the away grouping keys stay
    # the same as before; 'first' picks the first non-missing team name.
    df_pontuacao_fora = (
        df_scouts[df_scouts['CASA'] == 0]
        .groupby(chaves)
        .agg({'points': 'sum', 'has_played': 'sum', 'time': 'first'})
        .reset_index()
    )
    df_pontuacao_fora.columns = chaves + ['points_fora', 'has_played_fora', 'time_fora']

    df_pontuacao = pd.merge(df_pontuacao_casa, df_pontuacao_fora, how='outer', on=chaves)

    # Players without home rows have no 'time' from the home side: take it
    # from the away side before the blanket fill turns it into 0.
    df_pontuacao['time'] = df_pontuacao['time'].fillna(df_pontuacao['time_fora'])
    df_pontuacao = df_pontuacao.drop(columns=['time_fora'])

    df_pontuacao = df_pontuacao.fillna(0)
    df_pontuacao['total_pontos'] = df_pontuacao['points_fora'] + df_pontuacao['points_casa']
    df_pontuacao['total_jogos'] = df_pontuacao['has_played_casa'] + df_pontuacao['has_played_fora']
    return df_pontuacao