In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from toolz.curried import *
from pandas.testing import assert_frame_equal

In [None]:
# Load the aggregated dataset; the path is relative to the current working directory.
df = pd.read_csv('../db/cartola_aggregated.csv')

In [None]:
# List the column labels of the loaded frame.
df.columns

In [None]:
# Order rows by AtletaID, then ano, then Rodada, so that consecutive rows of
# the same athlete are adjacent for the shift/rolling steps that follow.
df = df.sort_values(by=['AtletaID', 'ano', 'Rodada'])

In [None]:
# True when the row directly above belongs to the same athlete
# (despite the column name, the comparison is with the *previous* row).
df['is_next_same'] = df['AtletaID'].eq(df['AtletaID'].shift(1))

In [None]:
# Points of the previous row, kept only when that row belongs to the same
# athlete; otherwise the lag is set to 0.
df['Pontos_last1'] = df['Pontos'].shift(1).where(df['is_next_same'], 0)

In [None]:
# From here on the points column is only available under the name "target".
df = df.rename(columns={"Pontos": "target"})

In [None]:
# Peek at the identifying columns next to the target and its one-row lag.
df.loc[:, ['AtletaID', 'ano', 'Rodada', 'target', 'Pontos_last1', 'ClubeID']].head(n=5)

In [None]:
# Columns that are aggregated per team / position and fed to the rolling windows.
columns_to_average = [
    'FS', 'PE', 'A', 'FT', 'FD', 'FF', 'G', 'I', 'PP', 'RB',
    'FC', 'GC', 'CA', 'CV', 'SG', 'DD', 'DP', 'GS',
    'Pontos_last1', 'Preco',
]

In [None]:
# Mean of every statistic per (ClubeID, ano, Rodada).
# The column list comes from `columns_to_average` rather than a second literal
# copy, so the team frame always carries exactly the columns that the rolling
# cells select from it.
df_team = (
    df
    .groupby(['ClubeID', 'ano', 'Rodada'], as_index=False)[columns_to_average]
    .mean()
)

In [None]:
# Mean of every statistic per (Posicao, ano, Rodada).
df_posicao = (
    df
    .groupby(by=['Posicao', 'ano', 'Rodada'], as_index=False)[columns_to_average]
    .mean()
)

In [None]:
# First 20 rows of the team-level means of the lagged points.
df_team.loc[:, ['ClubeID', 'Rodada', 'Pontos_last1']].head(n=20)

In [None]:
# First 20 rows of the position-level means of the lagged points.
df_posicao.loc[:, ['Posicao', 'Rodada', 'Pontos_last1']].head(n=20)

In [None]:
def getWindowOver(df, overCol, groupCols, aggCols, window_size, na_value, suffix):
    """Rolling mean of ``aggCols`` inside each ``groupCols`` group.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; it is not modified.
    overCol : str
        Column moved into the index before grouping (the ordering column).
        Rows are windowed in the order they already have inside each group.
    groupCols : list of str
        Columns that define the groups; windows never cross a group boundary.
    aggCols : list of str
        Columns to average.
    window_size : int
        Number of rows in the rolling window. The window includes the current
        row and, because ``min_periods=0``, shorter windows at the start of a
        group are averaged over the rows that are available.
    na_value : scalar
        Replacement for any missing value left in the result.
    suffix : str
        Tag inserted into the output column names.

    Returns
    -------
    pandas.DataFrame
        Frame with a fresh 0..n-1 index in which every column ``c`` of
        ``aggCols`` is renamed to ``c + suffix + '_last_' + str(window_size)``
        (e.g. ``suffix='_avg_pos'`` and ``window_size=5`` give
        ``c_avg_pos_last_5``).
    """
    # Build the rename map from the arguments so that both the window length
    # and the suffix requested by the caller are reflected in the column names.
    renamed = {name: name + suffix + '_last_' + str(window_size) for name in aggCols}
    return (df.set_index(overCol)
              .groupby(groupCols, as_index=False)[aggCols]
              .rolling(window_size, min_periods=0)
              .mean()
              .fillna(na_value)
              .reset_index(drop=True)
              .rename(columns=renamed))

In [None]:
# Trailing 5-row mean of the position-level statistics, computed separately
# for each (Posicao, ano) group; missing values are replaced with 0.
df_avg_pos_l5 = (
    df_posicao
    .set_index('Rodada')
    .groupby(by=['Posicao', 'ano'], as_index=False)[columns_to_average]
    .rolling(window=5, min_periods=0)
    .mean()
    .fillna(value=0)
    .reset_index(drop=True)
    .rename(columns={col: col + '_avg_pos_last_5' for col in columns_to_average})
)

In [None]:
# Try the helper on the per-position frame and show only the first rows.
getWindowOver(
    df=df_posicao,
    overCol='Rodada',
    groupCols=['Posicao', 'ano'],
    aggCols=columns_to_average,
    window_size=5,
    na_value=0,
    suffix='_avg_pos',
).head()

## Team-level rolling features

In [None]:
# Trailing 5-row mean of the team-level statistics, computed separately for
# each (ClubeID, ano) group; missing values are replaced with 0.
df_avg_team_l5 = (
    df_team
    .set_index('Rodada')
    .groupby(by=['ClubeID', 'ano'], as_index=False)[columns_to_average]
    .rolling(window=5, min_periods=0)
    .mean()
    .fillna(value=0)
    .reset_index(drop=True)
    .rename(columns={col: col + '_avg_team_last_5' for col in columns_to_average})
)

In [None]:
# Trailing 10-row mean of the team-level statistics, computed separately for
# each (ClubeID, ano) group; missing values are replaced with 0.
df_avg_team_l10 = (
    df_team
    .set_index('Rodada')
    .groupby(by=['ClubeID', 'ano'], as_index=False)[columns_to_average]
    .rolling(window=10, min_periods=0)
    .mean()
    .fillna(value=0)
    .reset_index(drop=True)
    .rename(columns={col: col + '_avg_team_last_10' for col in columns_to_average})
)

In [None]:
# Trailing 20-row mean of the team-level statistics, computed separately for
# each (ClubeID, ano) group; missing values are replaced with 0.
df_avg_team_l20 = (
    df_team
    .set_index('Rodada')
    .groupby(by=['ClubeID', 'ano'], as_index=False)[columns_to_average]
    .rolling(window=20, min_periods=0)
    .mean()
    .fillna(value=0)
    .reset_index(drop=True)
    .rename(columns={col: col + '_avg_team_last_20' for col in columns_to_average})
)

In [None]:
# Trailing 5-row standard deviation of the team-level statistics, computed
# separately for each (ClubeID, ano) group. A window holding a single row has
# no sample standard deviation (NaN), which fillna(0) turns into 0.
# The columns are tagged `_std_team_last_5`: reusing the `_avg_team_last_5`
# tag of the rolling means would give two different features the same name.
df_std_team_l5 = (df_team.set_index('Rodada').groupby(['ClubeID', 'ano'], as_index=False)[columns_to_average]
                           .rolling(5, min_periods=0).std().fillna(0)
                           .reset_index(drop=True)
                           .rename(columns={name: name + '_std_team_last_5' for name in columns_to_average}))

In [None]:
# Trailing 10-row standard deviation of the team-level statistics, computed
# separately for each (ClubeID, ano) group. A window holding a single row has
# no sample standard deviation (NaN), which fillna(0) turns into 0.
# The columns are tagged `_std_team_last_10`: reusing the `_avg_team_last_10`
# tag of the rolling means would give two different features the same name.
df_std_team_l10 = (df_team.set_index('Rodada').groupby(['ClubeID', 'ano'], as_index=False)[columns_to_average]
                           .rolling(10, min_periods=0).std().fillna(0)
                           .reset_index(drop=True)
                           .rename(columns={name: name + '_std_team_last_10' for name in columns_to_average}))

In [None]:
# Trailing 20-row standard deviation of the team-level statistics, computed
# separately for each (ClubeID, ano) group. A window holding a single row has
# no sample standard deviation (NaN), which fillna(0) turns into 0.
# The columns are tagged `_std_team_last_20`: reusing the `_avg_team_last_20`
# tag of the rolling means would give two different features the same name.
df_std_team_l20 = (df_team.set_index('Rodada').groupby(['ClubeID', 'ano'], as_index=False)[columns_to_average]
                           .rolling(20, min_periods=0).std().fillna(0)
                           .reset_index(drop=True)
                           .rename(columns={name: name + '_std_team_last_20' for name in columns_to_average}))

In [None]:
# Trailing 5-row mean of each athlete's statistics, computed separately for
# each (AtletaID, ano) group; missing values are replaced with 0.
# The suffix is appended with no separator (e.g. 'FS' -> 'FSavg_last_5').
df_l5 = (
    df
    .set_index('Rodada')
    .groupby(by=['AtletaID', 'ano'], as_index=False)[columns_to_average]
    .rolling(window=5, min_periods=0)
    .mean()
    .fillna(value=0)
    .reset_index(drop=True)
    .rename(columns={col: col + 'avg_last_5' for col in columns_to_average})
)

In [None]:
# Trailing 10-row mean of each athlete's statistics, computed separately for
# each (AtletaID, ano) group; missing values are replaced with 0.
# The suffix is appended with no separator (e.g. 'FS' -> 'FSavg_last_10').
df_l10 = (
    df
    .set_index('Rodada')
    .groupby(by=['AtletaID', 'ano'], as_index=False)[columns_to_average]
    .rolling(window=10, min_periods=0)
    .mean()
    .fillna(value=0)
    .reset_index(drop=True)
    .rename(columns={col: col + 'avg_last_10' for col in columns_to_average})
)

In [None]:
# Trailing 20-row mean of each athlete's statistics, computed separately for
# each (AtletaID, ano) group; missing values are replaced with 0.
# The suffix is appended with no separator (e.g. 'FS' -> 'FSavg_last_20').
df_l20 = (
    df
    .set_index('Rodada')
    .groupby(by=['AtletaID', 'ano'], as_index=False)[columns_to_average]
    .rolling(window=20, min_periods=0)
    .mean()
    .fillna(value=0)
    .reset_index(drop=True)
    .rename(columns={col: col + 'avg_last_20' for col in columns_to_average})
)

In [None]:
# Trailing 5-row standard deviation of each athlete's statistics, computed
# separately for each (AtletaID, ano) group; missing values are replaced with 0.
# The suffix is appended with no separator (e.g. 'FS' -> 'FSstd_last_5').
df_std_l5 = (
    df
    .set_index('Rodada')
    .groupby(by=['AtletaID', 'ano'], as_index=False)[columns_to_average]
    .rolling(window=5, min_periods=0)
    .std()
    .fillna(value=0)
    .reset_index(drop=True)
    .rename(columns={col: col + 'std_last_5' for col in columns_to_average})
)

In [None]:
# Trailing 10-row standard deviation of each athlete's statistics, computed
# separately for each (AtletaID, ano) group; missing values are replaced with 0.
# The suffix is appended with no separator (e.g. 'FS' -> 'FSstd_last_10').
df_std_l10 = (
    df
    .set_index('Rodada')
    .groupby(by=['AtletaID', 'ano'], as_index=False)[columns_to_average]
    .rolling(window=10, min_periods=0)
    .std()
    .fillna(value=0)
    .reset_index(drop=True)
    .rename(columns={col: col + 'std_last_10' for col in columns_to_average})
)

In [None]:
# Trailing 20-row standard deviation of each athlete's statistics, computed
# separately for each (AtletaID, ano) group; missing values are replaced with 0.
# The suffix is appended with no separator (e.g. 'FS' -> 'FSstd_last_20').
df_std_l20 = (
    df
    .set_index('Rodada')
    .groupby(by=['AtletaID', 'ano'], as_index=False)[columns_to_average]
    .rolling(window=20, min_periods=0)
    .std()
    .fillna(value=0)
    .reset_index(drop=True)
    .rename(columns={col: col + 'std_last_20' for col in columns_to_average})
)

In [None]:
# Trailing 5-row mean of each athlete's statistics, this time grouped by
# (AtletaID, ClubeID, ano); missing values are replaced with 0.
# NOTE(review): this rebinds `df_l5`, so any value assigned to that name by an
# earlier cell is replaced - confirm that the overwrite is intended.
df_l5 = (
    df
    .set_index('Rodada')
    .groupby(by=['AtletaID', 'ClubeID', 'ano'], as_index=False)[columns_to_average]
    .rolling(window=5, min_periods=0)
    .mean()
    .fillna(value=0)
    .reset_index(drop=True)
    .rename(columns={col: col + 'avg_last_5' for col in columns_to_average})
)

In [None]:
# Show each athlete's points next to their trailing 5-row average.
# `df_avg` is built in this cell because no earlier cell defines it, and the
# points are read from 'target' because the 'Pontos' column was renamed earlier
# in the notebook. The window runs inside each (AtletaID, ano) group in the
# current row order of `df`.
df_avg = df.assign(
    avg_points_l5=df.groupby(['AtletaID', 'ano'])['target']
                    .transform(lambda pts: pts.rolling(5, min_periods=0).mean())
)
df_avg.sort_values(['AtletaID', 'ano', 'Rodada'])[['AtletaID', 'ano', 'Rodada', 'target', 'avg_points_l5']].head()

In [None]:
# Trailing 5-row mean of the points per (AtletaID, ano) group.
# The points column is selected as 'target' because 'Pontos' was renamed
# earlier in the notebook and no longer exists on `df`.
(df.set_index('Rodada')
   .groupby(['AtletaID', 'ano'], as_index=False)[['target']]
   .rolling(5, min_periods=0)
   .mean()
   .reset_index(0))

In [None]:
# Rows whose `ano` equals 2014. An explicit copy is taken because the subset is
# sorted and extended with new columns further down; without it pandas treats
# those writes as modifications of a slice of `df` (SettingWithCopyWarning).
df_2014 = df.loc[df['ano'] == 2014].copy()

In [None]:
# List the column labels of the 2014 subset.
df_2014.columns

In [None]:
# Order the 2014 rows by AtletaID and then Rodada, both descending.
# The sorted frame is rebound instead of sorted in place, so the cell does not
# mutate a filtered slice of `df` (which triggers SettingWithCopyWarning).
# NOTE(review): descending order puts later rounds first - confirm this is the
# intended order for the rolling average computed afterwards.
df_2014 = df_2014.sort_values(by=['AtletaID', 'Rodada'], ascending=False)

In [None]:
# Preview only the first rows instead of rendering the whole 2014 subset.
df_2014.head()

In [None]:
# 5-row rolling mean of each athlete's points within the 2014 subset.
# - rows are ordered with sort_values (DataFrame has no `orderby` method);
# - the points are read from 'target', the name 'Pontos' was renamed to;
# - the window is applied per AtletaID so it never mixes two athletes;
# - the default min_periods is kept, so the first four rows of every athlete
#   are NaN.
# `transform` returns values on the original row labels, so `assign` aligns
# them back regardless of the current row order of `df_2014`.
df_2014 = df_2014.assign(
    avg_points_l5=(
        df_2014.sort_values(['AtletaID', 'Rodada'])
               .groupby('AtletaID')['target']
               .transform(lambda pts: pts.rolling(5).mean())
    )
)