In [1]:
# Librerías de siempre
import pandas as pd
import numpy as np
import os
import statsmodels.api as sm
from statsmodels.formula.api import ols
from scipy import stats
import warnings
import openpyxl
warnings.filterwarnings("ignore")

In [2]:
def formatear_base(df):
    """Attach a numeric team id to the home and away side of every match.

    Teams are taken from the ``Local`` column and numbered ``0..k-1`` in the
    order returned by ``value_counts()`` (most frequent home team first).

    Parameters
    ----------
    df : pandas.DataFrame
        Match-level table with at least the columns ``Local`` and ``Visita``.

    Returns
    -------
    pandas.DataFrame
        A new frame (the input is not modified) with the original columns
        plus ``i_local`` and ``i_visita``. Because ids are derived from
        ``Local`` only, a visiting team that never appears in ``Local`` gets
        NaN in ``i_visita``.
    """
    equipos = pd.DataFrame(df['Local'].value_counts().index.tolist(), columns=['equipo'])
    equipos['i'] = equipos.index

    # `drop('equipo', 1)` relied on a positional `axis`, which pandas 2.0
    # no longer accepts; the `columns=` keyword works on old and new versions.
    df = pd.merge(df, equipos, left_on='Local', right_on='equipo', how='left')
    df = df.rename(columns={'i': 'i_local'}).drop(columns='equipo')
    df = pd.merge(df, equipos, left_on='Visita', right_on='equipo', how='left')
    df = df.rename(columns={'i': 'i_visita'}).drop(columns='equipo')
    return df

In [3]:
def tabla_final_torneo(db):
    """Build the final standings table for the matches of one tournament.

    Parameters
    ----------
    db : pandas.DataFrame
        Match-level rows. Columns read here: ``Torneo``, ``Local``,
        ``Visita``, ``goles L``, ``goles V`` and ``Occupation``. Only the
        first distinct ``Torneo`` value is used as the label of the result.

    Returns
    -------
    pandas.DataFrame
        One row per team appearing in ``Local`` with columns ``equipo``,
        ``Torneo``, ``position``, ``points`` and ``promoccup`` (mean of
        ``Occupation`` over the team's home matches). Rows are ordered by
        points and then goal difference, both descending; ``position`` is the
        1-based rank in that order. Missing aggregates are filled with 0.
    """
    torneo = db['Torneo'].drop_duplicates().tolist()[0]
    df = db.copy().reset_index(drop=True)
    df = formatear_base(df)
    tabla = df[['Local','i_local']].drop_duplicates()
    tabla = tabla.set_index(['i_local'])
    tabla.columns = ['equipo']
    conditions = [
            (df['goles L'] > df['goles V']),
            (df['goles L'] < df['goles V'])]
    choices = ['local', 'visita']
    resultado = np.select(conditions, choices, default = 'empate')
    # Build the three indicator columns explicitly. `pd.get_dummies` only
    # creates columns for outcomes that actually occur, so a slice without,
    # say, any draw used to fail with KeyError on `ghome['empate']` below.
    for desenlace in ('empate', 'local', 'visita'):
        df[desenlace] = (resultado == desenlace).astype(int)
    ghome = df.groupby('i_local')
    gaway = df.groupby('i_visita')
    df_home = pd.DataFrame({'wins_h': ghome['local'].sum(),
                            'draws_h': ghome['empate'].sum(),
                            'losses_h': ghome['visita'].sum(),
                            'gf_h': ghome['goles L'].sum(),
                            'ga_h': ghome['goles V'].sum(),
                            'gd_h': ghome['goles L'].sum() - ghome['goles V'].sum(),
                            'promoccup': ghome['Occupation'].mean()})
    df_away = pd.DataFrame({'wins_a': gaway['visita'].sum(),
                            'draws_a': gaway['empate'].sum(),
                            'losses_a': gaway['local'].sum(),
                            'gf_a': gaway['goles V'].sum(),
                            'ga_a': gaway['goles L'].sum(),
                            'gd_a': gaway['goles V'].sum() - gaway['goles L'].sum()})
    tabla = tabla.join(df_home, how='left').join(df_away,how = 'left').fillna(0)
    tabla['wins'] = tabla.wins_h + tabla.wins_a
    tabla['draws'] = tabla.draws_h + tabla.draws_a
    tabla['losses'] = tabla.losses_h + tabla.losses_a
    tabla['gf'] = tabla.gf_h + tabla.gf_a
    tabla['ga'] = tabla.ga_h + tabla.ga_a
    tabla['gd'] = tabla.gd_h + tabla.gd_a
    # 3 points per win, 1 per draw.
    tabla['points'] = (tabla['wins']*3 + tabla['draws']).astype(int)
    tabla = tabla.sort_values(by=['points','gd'], ascending = False).reset_index(drop=True)
    tabla['position'] = (tabla.index + 1).astype(int)
    tabla['Torneo'] = torneo
    return tabla[['equipo','Torneo','position','points', 'promoccup']]

In [4]:
def ConstruirDatosPreTratamiento(regdir,
                                 ligasdir,
                                 ligas = ['Alemania','Espana','Francia','Inglaterra','Italia']):
    """Build one pre-treatment frame per league (single previous tournament).

    NOTE: a later cell defines another function with this same name; once
    that cell runs, this definition is replaced.

    Parameters
    ----------
    regdir : str
        Folder holding ``<liga>.xlsx`` with the regression data (columns
        ``equipo``, ``Torneo`` and the ``perc_*`` columns are used).
    ligasdir : str
        Folder holding ``<liga>.xlsx`` with historical matches (columns
        ``Torneo``, ``Attendance``, ``Capacity`` plus whatever
        ``tabla_final_torneo`` reads).
    ligas : list of str
        League names, used as file stems.

    Returns
    -------
    list of pandas.DataFrame
        One frame per league, in the order of ``ligas``, with rows containing
        any NaN removed.
    """
    resultado = []
    for liga in ligas:
        nombre_xlsx = liga + '.xlsx'
        base = pd.read_excel(os.path.join(regdir, nombre_xlsx))
        torneos = base['Torneo'].drop_duplicates().tolist()
        historia = pd.read_excel(os.path.join(ligasdir, nombre_xlsx))
        # Distinct tournaments of the history file, without the last one.
        torneos_ant = historia['Torneo'].drop_duplicates().tolist()[:-1]
        # Pair the k-th distinct tournament of each file by position.
        anterior_de = dict(zip(torneos, torneos_ant))
        base['Torneo ant'] = [anterior_de[t] for t in base['Torneo'].tolist()]

        # Attendance / capacity capped at 1; NaN when capacity is not positive.
        historia['Occupation'] = np.where(historia['Capacity'] > 0,
                                          np.minimum(historia['Attendance']/historia['Capacity'], 1),
                                          np.nan)

        tablas = pd.concat(
            [tabla_final_torneo(historia[historia['Torneo'] == t].reset_index(drop=True))
             for t in torneos_ant],
            ignore_index = True)
        tablas.columns = ['equipo','Torneo ant','position_ant','points_ant', 'promoccup_ant']

        base = base.merge(tablas,
                          how = 'left',
                          left_on = ['equipo','Torneo ant'],
                          right_on = ['equipo', 'Torneo ant'])
        cols_perc = [col for col in base.columns if 'perc_' in col and 'corr' not in col]
        cols_ant = [col for col in base.columns if '_ant' in col]
        base = base[['equipo','Torneo'] + cols_perc + cols_ant]
        resultado.append(base.dropna().reset_index(drop=True))
    return resultado

In [5]:
def ConstruirDatosPreTratamiento(regdir,
                                 ligasdir,
                                 ligas = ['Alemania','Espana','Francia','Inglaterra','Italia'],
                                 n_rezagos = 6):
    """Build one pre-treatment frame per league with several tournament lags.

    For each league the regression file is sorted by ``Torneo`` (descending)
    and the history file by ``Date`` (descending). For every lag ``k`` in
    ``0..n_rezagos-1`` the k-th shifted list of historical tournaments is
    paired by position with the tournaments of the regression file, and the
    standings of the paired tournament are merged in as ``position_ant_k``,
    ``points_ant_k`` and ``promoccup_ant_k``.

    Parameters
    ----------
    regdir : str
        Folder holding ``<liga>.xlsx`` with the regression data. Columns read
        here: ``equipo``, ``Torneo``, ``position_final``, ``points_final``
        and any column containing ``perc_`` or ``categoria``.
    ligasdir : str
        Folder holding ``<liga>.xlsx`` with historical matches (``Torneo``,
        ``Date``, ``Attendance``, ``Capacity`` plus whatever
        ``tabla_final_torneo`` reads).
    ligas : list of str
        League names, used as file stems.
    n_rezagos : int, default 6
        Number of lags to build. The default reproduces the previously
        hard-coded ``range(0, 6)``.

    Returns
    -------
    list of pandas.DataFrame
        One frame per league, in the order of ``ligas``. The lag-0 position
        and points are overwritten with ``position_final`` / ``points_final``.
        Remaining NaNs in lag columns are filled: positions with max + 1,
        points with min - 1, anything else with the column mean.
    """
    dfs_pretrat = []
    for liga in ligas:
        archivo = liga + '.xlsx'
        df = pd.read_excel(os.path.join(regdir, archivo))
        df = df.sort_values(by = 'Torneo', ascending = False).reset_index(drop = True)
        print('Liga: %s, N = %s' % (liga, df.shape[0]))
        torneos = df['Torneo'].drop_duplicates().tolist()
        df_ant = pd.read_excel(os.path.join(ligasdir, archivo))
        df_ant = df_ant.sort_values(by = 'Date', ascending = False).reset_index(drop = True)
        # Loop-invariant: attendance / capacity capped at 1, NaN when the
        # capacity is not positive. Previously recomputed on every lag.
        df_ant['Occupation'] = np.where(df_ant['Capacity'] > 0,
                                        np.minimum(df_ant['Attendance']/df_ant['Capacity'], 1),
                                        np.nan)
        torneos_hist = df_ant['Torneo'].drop_duplicates().tolist()
        # Standings depend only on the tournament, not on the lag, so each
        # table is built once and reused (the original rebuilt every table
        # at every lag).
        tablas_por_torneo = {}
        for tor in range(0, n_rezagos):
            torneos_ant = torneos_hist[tor:]
            dicttorant = dict(zip(torneos,torneos_ant))
            df['Torneo ant %s' % tor] = [dicttorant[t] for t in df['Torneo'].tolist()]

            dfs_tablas = []
            for torneo in torneos_ant:
                if torneo not in tablas_por_torneo:
                    tablas_por_torneo[torneo] = tabla_final_torneo(
                        df_ant[df_ant['Torneo'] == torneo].reset_index(drop=True))
                dfs_tablas.append(tablas_por_torneo[torneo])
            # `concat` returns a new frame, so renaming its columns does not
            # touch the cached tables.
            df_tablas_ant = pd.concat(dfs_tablas, ignore_index = True)
            df_tablas_ant.columns = ['equipo','Torneo ant %s' % tor,'position_ant_%s' % tor,'points_ant_%s' % tor, 'promoccup_ant_%s' % tor]
            df = df.merge(df_tablas_ant,
                  how = 'left',
                  left_on = ['equipo','Torneo ant %s' %tor],
                  right_on = ['equipo', 'Torneo ant %s' %tor])
        pretrats = [i for i in df.columns if '_ant' in i]
        percs = [i for i in df.columns if 'perc_' in i and 'corr' not in i]
        categorias = [i for i in df.columns if 'categoria' in i]
        var_inter = ['equipo','Torneo'] + pretrats + percs + categorias
        # Lag 0 is replaced by the final standings stored in the regression file.
        df['position_ant_0'] = df['position_final']
        df['points_ant_0'] = df['points_final']
        df = df[var_inter + ['position_final','points_final']]
        fillpretrat = {}

        for p in pretrats:
            if 'position' in p:
                fillpretrat[p] = df[p].max() + 1
            elif 'points' in p:
                fillpretrat[p] = df[p].min() - 1
            else:
                fillpretrat[p] = df[p].mean()
        df = df.fillna(value = fillpretrat)
        dfs_pretrat.append(df)
    return dfs_pretrat

In [6]:
%%time
regdir = os.path.join(os.path.pardir,
                      'datos',
                      'regresiones')
ligasdir = os.path.join(os.path.pardir,
                        'datos',
                        'ligas-hist')
dfs_pretrat = ConstruirDatosPreTratamiento(regdir, ligasdir)

Liga: Alemania, N = 270
Liga: Espana, N = 300
Liga: Francia, N = 200
Liga: Inglaterra, N = 504
Liga: Italia, N = 298
CPU times: user 2min 12s, sys: 477 ms, total: 2min 12s
Wall time: 2min 16s


In [7]:
# Shape (rows, columns) of the frame built for the first league in the list.
dfs_pretrat[0].shape

(270, 343)

In [8]:
# Sanity check on the fourth league: keep the lag-0 position next to the
# final position so the two can be compared below.
df_test = dfs_pretrat[3][['position_ant_0','position_final']]

In [9]:
# Rows where the lag-0 position equals the final position; this should match
# the total row count because lag 0 is overwritten with `position_final`.
df_test[df_test['position_ant_0'] == df_test['position_final']].shape

(504, 2)

In [10]:
# Total number of rows, for comparison with the count of matching rows.
df_test.shape

(504, 2)

In [11]:
# Full list of column names of the first league's frame (long output).
dfs_pretrat[0].columns.tolist()

['equipo',
 'Torneo',
 'position_ant_0',
 'points_ant_0',
 'promoccup_ant_0',
 'position_ant_1',
 'points_ant_1',
 'promoccup_ant_1',
 'position_ant_2',
 'points_ant_2',
 'promoccup_ant_2',
 'position_ant_3',
 'points_ant_3',
 'promoccup_ant_3',
 'position_ant_4',
 'points_ant_4',
 'promoccup_ant_4',
 'position_ant_5',
 'points_ant_5',
 'promoccup_ant_5',
 'perc_facil_elo_p3_c3',
 'perc_dificil_elo_p3_c3',
 'perc_facil_elo_p4_c3',
 'perc_dificil_elo_p4_c3',
 'perc_facil_elo_p5_c3',
 'perc_dificil_elo_p5_c3',
 'perc_facil_elo_p6_c3',
 'perc_dificil_elo_p6_c3',
 'perc_facil_elo_p3_c4',
 'perc_dificil_elo_p3_c4',
 'perc_facil_elo_p4_c4',
 'perc_dificil_elo_p4_c4',
 'perc_facil_elo_p5_c4',
 'perc_dificil_elo_p5_c4',
 'perc_facil_elo_p6_c4',
 'perc_dificil_elo_p6_c4',
 'perc_facil_elo_p3_c5',
 'perc_dificil_elo_p3_c5',
 'perc_facil_elo_p4_c5',
 'perc_dificil_elo_p4_c5',
 'perc_facil_elo_p5_c5',
 'perc_dificil_elo_p5_c5',
 'perc_facil_elo_p6_c5',
 'perc_dificil_elo_p6_c5',
 'perc_facil_elo_p

In [12]:
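# WORKING: earlier ANOVA-based version of TablasPreTratamiento, kept commented out for reference (a regression-based version is defined in a later cell).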
# def TablasPreTratamiento(dfs_pretrat, ligas, defin, dific, outputdir):
#     Categories = [c for c in dfs_pretrat[0].columns.tolist() if 'perc' in c and defin in c and dific in c]
#     sheets = [c.replace('perc_','') for c in Categories]
#     Y = [c for c in dfs_pretrat[0].columns.tolist() if '_ant_' in c]
# #     sheets = [y for y in Y]
#     for i in range(len(ligas)):
#         liga = ligas[i].lower()
#         if liga != 'all':
#             df_pretrat = dfs_pretrat[i].round(decimals = 3)
#         else:
#             df_pretrat = pd.concat(dfs_pretrat, ignore_index = True).round(decimals = 3)
#         dfs_tablas = []
# #         for y in Y:
#         for c in Categories:
#             tabla_proms = []
# #             for c in Categories:
#             for y in Y:
#                 df_anova = df_pretrat[[y,c]].dropna().reset_index(drop=True)
#                 df_gb = df_anova.groupby(c).agg({y : 'mean'}).reset_index()
#                 df_gbcount = df_anova.groupby(c).agg({y : 'count'}).reset_index()
#                 df_gb[y] = df_gb[y].round(decimals=3).map(str) + ' (' + df_gbcount[y].map(str) + ')'
# #                 df_gb['Indicador'] = c
#                 df_gb['Pretrat'] = y
# #                 df_gb = df_gb.pivot(index='Indicador', columns = c, values = y).reset_index().reset_index(drop=True).rename_axis(None, axis=1)
#                 df_gb = df_gb.pivot(index='Pretrat', columns = c, values = y).reset_index().reset_index(drop=True).rename_axis(None, axis=1)
#                 formula = '%s ~ C(%s)' % (y,c)
#                 model = ols(formula, data = df_anova).fit()
#                 aov_table = sm.stats.anova_lm(model, typ=2)
#                 df_gb['N'] = df_anova.shape[0]
#                 df_gb['F'] = np.round(aov_table['F'][0], decimals = 3)
#                 df_gb['p-val'] = np.round(aov_table['PR(>F)'][0], decimals = 3)
#                 tabla_proms.append(df_gb)
#             df_tabla = pd.concat(tabla_proms, ignore_index = True)
#             sort_cols = [c for c in df_tabla.columns.tolist() if type(c) != str]
#             sort_cols.sort()
# #             dfs_tablas.append(df_tabla[['Indicador'] + sort_cols + ['N','F','p-val']].round(decimals = 3))
#             df_tabla = df_tabla[['Pretrat'] + sort_cols + ['N','F','p-val']].round(decimals = 3)
#             df_tabla = df_tabla.sort_values(by = 'Pretrat', ascending = True).reset_index(drop=True)
#             dfs_tablas.append(df_tabla)
#         file = 'pretrat-%s-%s-%s.xlsx' % (liga,defin,dific)
#         writer = pd.ExcelWriter(os.path.join(outputdir,file), engine='xlsxwriter')
#         for df, sheet in zip(dfs_tablas, sheets):
#             df.to_excel(writer, sheet_name = sheet, index = False)
#         writer.save()

In [13]:
def FiltrarRegulares(df, defin, c, h, filt):
    """Keep only the rows whose category column(s) hold an allowed label.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the relevant ``categoria_*`` column(s).
    defin : str
        ``'elo'`` uses ``categoria_elo_c<c>``; ``'general'`` uses
        ``categoria_general_h<h>_c<c>``; any other value uses both
        ``categoria_local_h<h>_c<c>`` and ``categoria_visita_h<h>_c<c>``.
    c, h :
        Values interpolated into the column names (``h`` is unused for
        ``'elo'``).
    filt : str
        ``'facil'`` or ``'dificil'`` drops that label and keeps the other
        two; ``'regular'`` keeps only ``'regular'``; anything else keeps all
        three labels.

    Returns
    -------
    pandas.DataFrame
        The selected rows of ``df`` with their original index.
    """
    todas = ['facil', 'regular', 'dificil']
    if filt == 'regular':
        permitidas = [filt]
    elif filt in ('facil', 'dificil'):
        permitidas = [nombre for nombre in todas if nombre != filt]
    else:
        permitidas = todas

    if defin == 'elo':
        columnas = ['categoria_%s_c%s' % (defin, c)]
    elif defin == 'general':
        columnas = ['categoria_%s_h%s_c%s' % (defin, h, c)]
    else:
        columnas = ['categoria_local_h%s_c%s' % (h,c),
                    'categoria_visita_h%s_c%s' % (h,c)]

    # A row survives only if every relevant column holds an allowed label.
    mascara = df[columnas[0]].isin(permitidas)
    for columna in columnas[1:]:
        mascara = mascara & df[columna].isin(permitidas)
    return df[mascara]

In [14]:
def TablasPreTratamiento(dfs_pretrat, ligas, defin, dific, outputdir, filtro):
    """Write one Excel workbook per league with pre-treatment balance tables.

    For every treatment column (a column of ``dfs_pretrat[0]`` containing
    ``'perc'``, ``defin`` and ``dific``) one sheet is produced. Each sheet has
    one row per pre-treatment variable (columns containing ``'_ant_'``) with
    the group means and counts by treatment value, the sample size, the
    variance of the treatment, and the coefficient, t-statistic and p-value
    of the treatment in an OLS regression
    ``y ~ treatment + equipo + Torneo`` with errors clustered by ``equipo``.

    Parameters
    ----------
    dfs_pretrat : list of pandas.DataFrame
        Per-league frames; column names are discovered from the first one.
    ligas : list of str
        League names. The entry ``'all'`` (case-insensitive) uses the
        concatenation of every frame; any other entry at position ``i`` uses
        ``dfs_pretrat[i]``.
    defin : str
        Treatment definition. ``'elo'`` columns are parsed as ``..._c<c>``;
        other values as ``..._h<h>_p<p>_c<c>``.
    dific : str
        Substring that treatment columns must contain.
    outputdir : str
        Existing folder where the workbooks are written.
    filtro : str
        ``'dificultad'`` filters with ``'facil'`` or ``'dificil'`` depending
        on the treatment column name; any other value is passed unchanged to
        ``FiltrarRegulares``.

    Returns
    -------
    None
        Files named ``pretrat-<liga>-<defin>-<dific>[-<filtro>].xlsx`` are
        written to ``outputdir``.
    """
    Categories = [c for c in dfs_pretrat[0].columns.tolist() if 'perc' in c and defin in c and dific in c]
    sheets = [c.replace('perc_','') for c in Categories]
    Y = [c for c in dfs_pretrat[0].columns.tolist() if '_ant_' in c]
    for i in range(len(ligas)):
        liga = ligas[i].lower()
        if liga != 'all':
            df_pretrat = dfs_pretrat[i].round(decimals = 3)
        else:
            df_pretrat = pd.concat(dfs_pretrat, ignore_index = True).round(decimals = 3)
        dfs_tablas = []
        for var in Categories:
            if filtro == 'dificultad':
                if 'facil' in var:
                    filt = 'facil'
                else:
                    filt = 'dificil'
            else:
                filt = filtro
            # Recover the h / c indices encoded in the column name.
            if defin != 'elo':
                aux = var.split('_h')[1].split('_p')
                h, c = aux[0], aux[1].split('_c')[1]
            else:
                h, c = None, var.split('_c')[1]
            tabla_proms = []
            df_pretrat_filt = FiltrarRegulares(df_pretrat, defin, c, h, filt)
            for y in Y:
                df_anova = df_pretrat_filt[[y,var, 'equipo', 'Torneo']].dropna().reset_index(drop=True)
                # "mean (count)" of y for every distinct treatment value.
                df_gb = df_anova.groupby(var).agg({y : 'mean'}).reset_index()
                df_gbcount = df_anova.groupby(var).agg({y : 'count'}).reset_index()
                df_gb[y] = df_gb[y].round(decimals=3).map(str) + ' (' + df_gbcount[y].map(str) + ')'
                df_gb['Pretrat'] = y
                df_gb = df_gb.pivot(index='Pretrat', columns = var, values = y).reset_index().reset_index(drop=True).rename_axis(None, axis=1)

                # Regression of the pre-treatment variable on the treatment,
                # with standard errors clustered by team.
                formula = '%s ~ %s + equipo + Torneo' % (y,var)
                model = ols(formula, data = df_anova).fit(cov_type = 'cluster',
                                                          cov_kwds={'groups': df_anova['equipo']})
                df_gb['N'] = df_anova.shape[0]
                df_gb['Var. trat.'] = df_anova[var].var()
                df_gb['Beta'] = np.round(model.params[var], decimals = 3)
                df_gb['t-stat'] = np.round(model.tvalues[var], decimals = 3)
                df_gb['p-val'] =  np.round(model.pvalues[var], decimals = 3)
                tabla_proms.append(df_gb)
            df_tabla = pd.concat(tabla_proms, ignore_index = True)
            # Non-string column labels are the treatment values from the pivot.
            sort_cols = [c for c in df_tabla.columns.tolist() if type(c) != str]
            sort_cols.sort()
            df_tabla = df_tabla[['Pretrat'] + sort_cols + ['N','Var. trat.','Beta','t-stat','p-val']].round(decimals = 3)
            df_tabla = df_tabla.sort_values(by = 'Pretrat', ascending = True).reset_index(drop=True)
            dfs_tablas.append(df_tabla)
        file = 'pretrat-%s-%s-%s-%s.xlsx' % (liga,defin,dific,filtro)
        # An empty `filtro` leaves a trailing dash before the extension.
        file = file.replace('-.xlsx','.xlsx')
        # `ExcelWriter.save()` no longer exists in pandas 2.0; the context
        # manager writes and closes the workbook on every pandas version.
        with pd.ExcelWriter(os.path.join(outputdir,file), engine='xlsxwriter') as writer:
            for df, sheet in zip(dfs_tablas, sheets):
                df.to_excel(writer, sheet_name = sheet, index = False)
                for col_idx, col in enumerate(df.columns):
                    # Widest cell in the column, as text.
                    column_len = df[col].astype(str).str.len().max()
                    # Use the header width when it is larger, plus padding.
                    column_len = max(column_len, len(str(col))) + 2
                    writer.sheets[sheet].set_column(col_idx, col_idx, column_len)

In [15]:
%%time
outputdir = os.path.join(os.path.pardir, 'resultados','pretratamiento-tablas-control')
ligas = ['Alemania','Espana','Francia','Inglaterra','Italia', 'All']
# defins = ['elo','general','lv']
defins = ['elo']
difics = ['facil','dificil']
filtros = ['','regular','dificultad']
# filtros = ['','dificultad']

for defin in defins:
    for dific in difics:
        for filtro in filtros:
            TablasPreTratamiento(dfs_pretrat, ligas, defin, dific, outputdir, filtro)

CPU times: user 17min 5s, sys: 8min 8s, total: 25min 13s
Wall time: 7min 45s


In [16]:
# CONCAT: stack the per-league workbooks into one summary workbook per
# (difficulty, filter), with one sheet per treatment definition.

datadir = os.path.join(os.path.pardir, 'resultados','pretratamiento-tablas-control')
outputdir = os.path.join(os.path.pardir, 'resultados','pretratamiento-tablas-resumen')
# The summary folder may not exist yet; writing would fail without it.
os.makedirs(outputdir, exist_ok=True)
ligas = ['inglaterra','alemania','espana','francia','italia', 'all']
filtros = ['','regular','dificultad']
summaryfile = 'resumen-pretrat-elo-%s-%s.xlsx'
inputfile = 'pretrat-%s-elo-%s-%s.xlsx'
for filtro in filtros:
    for dif in ['facil','dificil']:
        sheets = ['%s_elo_p%s_c%s' % (dif, i, j) for i in range(3,7) for j in range(3,7)]
        # An empty `filtro` leaves a trailing dash before the extension.
        summary = (summaryfile % (dif,filtro)).replace('-.','.')
        # `ExcelWriter.save()` no longer exists in pandas 2.0; the context
        # manager writes and closes the workbook on every pandas version.
        with pd.ExcelWriter(os.path.join(outputdir,summary), engine='xlsxwriter') as writer:
            for sheet in sheets:
                dfs = []
                for liga in ligas:
                    inp = (inputfile % (liga, dif,filtro)).replace('-.','.')
                    df = pd.read_excel(os.path.join(datadir,inp), sheet_name = sheet)
                    df['Liga'] = liga
                    dfs.append(df)
                df = pd.concat(dfs, ignore_index = True)
                # 'Pretrat' first, then the non-string (treatment value)
                # columns, then everything else.
                pretrat_num = ['Pretrat'] + [i for i in df.columns if type(i) != str]
                others = [c for c in df.columns if c not in pretrat_num]
                cols = pretrat_num + others
                df = df[cols]
                df.to_excel(writer, sheet_name = sheet, index = False, freeze_panes = (1,0))
                for col_idx, col in enumerate(df.columns):
                    # Widest cell in the column, as text.
                    column_len = df[col].astype(str).str.len().max()
                    # Use the header width when it is larger, plus padding.
                    column_len = max(column_len, len(str(col))) + 2
                    writer.sheets[sheet].set_column(col_idx, col_idx, column_len)

In [None]:
# NOTE: it is not yet clear how much of the commented-out code below is worth keeping.
# dfs_pretrat[3].shape

# FiltrarRegulares(dfs_pretrat[3], 'elo', 3, None, '').dropna().shape

# tablas_text = """
# """

# # Y = ['position_ant', 'points_ant', 'promoccup_ant']
# # Y_print = ['Posición anterior', 'Puntos anterior', 'Prom. ocupación anterior']
# Y = ['points_ant']
# Y_print = ['Puntos anterior']
# X = ['perc_facil_general_h%s_p%s_c%s',
#      'perc_dificil_general_h%s_p%s_c%s',
#      'perc_facil_lv_h%s_p%s_c%s',
#      'perc_dificil_lv_h%s_p%s_c%s',
#      'perc_facil_elo_p%s_c%s',
#      'perc_dificil_elo_p%s_c%s']
# X_print = ['Frac. Fácil General',
#            'Frac. Difícil General',
#            'Frac. Fácil LV',
#            'Frac. Difícil LV',
#            'Frac. Fácil ELO',
#            'Frac. Difícil ELO']
# X_sheet = ['FFG',
#            'FDG',
#            'FFLV',
#            'FDLV',
#            'FFELO',
#            'FDELO']
# ligas = ['Alemania','Espana','Francia','Inglaterra','Italia', 'All']

# dfs_res = []
# sheets = []
# for h in range(1,6):
#     for k in range(len(Y)):
#         y = Y[k]
#         means = []
#         for j in range(len(X)):
#             agregar = True
#             x = X[j]
#             if 'h' in x:
#                 sheet_n = X_sheet[j] + (' %s anterior' % h)
#             else:
#                 sheet_n = X_sheet[j]
#             for i in range(len(ligas)):
#                 resultados = []
#                 liga = ligas[i]
#                 if liga  == 'All':
#                     df = pd.concat(dfs_pretrat, ignore_index = True)
#                 else:
#                     df = dfs_pretrat[i]
#                 rl = []
#                 for r in [4,5,6,7]:
#                     for l in [3,4,5]:
#                         if h > 1 and 'h' not in x:
#                             agregar = False
#                             pass
#                         elif h < 2 and 'h' not in x:
#                             x_ = x % (r,l)
#                             rl.append("$r = %s, l = %s$" % (r,l))
#                         else:
#                             x_ = x % (h,r,l)
#                             rl.append("$h = %s, r = %s, l = %s$" % (h,r,l))
#                         formula = '%s ~ C(%s)' % (y,x_)
#                         model = ols(formula, data = df).fit()
#                         aov_table = sm.stats.anova_lm(model, typ=2)
#                         resultados.append(np.round(aov_table['PR(>F)'][0], decimals = 3))
#                 if agregar:
#                     if i < 1:
#                         df_res = pd.DataFrame({liga : resultados}, index = rl)
#                     else:
#                         df_res[liga] = resultados
#             if agregar:
#                 tablas_text = tablas_text + df_res.to_latex()
#                 dfs_res.append(df_res)
#                 sheets.append(sheet_n)
#                 print('----------------------------------------------------------------------------------------')
#                 print('Análisis y = %s, x = %s para %s torneos anteriores' % (Y_print[k], X_print[j], h))
# #                 print(df_res.to_latex())
# #             means.append(df_res.mean().tolist())
# #         df_means = pd.DataFrame(data = means, columns = ligas, index = X_print)
# #         print('---------------------------------PROMEDIOS------------------------------------------')
# #         print(df_means.to_latex())
# # with open(os.path.join(os.path.pardir,'Tablas', 'tablaspretreatment.txt'),'w') as tf:
# #     tf.write(tablas_text)

# outputdir = os.path.join(os.path.pardir, 'resultados','pretratamiento')
# writer = pd.ExcelWriter(os.path.join(outputdir,'puntos-anterior.xlsx'), engine='xlsxwriter')
# for df, sheet in zip(dfs_res, sheets):
#     df.to_excel(writer, sheet_name = sheet)
# writer.save()

# # Juntar todas las ligas
# df_ligas = 


# tablas_text = """
# """

# # Y = ['position_ant', 'points_ant', 'promoccup_ant']
# # Y_print = ['Posición anterior', 'Puntos anterior', 'Prom. ocupación anterior']
# Y = ['position_ant']
# Y_print = ['Posicion anterior']
# X = ['perc_facil_general_h%s_p%s_c%s',
#      'perc_dificil_general_h%s_p%s_c%s',
#      'perc_facil_lv_h%s_p%s_c%s',
#      'perc_dificil_lv_h%s_p%s_c%s',
#      'perc_facil_elo_p%s_c%s',
#      'perc_dificil_elo_p%s_c%s']
# X_print = ['Frac. Fácil General',
#            'Frac. Difícil General',
#            'Frac. Fácil LV',
#            'Frac. Difícil LV',
#            'Frac. Fácil ELO',
#            'Frac. Difícil ELO']
# X_sheet = ['FFG',
#            'FDG',
#            'FFLV',
#            'FDLV',
#            'FFELO',
#            'FDELO']
# ligas = ['Alemania','Espana','Francia','Inglaterra','Italia']

# dfs_res = []
# sheets = []
# for h in range(1,6):
#     for k in range(len(Y)):
#         y = Y[k]
#         means = []
#         for j in range(len(X)):
#             agregar = True
#             x = X[j]
#             if 'h' in x:
#                 sheet_n = X_sheet[j] + (' %s anterior' % h)
#             else:
#                 sheet_n = X_sheet[j]
#             for i in range(len(ligas)):
#                 resultados = []
#                 df = df_ligas
#                 rl = []
#                 for r in [4,5,6,7]:
#                     for l in [3,4,5]:
#                         if h > 1 and 'h' not in x:
#                             agregar = False
#                             pass
#                         elif h < 2 and 'h' not in x:
#                             x_ = x % (r,l)
#                             rl.append("$r = %s, l = %s$" % (r,l))
#                         else:
#                             x_ = x % (h,r,l)
#                             rl.append("$h = %s, r = %s, l = %s$" % (h,r,l))
#                         formula = '%s ~ C(%s)' % (y,x_)
#                         model = ols(formula, data = df).fit()
#                         aov_table = sm.stats.anova_lm(model, typ=2)
#                         resultados.append(np.round(aov_table['PR(>F)'][0], decimals = 3))
#                 if agregar:
#                     if i < 1:
#                         df_res = pd.DataFrame({liga : resultados}, index = rl)
#             if agregar:
#                 tablas_text = tablas_text + df_res.to_latex()
#                 dfs_res.append(df_res)
#                 sheets.append(sheet_n)
#                 print('----------------------------------------------------------------------------------------')
#                 print('Análisis y = %s, x = %s para %s torneos anteriores' % (Y_print[k], X_print[j], h))