# Aleatoriedad en asignación de dificultades

In [1]:
import numpy as np
import pandas as pd
import os
from statsmodels.formula.api import ols
import statsmodels.api as sm
from scipy import stats

In [2]:
def LeerBasesRegresion(datadir, ligas, column_condition):
    """Load one Excel workbook per league, keeping only the selected columns.

    Each name in ``ligas`` is mapped to the file ``<datadir>/<liga>.xlsx``.
    From every workbook the columns ``'equipo'`` and ``'Torneo'`` are kept,
    followed by every column whose name satisfies ``column_condition``.

    Parameters
    ----------
    datadir : str
        Directory containing the ``.xlsx`` files.
    ligas : list of str
        League names (file names without the extension). Must be non-empty.
    column_condition : callable
        Predicate called with each column name; truthy means "keep".

    Returns
    -------
    dfs : list of pandas.DataFrame
        Filtered frames, in the order of ``ligas``. If a league other than
        the first one cannot be loaded, loading stops there and the list is
        shorter than ``ligas`` (a warning names the offending league).
    columns_filter : list
        Column names of the *first* workbook accepted by
        ``column_condition``.

    Raises
    ------
    IndexError
        If ``ligas`` is empty.
    Exception
        Any error raised while loading the first league is propagated.
    """
    # Function-scope import: the file's import cell does not bring in
    # ``warnings`` and this is the only place that needs it.
    import warnings

    def leer_liga(liga):
        # Read one workbook and project it onto the id columns plus the
        # columns accepted by the caller's predicate.
        df = pd.read_excel(os.path.join(datadir, liga + '.xlsx'))
        seleccion = [col for col in df.columns if column_condition(col)]
        return df[['equipo', 'Torneo'] + seleccion], seleccion

    # The first league is mandatory: it defines ``columns_filter`` and any
    # failure here is raised to the caller.
    primera, columns_filter = leer_liga(ligas[0])
    dfs = [primera]

    for liga in ligas[1:]:
        try:
            df, _ = leer_liga(liga)
        except Exception as error:
            # Best-effort contract kept from the original implementation:
            # stop at the first league that fails and return what was read
            # so far -- but report it instead of swallowing it silently.
            warnings.warn(
                "LeerBasesRegresion: could not load league %r (%s: %s); "
                "returning only the %d league(s) read so far."
                % (liga, type(error).__name__, error, len(dfs)),
                stacklevel=2,
            )
            break
        dfs.append(df)
    return dfs, columns_filter

In [3]:
def SignificanciaVariablesManual(coef, se, pval, replace_spaces = True):
    """Format a coefficient as ``'coef (se)'`` followed by significance stars.

    The coefficient and standard error are rounded to three decimals. A
    strictly positive coefficient gets one leading pad character so that it
    lines up with negative coefficients (which carry a ``-`` sign). The
    suffix is always three characters wide: ``***`` for p < 0.01, ``**`` for
    p < 0.05, ``*`` for p < 0.1 and nothing otherwise, padded on the right.

    Parameters
    ----------
    coef : float
        Estimated coefficient.
    se : float
        Standard error of the coefficient.
    pval : float
        p-value; it is rounded to three decimals before being compared with
        the thresholds.
    replace_spaces : bool, optional
        When true (default) the ``_`` padding characters are turned into
        spaces; when false the underscores are left in the output.

    Returns
    -------
    str
        The formatted cell text.
    """
    # The sign test has to run on the numeric value: once ``coef`` is
    # formatted it is a string, and ``str > int`` raises TypeError.
    es_positivo = coef > 0
    coef_txt = "%.3f" % (np.round(coef, decimals = 3))
    se_txt = "%.3f" % (np.round(se, decimals = 3))

    prefijo = '_' if es_positivo else ''
    output = prefijo + coef_txt + ' (' + se_txt + ')'

    pval = np.round(pval, 3)
    # Each branch is only reached when the previous threshold was not met,
    # so explicit lower bounds are unnecessary.
    if pval < 0.01:
        estrellas = '***'
    elif pval < 0.05:
        estrellas = '**_'
    elif pval < 0.1:
        estrellas = '*__'
    else:
        estrellas = '___'
    output = output + estrellas

    if replace_spaces:
        output = output.replace("_"," ")
    return output

In [4]:
def _ResumenPValoresANOVA(dfs, ligas, columnas, dict_replaces):
    """Build one table of one-way ANOVA p-values (rows: indicators, columns: leagues)."""
    resumen = {'Indicador' : columnas}
    for i, liga in enumerate(ligas):
        resultados = []
        for var in columnas:
            # One-way ANOVA of the indicator on the team factor.
            model = ols(var + ' ~ C(equipo)', data = dfs[i]).fit()
            aov_table = sm.stats.anova_lm(model, typ=2)
            # First row of the ANOVA table, selected by position. ``.iloc``
            # is required because the table is indexed by term labels and
            # plain ``[0]`` on a label-indexed Series is deprecated.
            pvalor = aov_table['PR(>F)'].iloc[0]
            resultados.append(np.round(pvalor, decimals = 3))
        resumen[liga] = resultados
    tabla = pd.DataFrame(data = resumen)
    # Turn raw column names into readable labels via regex substitutions.
    tabla['Indicador'] = tabla['Indicador'].replace(to_replace = dict_replaces, regex=True)
    return tabla


def ANOVA1ViaIndicadores(datadir, ligas, column_condition):
    """Summarise one-way ANOVA p-values of difficulty indicators by team.

    The league workbooks are loaded with ``LeerBasesRegresion``. For every
    selected column whose name contains ``'facil'`` and, separately, every
    one whose name contains ``'dificil'``, the model ``<column> ~ C(equipo)``
    is fitted per league and the p-value in the first row of the type-2
    ANOVA table is recorded, rounded to three decimals.

    Parameters
    ----------
    datadir : str
        Directory containing one ``<liga>.xlsx`` file per league.
    ligas : list of str
        League names; each becomes one column of the summary tables.
    column_condition : callable
        Predicate on column names, forwarded to ``LeerBasesRegresion``.

    Returns
    -------
    list of pandas.DataFrame
        Two tables, ``[facil, dificil]``. Each has an ``'Indicador'`` column
        with the relabelled indicator names and one p-value column per
        league.
    """
    # NOTE: the substitutions are order-sensitive -- later keys (e.g. ' p',
    # 'Primer Fácil C') are written against text produced by earlier ones
    # (e.g. '_' -> ' ', 'facil' -> 'Fácil').
    dict_replaces = {'_mean': '',
                     '_':' ',
                     'dificil':'Difícil',
                     'dificl' : 'Difícil',
                     'facil': 'Fácil',
                     'regular' : 'Regular',
                     'prom': 'Promedio',
                     'posicion': 'Posición',
                     'primer' : 'Primer',
                     'ronda' : 'Ronda',
                     'gral': 'Gral.',
                     'elo' : 'ELO',
                     'categoria': 'Categoría',
                     'local': 'Local',
                     'visita': 'Visita',
                     'general' : 'Gral.',
                     'corr' : 'Correg.',
                     'perc' : 'Frac.',
                     ' h' : ' Ant. ',
                     ' p': ' Primeros ',
                     ' c': ' Corte ',
                     'lv' : 'LV',
                     'Primer Fácil C': 'Primer Fácil LV C',
                     'Primer Difícil C': 'Primer Difícil LV C'}
    dfs, columns_filter = LeerBasesRegresion(datadir,
                                             ligas,
                                             column_condition)
    # The indicator names come from the first league's workbook; the other
    # workbooks are assumed to expose the same columns.
    columns_facil = [i for i in columns_filter if 'facil' in i]
    columns_dificil = [i for i in columns_filter if 'dificil' in i]
    return [_ResumenPValoresANOVA(dfs, ligas, columnas, dict_replaces)
            for columnas in (columns_facil, columns_dificil)]

## Aleatoriedad para indicadores según puntaje ELO

In [5]:
# Input workbooks, leagues to analyse, and the folder for the result files.
datadir = os.path.join(os.path.pardir, 'datos', 'regresiones')
ligas = ['Alemania','Espana','Francia', 'Inglaterra','Italia']
outputdir = os.path.join(os.path.pardir,'resultados','aleatoriedad-inicio')
# ANOVA p-values for the columns whose name contains both 'perc' and 'elo'.
dfs_resumen_elo = ANOVA1ViaIndicadores(datadir,
                                       ligas,
                                       lambda x: 'perc' in x and 'elo' in x)

outputfile = 'aleatoriedad-elo.xlsx'
# The context manager writes and closes the workbook on exit;
# ``ExcelWriter.save()` is no longer available in pandas >= 2.0.
with pd.ExcelWriter(os.path.join(outputdir, outputfile), engine='xlsxwriter') as writer:
    for df, sheet in zip(dfs_resumen_elo, ['Facil','Dificil']):
        df.to_excel(writer, sheet_name = sheet, index = False)

In [6]:
# First table returned by ANOVA1ViaIndicadores: p-values for the 'facil' indicators.
dfs_resumen_elo[0]

Unnamed: 0,Indicador,Alemania,Espana,Francia,Inglaterra,Italia
0,Frac. Fácil ELO Primeros 4 Corte 3,0.121,0.105,0.341,0.532,0.646
1,Frac. Fácil ELO Primeros 5 Corte 3,0.067,0.264,0.381,0.743,0.664
2,Frac. Fácil ELO Primeros 6 Corte 3,0.24,0.015,0.593,0.79,0.844
3,Frac. Fácil ELO Primeros 7 Corte 3,0.048,0.019,0.451,0.891,0.848
4,Frac. Fácil ELO Primeros 4 Corte 4,0.309,0.215,0.537,0.793,0.055
5,Frac. Fácil ELO Primeros 5 Corte 4,0.095,0.234,0.488,0.946,0.113
6,Frac. Fácil ELO Primeros 6 Corte 4,0.406,0.026,0.759,0.989,0.129
7,Frac. Fácil ELO Primeros 7 Corte 4,0.08,0.034,0.315,0.993,0.279
8,Frac. Fácil ELO Primeros 4 Corte 5,0.365,0.012,0.507,0.632,0.19
9,Frac. Fácil ELO Primeros 5 Corte 5,0.169,0.02,0.551,0.783,0.04


In [7]:
# Second table returned by ANOVA1ViaIndicadores: p-values for the 'dificil' indicators.
dfs_resumen_elo[1]

Unnamed: 0,Indicador,Alemania,Espana,Francia,Inglaterra,Italia
0,Frac. Difícil ELO Primeros 4 Corte 3,0.81,0.001,0.09,0.788,0.009
1,Frac. Difícil ELO Primeros 5 Corte 3,0.489,0.0,0.064,0.264,0.0
2,Frac. Difícil ELO Primeros 6 Corte 3,0.374,0.0,0.087,0.076,0.006
3,Frac. Difícil ELO Primeros 7 Corte 3,0.309,0.001,0.049,0.088,0.01
4,Frac. Difícil ELO Primeros 4 Corte 4,0.777,0.002,0.009,0.68,0.206
5,Frac. Difícil ELO Primeros 5 Corte 4,0.359,0.0,0.003,0.17,0.003
6,Frac. Difícil ELO Primeros 6 Corte 4,0.291,0.0,0.043,0.067,0.145
7,Frac. Difícil ELO Primeros 7 Corte 4,0.14,0.001,0.043,0.04,0.305
8,Frac. Difícil ELO Primeros 4 Corte 5,0.956,0.0,0.021,0.203,0.248
9,Frac. Difícil ELO Primeros 5 Corte 5,0.653,0.0,0.002,0.138,0.037


## Aleatoriedad para indicadores según definición general

In [8]:
# Input workbooks and leagues to analyse (``outputdir`` is the one set in the ELO cell).
datadir = os.path.join(os.path.pardir, 'datos', 'regresiones')
ligas = ['Alemania','Espana','Francia', 'Inglaterra','Italia']
# ANOVA p-values for the columns whose name contains both 'perc' and 'general'.
dfs_resumen_general = ANOVA1ViaIndicadores(datadir,
                                       ligas,
                                       lambda x: 'perc' in x and 'general' in x)

outputfile = 'aleatoriedad-general.xlsx'
# The context manager writes and closes the workbook on exit;
# ``ExcelWriter.save()`` is no longer available in pandas >= 2.0.
with pd.ExcelWriter(os.path.join(outputdir, outputfile), engine='xlsxwriter') as writer:
    for df, sheet in zip(dfs_resumen_general, ['Facil','Dificil']):
        df.to_excel(writer, sheet_name = sheet, index = False)

In [9]:
# First table returned by ANOVA1ViaIndicadores: p-values for the 'facil' indicators.
dfs_resumen_general[0]

Unnamed: 0,Indicador,Alemania,Espana,Francia,Inglaterra,Italia
0,Frac. Fácil Gral. Ant. 1 Primeros 4 Corte 3,0.603,0.557,0.403,0.713,0.481
1,Frac. Fácil Gral. Ant. 1 Primeros 5 Corte 3,0.248,0.333,0.619,0.885,0.306
2,Frac. Fácil Gral. Ant. 1 Primeros 6 Corte 3,0.563,0.188,0.871,0.987,0.412
3,Frac. Fácil Gral. Ant. 1 Primeros 7 Corte 3,0.474,0.086,0.693,0.985,0.71
4,Frac. Fácil Gral. Ant. 1 Primeros 4 Corte 4,0.3,0.411,0.674,0.807,0.3
5,Frac. Fácil Gral. Ant. 1 Primeros 5 Corte 4,0.182,0.213,0.695,0.873,0.081
6,Frac. Fácil Gral. Ant. 1 Primeros 6 Corte 4,0.704,0.052,0.869,0.981,0.171
7,Frac. Fácil Gral. Ant. 1 Primeros 7 Corte 4,0.414,0.047,0.866,0.969,0.715
8,Frac. Fácil Gral. Ant. 1 Primeros 4 Corte 5,0.73,0.041,0.679,0.402,0.149
9,Frac. Fácil Gral. Ant. 1 Primeros 5 Corte 5,0.689,0.045,0.851,0.692,0.021


In [10]:
# Second table returned by ANOVA1ViaIndicadores: p-values for the 'dificil' indicators.
dfs_resumen_general[1]

Unnamed: 0,Indicador,Alemania,Espana,Francia,Inglaterra,Italia
0,Frac. Difícil Gral. Ant. 1 Primeros 4 Corte 3,0.861,0.005,0.135,0.854,0.025
1,Frac. Difícil Gral. Ant. 1 Primeros 5 Corte 3,0.933,0.0,0.183,0.209,0.0
2,Frac. Difícil Gral. Ant. 1 Primeros 6 Corte 3,0.733,0.0,0.406,0.06,0.014
3,Frac. Difícil Gral. Ant. 1 Primeros 7 Corte 3,0.672,0.006,0.237,0.019,0.007
4,Frac. Difícil Gral. Ant. 1 Primeros 4 Corte 4,0.762,0.003,0.027,0.909,0.404
5,Frac. Difícil Gral. Ant. 1 Primeros 5 Corte 4,0.646,0.002,0.027,0.358,0.105
6,Frac. Difícil Gral. Ant. 1 Primeros 6 Corte 4,0.798,0.001,0.084,0.331,0.371
7,Frac. Difícil Gral. Ant. 1 Primeros 7 Corte 4,0.531,0.026,0.101,0.178,0.18
8,Frac. Difícil Gral. Ant. 1 Primeros 4 Corte 5,0.76,0.0,0.083,0.542,0.232
9,Frac. Difícil Gral. Ant. 1 Primeros 5 Corte 5,0.485,0.0,0.011,0.212,0.052


In [11]:
# Input workbooks and leagues to analyse (``outputdir`` is the one set in the ELO cell).
datadir = os.path.join(os.path.pardir, 'datos', 'regresiones')
ligas = ['Alemania','Espana','Francia', 'Inglaterra','Italia']
# ANOVA p-values for the columns whose name contains both 'perc' and 'lv'.
dfs_resumen_lv = ANOVA1ViaIndicadores(datadir,
                                       ligas,
                                       lambda x: 'perc' in x and 'lv' in x)

outputfile = 'aleatoriedad-localvisita.xlsx'
# The context manager writes and closes the workbook on exit;
# ``ExcelWriter.save()`` is no longer available in pandas >= 2.0.
with pd.ExcelWriter(os.path.join(outputdir, outputfile), engine='xlsxwriter') as writer:
    for df, sheet in zip(dfs_resumen_lv, ['Facil','Dificil']):
        df.to_excel(writer, sheet_name = sheet, index = False)

In [12]:
# First table returned by ANOVA1ViaIndicadores: p-values for the 'facil' indicators.
dfs_resumen_lv[0]

Unnamed: 0,Indicador,Alemania,Espana,Francia,Inglaterra,Italia
0,Frac. Fácil LV Ant. 1 Primeros 4 Corte 3,0.014,0.319,0.111,0.14,0.811
1,Frac. Fácil LV Ant. 1 Primeros 5 Corte 3,0.061,0.129,0.217,0.362,0.705
2,Frac. Fácil LV Ant. 1 Primeros 6 Corte 3,0.254,0.093,0.444,0.287,0.598
3,Frac. Fácil LV Ant. 1 Primeros 7 Corte 3,0.341,0.087,0.261,0.346,0.63
4,Frac. Fácil LV Ant. 1 Primeros 4 Corte 4,0.37,0.134,0.389,0.075,0.782
5,Frac. Fácil LV Ant. 1 Primeros 5 Corte 4,0.521,0.048,0.183,0.121,0.612
6,Frac. Fácil LV Ant. 1 Primeros 6 Corte 4,0.653,0.038,0.488,0.237,0.545
7,Frac. Fácil LV Ant. 1 Primeros 7 Corte 4,0.549,0.035,0.633,0.324,0.648
8,Frac. Fácil LV Ant. 1 Primeros 4 Corte 5,0.726,0.141,0.43,0.16,0.447
9,Frac. Fácil LV Ant. 1 Primeros 5 Corte 5,0.806,0.092,0.149,0.36,0.242


In [13]:
# Second table returned by ANOVA1ViaIndicadores: p-values for the 'dificil' indicators.
dfs_resumen_lv[1]

Unnamed: 0,Indicador,Alemania,Espana,Francia,Inglaterra,Italia
0,Frac. Difícil LV Ant. 1 Primeros 4 Corte 3,0.743,0.087,0.083,0.468,0.03
1,Frac. Difícil LV Ant. 1 Primeros 5 Corte 3,0.958,0.003,0.05,0.25,0.0
2,Frac. Difícil LV Ant. 1 Primeros 6 Corte 3,0.953,0.001,0.334,0.111,0.008
3,Frac. Difícil LV Ant. 1 Primeros 7 Corte 3,0.859,0.084,0.277,0.056,0.0
4,Frac. Difícil LV Ant. 1 Primeros 4 Corte 4,0.932,0.0,0.079,0.163,0.127
5,Frac. Difícil LV Ant. 1 Primeros 5 Corte 4,0.729,0.0,0.017,0.01,0.005
6,Frac. Difícil LV Ant. 1 Primeros 6 Corte 4,0.665,0.002,0.157,0.032,0.127
7,Frac. Difícil LV Ant. 1 Primeros 7 Corte 4,0.48,0.035,0.145,0.044,0.018
8,Frac. Difícil LV Ant. 1 Primeros 4 Corte 5,0.482,0.032,0.117,0.099,0.029
9,Frac. Difícil LV Ant. 1 Primeros 5 Corte 5,0.244,0.007,0.018,0.006,0.001
