In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor

In [27]:
# PREPOCESAMIENTO DE DATOS


# cargar dataset
df_part = pd.read_csv('../data/partidos_train_ucl_23_todo_24_fg.csv')
df_part.set_index('Match_ID', inplace=True)

# seleccionar variables de interes
df_1 = df_part.copy()
variables = df_1.drop(['Nombre_Eq1', 'Nombre_Eq2', 'GANADOR','Year'], axis=1).columns.to_list()
df_1 = df_1[variables]


# ampliar dataset (x4) para ML
#df_3 = df_1.copy()
#for _ in range(2):
#    df_3 = pd.concat([df_3, df_3], axis=0)

print('DATOS LISTOS')
print()


# DIVIDIR TEST Y TRAIN
df_4 = df_1.copy()
caracteristicas = df_4.drop(['Gol_Match_Eq1', 'Gol_Match_Eq2'], axis=1).columns.to_list()
objetivo = ['Gol_Match_Eq1', 'Gol_Match_Eq2']

# dividir 70%-30% en entrenamiento y test
x_train, x_test, y_train, y_test = train_test_split(df_4[caracteristicas].values,
                                                    df_4[objetivo].values,
                                                    test_size=0.3,
                                                    random_state=53)

DATOS LISTOS



In [28]:
# APLICAR MODELO DE REGRESIÓN

regr_1 = DecisionTreeRegressor(max_depth=2)
regr_1.fit(x_train, y_train)
regr_2 = DecisionTreeRegressor(max_depth=3)
regr_2.fit(x_train, y_train)

In [29]:
# PREDICCIONES  
predict_1 = regr_1.predict(x_test).round(3)
print(predict_1[:3])
print()
predict_2 = regr_2.predict(x_test).round(3)
print(predict_2[:3])

[[1.836 0.673]
 [1.068 2.545]
 [1.836 0.673]]

[[1.222 0.593]
 [0.975 2.725]
 [1.222 0.593]]


In [30]:
# EVALUACIÓN
from sklearn.metrics import mean_squared_error, r2_score

print('Métricas de evaluación REGRESION 1:')
print('MSE:', mean_squared_error(y_test, predict_1))
print('R2:', r2_score(y_test, predict_1))
print()
print('Métricas de evaluación REGRESION 2:')
print('MSE:', mean_squared_error(y_test, predict_2))
print('R2:', r2_score(y_test, predict_2))

Métricas de evaluación REGRESION 1:
MSE: 1.769825753731343
R2: 0.13307803446053773

Métricas de evaluación REGRESION 2:
MSE: 1.6836592686567164
R2: 0.16410189903913103


Vamos a uedarnos con el primer árbol, que da mejores resultados en la evaluación.

## Predicciones

In [31]:
psg = [1.55,1.13,2.68,1.4,2.53,23.48,2.57,13.07,4.39,0.25,0.08,0.23,0.86,0.54,3.82]
real_soc = [0.82,0.54,1.36,0.8,1.34,16.7,1.31,9.38,3.03,0.22,0.06,0.18,0.49,0.51,2.48]

df_partido = pd.DataFrame([psg+real_soc], columns=caracteristicas)
resultado = regr_2.predict(df_partido.values)

resultado_eq1 = resultado[0][0].round(5)
resultado_eq2 = resultado[0][1].round(5)
print(f'resultado: {resultado_eq1} - {resultado_eq2}')


resultado: 1.22222 - 0.59259


In [32]:
def resultado_partido(estad_eq1, estad_eq2):
    '''resultado de un partido entre dos equipos'''
    df_partido = pd.DataFrame([estad_eq1+estad_eq2], columns=caracteristicas)
    resultado = regr_1.predict(df_partido.values)

    resultado_eq1 = resultado[0][0].round(5)
    resultado_eq2 = resultado[0][1].round(5)
    return resultado_eq1, resultado_eq2


def resultado_media_final(eq1, eq2):
    '''media de goles de partido de ida y de vuelta entre dos equipos'''
    result_eq1_eq2 = resultado_partido(eq1, eq2)
    result_eq2_eq1 = resultado_partido(eq2, eq1)
    goles_eq1 = [result_eq1_eq2[0], result_eq2_eq1[1]]
    goles_eq2 = [result_eq1_eq2[1], result_eq2_eq1[0]]

    media_goles_eq1 = np.mean(goles_eq1).round(5)
    media_goles_eq2 = np.mean(goles_eq2).round(5)

    return media_goles_eq1, media_goles_eq2


### Octavos

In [33]:
psg = [1.55,1.13,2.68,1.4,2.53,23.48,2.57,13.07,4.39,0.25,0.08,0.23,0.86,0.54,3.82]
real_soc = [0.82,0.54,1.36,0.8,1.34,16.7,1.31,9.38,3.03,0.22,0.06,0.18,0.49,0.51,2.48]
result_psg_rsoc = resultado_partido(psg, real_soc)
print(result_psg_rsoc)
result_rsoc_psg = resultado_partido(real_soc, psg)
print(result_rsoc_psg)

(1.83636, 0.67273)
(1.83636, 0.67273)


In [None]:
psg = [1.55,1.13,2.68,1.4,2.53,23.48,2.57,13.07,4.39,0.25,0.08,0.23,0.86,0.54,3.82]
real_soc = [0.82,0.54,1.36,0.8,1.34,16.7,1.31,9.38,3.03,0.22,0.06,0.18,0.49,0.51,2.48]
oct1_psg_rsoc = resultado_media_final(psg, real_soc)
print(f'Resultado medio: \nPSG {oct1_psg_rsoc[0]} \nReal Sociedad {oct1_psg_rsoc[1]} ')

copenh = [1.18,0.8,1.97,1.08,1.87,17.35,2.04,9.58,3.3,0.26,0.08,0.25,1.12,0.52,4.42]
city = [2.09,1.56,3.65,1.82,3.38,26.29,3.86,14.26,5.28,0.28,0.1,0.26,0.68,0.48,2.23]

barca = [1.39,1.26,2.66,1.38,2.64,21.04,2.44,12.06,4.15,0.26,0.09,0.25,0.67,0.53,2.53]
napoli = [1.02,0.53,1.55,0.9,1.43,19.86,1.74,10.84,3.28,0.23,0.06,0.21,1.0,0.52,3.78]

atl_mad = [1.56,1.01,2.57,1.53,2.54,18.95,2.47,10.24,3.66,0.27,0.12,0.32,0.79,0.56,3.46]
inter = [1.22,0.77,1.99,0.94,1.71,19.86,2.07,10.88,3.47,0.24,0.06,0.2,0.58,0.6,3.11]

dortmund = [1.17,0.92,2.09,1.03,1.95,19.19,2.12,10.7,3.58,0.25,0.07,0.21,0.57,0.6,2.74]
psv = [1.12,0.75,1.87,0.95,1.7,21.29,1.98,11.64,4.12,0.27,0.06,0.16,1.09,0.46,3.06]

bay_munch = [1.28,1.0,2.28,1.18,2.18,19.23,2.11,10.82,3.2,0.22,0.08,0.28,0.7,0.57,2.74]
lazio = [0.78,0.5,1.28,0.62,1.12,15.22,1.35,8.49,2.48,0.22,0.06,0.19,0.91,0.52,3.56]

arsenal = [1.6,1.26,2.86,1.49,2.75,16.63,2.76,9.26,3.58,0.29,0.12,0.32,0.65,0.55,2.66]
porto = [1.66,1.23,2.88,1.53,2.77,18.7,2.67,10.46,4.36,0.32,0.11,0.26,0.88,0.53,3.11]

real_mad = [1.57,1.35,2.92,1.53,2.88,24.5,2.98,13.2,4.48,0.22,0.08,0.23,0.58,0.54,3.3]
lepizig = [1.21,1.06,2.27,1.18,2.25,17.27,2.04,10.46,3.61,0.23,0.07,0.22,0.92,0.42,3.08]

(1.806, 1.583)

In [13]:


resultado_media_final(copenh, city)

(1.71, 2.79)

In [14]:


resultado_media_final(barca, napoli)

(1.824, 1.154)

In [15]:


resultado_media_final(atl_mad, inter)

(1.769, 1.656)

In [16]:


resultado_media_final(dortmund, psv)

(1.91, 1.404)

In [17]:


resultado_media_final(bay_munch, lazio)

(1.593, 1.083)

In [18]:


resultado_media_final(arsenal, porto)

(2.173, 1.958)

In [19]:


resultado_media_final(real_mad, lepizig)

(2.08, 1.623)

### Cuartos

In [20]:
resultado_media_final(dortmund, atl_mad)

(1.498, 1.7)

In [21]:
resultado_media_final(barca, psg)

(1.53, 1.47)

In [22]:
resultado_media_final(arsenal, bay_munch)

(1.459, 1.147)

In [23]:
resultado_media_final(real_mad, city)

(2.007, 2.129)

### Semis

In [26]:
resultado_media_final(barca, atl_mad)

(1.566, 1.496)

In [27]:
resultado_media_final(arsenal, city)

(1.755, 2.1)

### Final

In [28]:
resultado_media_final(barca,city)

(1.461, 1.818)