In [71]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [72]:
# PREPOCESAMIENTO DE DATOS


# cargar dataset
df_part = pd.read_csv('../data/partidos_train_ucl_23_todo_24_fg_SEMIS.csv')
df_part.set_index('Match_ID', inplace=True)

# seleccionar variables de interes
df_1 = df_part.copy()
variables = df_1.drop(['Nombre_Eq1','Nombre_Eq2','GANADOR','Year'], axis=1).columns.to_list()
df_1 = df_1[variables]


# ampliar dataset (x4) para ML
#df_3 = df_1.copy()
#for _ in range(2):
#    df_3 = pd.concat([df_3, df_3], axis=0)

print('DATOS LISTOS')
print()


# DIVIDIR TEST Y TRAIN
df_4 = df_1.copy()
caracteristicas = df_4.drop(['Gol_Match_Eq1', 'Gol_Match_Eq2'], axis=1).columns.to_list()
objetivo = ['Gol_Match_Eq1', 'Gol_Match_Eq2']

# dividir 70%-30% en entrenamiento y test
x_train, x_test, y_train, y_test = train_test_split(df_4[caracteristicas].values,
                                                    df_4[objetivo].values,
                                                    test_size=0.3,
                                                    random_state=53)

# escalar variables independientes
scaler = MinMaxScaler()
x_train_sc = scaler.fit_transform(x_train)
x_test_sc = scaler.transform(x_test)

DATOS LISTOS



In [73]:
# APLICAR MODELO REGRESIÓN LINEAL
from sklearn.linear_model import LinearRegression

modelo_regr_lineal = LinearRegression()
modelo_regr_lineal.fit(x_train_sc, y_train)

In [74]:
# PREDICCIONES
predicciones = modelo_regr_lineal.predict(x_test_sc)
predicciones = predicciones.round(3)
predicciones[:5]

array([[2.757, 0.668],
       [0.162, 1.634],
       [2.218, 0.72 ],
       [1.417, 0.637],
       [1.033, 1.616]])

In [75]:
# EVALUACIÓN
from sklearn.metrics import mean_squared_error, r2_score

print('Métricas de evaluación:')
print('MSE:', mean_squared_error(y_test, predicciones))
print('R2:', r2_score(y_test, predicciones))

Métricas de evaluación:
MSE: 1.8069911417910447
R2: 0.1004720255363949


## Predicciones

In [76]:
print(len(caracteristicas))

32


In [77]:
def resultado_partido(estad_eq1, estad_eq2):
    df_partido = pd.DataFrame([estad_eq1+estad_eq2], columns=caracteristicas)
    resultado = modelo_regr_lineal.predict(df_partido.values)

    resultado_eq1 = resultado[0][0].round(5)
    resultado_eq2 = resultado[0][1].round(5)
    return resultado_eq1, resultado_eq2


def resultado_media_final(eq1, eq2):
    '''media de partido de ida y de vuelta'''
    eq1_eq2 = resultado_partido(eq1, eq2)
    eq2_eq1 = resultado_partido(eq2, eq1)
    goles_eq1 = [eq1_eq2[0], eq2_eq1[1]]
    goles_eq2 = [eq1_eq2[1], eq2_eq1[0]]

    media_eq1 = np.mean(goles_eq1).round(3)
    media_eq2 = np.mean(goles_eq2).round(3)

    return media_eq1, media_eq2


### Octavos

In [78]:
psg = [1.55,1.13,2.68,1.4,2.53,23.48,2.57,13.07,4.39,0.25,0.08,0.23,0.86,0.54,3.82]+[3.0]
real_soc = [0.82,0.54,1.36,0.8,1.34,16.7,1.31,9.38,3.03,0.22,0.06,0.18,0.49,0.51,2.48]+[1.0]

resultado_media_final(psg, real_soc)

(122.811, -100.629)

In [79]:
copenh = [1.18,0.8,1.97,1.08,1.87,17.35,2.04,9.58,3.3,0.26,0.08,0.25,1.12,0.52,4.42]+[0.0]
city = [2.09,1.56,3.65,1.82,3.38,26.29,3.86,14.26,5.28,0.28,0.1,0.26,0.68,0.48,2.23]+[4.0]

resultado_media_final(copenh, city)

(-229.338, 173.264)

In [80]:
barca = [1.39,1.26,2.66,1.38,2.64,21.04,2.44,12.06,4.15,0.26,0.09,0.25,0.67,0.53,2.53]+[17.0]
napoli = [1.02,0.53,1.55,0.9,1.43,19.86,1.74,10.84,3.28,0.23,0.06,0.21,1.0,0.52,3.78]+[0.0]

resultado_media_final(barca, napoli)

(970.418, -456.583)

In [81]:
atl_mad = [1.56,1.01,2.57,1.53,2.54,18.95,2.47,10.24,3.66,0.27,0.12,0.32,0.79,0.56,3.46]+[6.0]
inter = [1.22,0.77,1.99,0.94,1.71,19.86,2.07,10.88,3.47,0.24,0.06,0.2,0.58,0.6,3.11]+[9.0]

resultado_media_final(atl_mad, inter)

(95.572, 334.721)

In [82]:
dortmund = [1.17,0.92,2.09,1.03,1.95,19.19,2.12,10.7,3.58,0.25,0.07,0.21,0.57,0.6,2.74]+[4.0]
psv = [1.12,0.75,1.87,0.95,1.7,21.29,1.98,11.64,4.12,0.27,0.06,0.16,1.09,0.46,3.06]+[3.0]

resultado_media_final(dortmund, psv)

(112.847, 18.974)

In [83]:
bay_munch = [1.28,1.0,2.28,1.18,2.18,19.23,2.11,10.82,3.2,0.22,0.08,0.28,0.7,0.57,2.74]+[20.0]
lazio = [0.78,0.5,1.28,0.62,1.12,15.22,1.35,8.49,2.48,0.22,0.06,0.19,0.91,0.52,3.56]+[0.0]

resultado_media_final(bay_munch, lazio)

(1140.846, -497.533)

In [84]:
arsenal = [1.6,1.26,2.86,1.49,2.75,16.63,2.76,9.26,3.58,0.29,0.12,0.32,0.65,0.55,2.66]+[3.0]
porto = [1.66,1.23,2.88,1.53,2.77,18.7,2.67,10.46,4.36,0.32,0.11,0.26,0.88,0.53,3.11]+[2.0]

resultado_media_final(arsenal, porto)

(20.656, -60.327)

In [85]:
real_mad = [1.57,1.35,2.92,1.53,2.88,24.5,2.98,13.2,4.48,0.22,0.08,0.23,0.58,0.54,3.3]+[32.0]
lepizig = [1.21,1.06,2.27,1.18,2.25,17.27,2.04,10.46,3.61,0.23,0.07,0.22,0.92,0.42,3.08]+[1.0]

resultado_media_final(real_mad, lepizig)

(1786.082, -717.738)

### Cuartos

In [86]:
resultado_media_final(dortmund, atl_mad)

(42.788, 182.938)

In [87]:
resultado_media_final(barca, psg)

(844.32, -280.644)

In [88]:
resultado_media_final(arsenal, bay_munch)

(-333.161, 1013.031)

In [89]:
resultado_media_final(real_mad, city)

(1674.773, -538.941)

### Semis

In [90]:
resultado_media_final(barca, atl_mad)

(812.309, -114.581)

In [91]:
resultado_media_final(arsenal, city)

(-63.652, 66.935)

### Final

In [92]:
resultado_media_final(barca,city)

(781.028, -227.739)