# DNN

In [42]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Carga los datos
data = pd.read_csv('../../data/partidos_limpio.csv')

# Preparar las etiquetas binarias para los resultados de los partidos
data['Winner'] = (data['Score'].apply(lambda x: int(x.split('-')[0]) > int(x.split('-')[1]))).astype(int)

# Selección de características y datos categóricos para codificación
features = data[['Home', 'Away', 'Referee']]
target = data['Winner']

# Preprocesamiento con OneHotEncoder para los datos categóricos
preprocessor = ColumnTransformer(transformers=[
    ('cat', OneHotEncoder(sparse_output=False, handle_unknown='ignore'), ['Home', 'Away', 'Referee'])
])

# Dividir los datos en conjuntos de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

# Transformar los datos con el preprocesador
X_train_transformed = preprocessor.fit_transform(X_train)
X_test_transformed = preprocessor.transform(X_test)

# Modelo de red neuronal
model = Sequential([
    Dense(128, activation='relu', input_shape=[X_train_transformed.shape[1]]),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')
])

# Compilación del modelo
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Entrenamiento del modelo
model.fit(X_train_transformed, y_train, epochs=50, batch_size=10)

# Evaluación del modelo en el conjunto de prueba
test_accuracy = model.evaluate(X_test_transformed, y_test)[1]
print("Precisión en el conjunto de prueba:", test_accuracy)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Precisión en el conjunto de prueba: 0.5478261113166809


In [43]:
# Cargar y revisar el contenido del archivo actualizado para la temporada 2023-2024
updated_data_path = '../../data/partidos_2023-2024_limpio.csv'
updated_data = pd.read_csv(updated_data_path)
updated_data.head()

Unnamed: 0,Season,Round,Day,Date,Home,Score,Away,Venue,Referee
0,2023-2024,Round of 16,Tue,2024-02-13,RB Leipzig,0-1,Real Madrid,Red Bull Arena,Irfan Peljto
1,2023-2024,Round of 16,Tue,2024-02-13,FC Copenhagen,1-3,Manchester City,Parken,José Sánchez
2,2023-2024,Round of 16,Wed,2024-02-14,Paris S-G,2-0,Real Sociedad,Parc des Princes,Marco Guida
3,2023-2024,Round of 16,Wed,2024-02-14,Lazio,1-0,Bayern Munich,Stadio Olimpico,François Letexier
4,2023-2024,Round of 16,Tue,2024-02-20,PSV Eindhoven,1-1,Dortmund,Philips Stadion,Srđan Jovanović


In [44]:
# Filtrar partidos sin resultado en el score
upcoming_matches = updated_data[updated_data['Score'].isna()]

# Mostrar los partidos próximos para verificar los datos
upcoming_matches


Unnamed: 0,Season,Round,Day,Date,Home,Score,Away,Venue,Referee
24,2023-2024,Semi-finals,Tue,2024-04-30,Bayern Munich,,Real Madrid,Allianz Arena,
25,2023-2024,Semi-finals,Wed,2024-05-01,Dortmund,,Paris S-G,Signal Iduna Park,
26,2023-2024,Semi-finals,Tue,2024-05-07,Paris S-G,,Dortmund,Parc des Princes,
27,2023-2024,Semi-finals,Wed,2024-05-08,Real Madrid,,Bayern Munich,Estadio Santiago Bernabéu,


In [45]:
# Preparar los datos de los próximos partidos para la predicción
features_upcoming = upcoming_matches[['Home', 'Away']]

# Añadir columna 'Referee' con un valor predeterminado si no está presente
if 'Referee' not in features_upcoming.columns:
    features_upcoming['Referee'] = 'Unknown'

# Preparar los datos de los próximos partidos para la predicción
X_upcoming = preprocessor.transform(features_upcoming)

# Hacer las predicciones con el modelo entrenado
predicted_winners = model.predict(X_upcoming)
predicted_winners = (predicted_winners > 0.5).astype(int)

# Asignar las predicciones al dataframe de próximos partidos
upcoming_matches['Predicted Winner'] = predicted_winners
upcoming_matches['Predicted Winner'] = upcoming_matches['Predicted Winner'].map({1: 'Home Wins', 0: 'Away Wins'})

# Mostrar los resultados
print(upcoming_matches[['Home', 'Away', 'Predicted Winner']])


             Home           Away Predicted Winner
24  Bayern Munich    Real Madrid        Away Wins
25       Dortmund      Paris S-G        Home Wins
26      Paris S-G       Dortmund        Home Wins
27    Real Madrid  Bayern Munich        Home Wins


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  features_upcoming['Referee'] = 'Unknown'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  upcoming_matches['Predicted Winner'] = predicted_winners
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  upcoming_matches['Predicted Winner'] = upcoming_matches['Predicted Winner'].map({1: 'Home Wins', 0: 'Away W

In [46]:
# Configurar los datos del partido final
final_match = pd.DataFrame({
    'Home': ['Paris S-G'],
    'Away': ['Bayern Munich'],
    'Referee': ['Unknown']  # Asumiendo que no conocemos el árbitro
})

# Preparar los datos para la predicción
X_final = preprocessor.transform(final_match)

# Hacer la predicción
predicted_final_winner = model.predict(X_final)
predicted_final_winner = (predicted_final_winner > 0.5).astype(int)

# Interpretar la predicción
final_winner = 'Home Wins' if predicted_final_winner else 'Away Wins'
print("Predicted Final Winner: ", 'Paris S-G' if final_winner == 'Home Wins' else ' Bayern Munich')


Predicted Final Winner:   Bayern Munich
