In [32]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from sklearn.preprocessing import OneHotEncoder
from scipy.stats import poisson
from sklearn.metrics import confusion_matrix, classification_report, f1_score

# Cargar dataset
df = pd.read_csv("../datasets/dataset_transformado.csv")

In [33]:
df['season'] = df['season'].astype(str)
df['date'] = pd.to_datetime(df['date'])

df_train = df[df['season'].isin(['2019-20', '2020-21', '2021-22', '2022-23'])].reset_index(drop=True)
df_test = df[df['season'] == '2023-24'].reset_index(drop=True)


In [34]:
features_numericas = [
    'home_adv', 'pct_wins', 'avg_goals_scored', 'avg_goals_received', 'goal_difference',
    'pct_wins_rival', 'avg_goals_scored_rival', 'avg_goals_received_rival', 'goal_difference_rival',
    'pct_wins_vs_rival', 'avg_goals_scored_vs_rival', 'avg_goals_received_vs_rival', 'goal_difference_vs_rival',
    'AvgH', 'AvgD', 'AvgA'
]

equipos = sorted(pd.unique(df[['team', 'rival_team']].values.ravel()))

# Codificadores separados
enc_team = OneHotEncoder(categories=[equipos], drop='first', sparse_output=False)
enc_rival = OneHotEncoder(categories=[equipos], drop='first', sparse_output=False)

# Entrenamiento - local
X_team = enc_team.fit_transform(df_train[['team']])
X_rival = enc_rival.fit_transform(df_train[['rival_team']])
team_cols = [f"team_{name}" for name in enc_team.categories_[0][1:]]
rival_cols = [f"rival_{name}" for name in enc_rival.categories_[0][1:]]
X_train_df = pd.concat([
    pd.DataFrame(X_team, columns=team_cols),
    pd.DataFrame(X_rival, columns=rival_cols),
    df_train[features_numericas].reset_index(drop=True)
], axis=1)
X_train_df = sm.add_constant(X_train_df)
y_train_home = df_train['goals_team'].reset_index(drop=True)

model_home = sm.GLM(y_train_home, X_train_df, family=sm.families.Poisson()).fit()


In [35]:
X_rival2 = enc_team.transform(df_train[['rival_team']])
X_team2 = enc_rival.transform(df_train[['team']])
X_train_away_df = pd.concat([
    pd.DataFrame(X_rival2, columns=team_cols),
    pd.DataFrame(X_team2, columns=rival_cols),
    df_train[features_numericas].reset_index(drop=True)
], axis=1)
X_train_away_df = sm.add_constant(X_train_away_df)
y_train_away = df_train['goals_rival'].reset_index(drop=True)

model_away = sm.GLM(y_train_away, X_train_away_df, family=sm.families.Poisson()).fit()


ValueError: The feature names should match those that were passed during fit.
Feature names unseen at fit time:
- rival_team
Feature names seen at fit time, yet now missing:
- team


In [36]:
# Test - local
X_team_test = enc_team.transform(df_test[['team']])
X_rival_test = enc_rival.transform(df_test[['rival_team']])
X_test_df = pd.concat([
    pd.DataFrame(X_team_test, columns=team_cols),
    pd.DataFrame(X_rival_test, columns=rival_cols),
    df_test[features_numericas].reset_index(drop=True)
], axis=1)
X_test_df = sm.add_constant(X_test_df)

# Test - visitante
X_rival_test2 = enc_team.transform(df_test[['rival_team']])
X_team_test2 = enc_rival.transform(df_test[['team']])
X_test_away_df = pd.concat([
    pd.DataFrame(X_rival_test2, columns=team_cols),
    pd.DataFrame(X_team_test2, columns=rival_cols),
    df_test[features_numericas].reset_index(drop=True)
], axis=1)
X_test_away_df = sm.add_constant(X_test_away_df)


ValueError: The feature names should match those that were passed during fit.
Feature names unseen at fit time:
- rival_team
Feature names seen at fit time, yet now missing:
- team


In [None]:
# Predicción
lambda_home = model_home.predict(X_test_df)
mu_away = model_away.predict(X_test_away_df)

# Predicción de resultados
max_goals = 10
factor = 0.97
umbral_empate = 0.29

In [None]:
predicciones = []
for i, row in df_test.iterrows():
    l = lambda_home[i] * factor
    m = mu_away[i] * factor
    matriz = np.outer(poisson.pmf(range(max_goals), l), poisson.pmf(range(max_goals), m))
    p_home = np.tril(matriz, -1).sum()
    p_draw = np.trace(matriz)
    p_away = np.triu(matriz, 1).sum()
    if p_draw >= umbral_empate:
        pred = 'D'
    else:
        pred = 'H' if p_home > p_away else 'A'
    predicciones.append({'P_H': p_home, 'P_D': p_draw, 'P_A': p_away, 'Pred': pred})

df_preds = pd.DataFrame(predicciones)
df_preds['real'] = df_test['result'].map({1: 'H', 0: 'D', -1: 'A'})


In [None]:
# Evaluación
f1 = f1_score(df_preds['real'], df_preds['Pred'], average='macro')
acc = (df_preds['real'] == df_preds['Pred']).mean()

print(f"F1 Score: {f1:.4f}")
print(f"Accuracy: {acc:.4f}")

# Matriz de confusión
cm = confusion_matrix(df_preds['real'], df_preds['Pred'], labels=['H', 'D', 'A'])
print("Confusion Matrix:")
print(cm)


In [None]:
# Apuestas
df_preds[['AvgH', 'AvgD', 'AvgA']] = df_test[['AvgH', 'AvgD', 'AvgA']].reset_index(drop=True)
df_preds['cuota'] = df_preds.apply(lambda r: r['AvgH'] if r['Pred'] == 'H' else (r['AvgD'] if r['Pred'] == 'D' else r['AvgA']), axis=1)
df_preds['acierto'] = (df_preds['Pred'] == df_preds['real']).astype(int)
df_preds['p_pred'] = df_preds.apply(lambda r: r['P_H'] if r['Pred'] == 'H' else (r['P_D'] if r['Pred'] == 'D' else r['P_A']), axis=1)
df_preds['stake'] = df_preds['p_pred'].apply(lambda p: 2 if p >= 0.6 else 1)
df_preds['ganancia'] = df_preds.apply(lambda r: r['cuota'] * r['stake'] if r['acierto'] == 1 else 0, axis=1)

acertadas = df_preds['acierto'].sum()
total_apostado = df_preds['stake'].sum()
total_ganado = df_preds['ganancia'].sum()
beneficio = total_ganado - total_apostado
rentabilidad = (beneficio / total_apostado) * 100

# Resultados
print(f"Apuestas acertadas: {acertadas}")
print(f"Total apostado: {total_apostado}")
print(f"Total ganado: {total_ganado}")
print(f"Beneficio: {beneficio}")
print(f"Rentabilidad: {rentabilidad:.2f}%")

In [None]:

# Log-verosimilitud
goles_home = df_test['goals_team'].values
goles_away = df_test['goals_rival'].values
log_probs = np.log(poisson.pmf(goles_home, lambda_home)) + np.log(poisson.pmf(goles_away, mu_away))
log_likelihood_total = log_probs.sum()
log_likelihood_media = log_likelihood_total / len(df_test)


print(f"Log-verosimilitud total: {log_likelihood_total:.4f}")
print(f"Log-verosimilitud media: {log_likelihood_media:.4f}")