In [832]:
import pandas as pd
import numpy as np
import time
import csv
from datetime import datetime

import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import (
    OneHotEncoder,
    StandardScaler,
    PolynomialFeatures,
)
from sklearn.pipeline import Pipeline

from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import (
    mean_absolute_error,
    mean_squared_error,
    confusion_matrix,
    classification_report,
    roc_curve,
    precision_recall_curve,
    roc_auc_score,
    accuracy_score,
)

df = pd.read_csv("./csv_files/engineered/nfl_games_attendance.csv")
df_corr = pd.read_csv("./csv_files/engineered/nfl_games_attendance_with_coefficients.csv")

### Setup para los modelos con el df regular

In [833]:
X = df.drop("game_attendance", axis=1)
y = df.game_attendance

# Identificar columnas numéricas y categóricas
numerical_features = X.select_dtypes(include=["int64", "float64"]).columns
categorical_features = X.select_dtypes(include=["object"]).columns

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

# Preprocesamiento para variables numéricas (normalización) y categóricas (one-hot encoding)
preprocessor = ColumnTransformer(
    transformers=[
        ("num", StandardScaler(), numerical_features),
        ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_features),
    ]
)

regular_scores = {
    "efficiency": {
        "linear": 0,
        "poly": 0,
        "lasso": 0,
        "ridge": 0,
    },
    "time": {
        "linear": 0,
        "poly": 0,
        "lasso": 0,
        "ridge": 0,
    },
    "precision": {
        "linear": 0,
        "poly": 0,
        "lasso": 0,
        "ridge": 0,
    },
}

### Modelos entrenados con el df regular

In [834]:
# Crear un pipeline que combine preprocesamiento y modelo de regresión lineal
linear_reg_model = Pipeline(
    steps=[("preprocessor", preprocessor), ("regressor", LinearRegression())]
)

linear_reg_start_time = time.time()

# Entrenar en el set de entrenamiento
linear_reg_model.fit(X_train, y_train)

# Predecir en el set de testeo
linear_reg_y_pred = linear_reg_model.predict(X_test)

linear_reg_end_time = time.time()

# Darle puntaje al modelo
linear_reg_score = linear_reg_model.score(X_test, y_test)
linear_reg_mean_absolute_error = mean_absolute_error(y_test, linear_reg_y_pred)
linear_reg_mean_squared_error = mean_squared_error(y_test, linear_reg_y_pred)

# Calcular tiempo transcurrido
linear_reg_time = linear_reg_end_time - linear_reg_start_time

print(f"Linear Regression Model Time Spent: {linear_reg_time}")
print(f"Linear Regression Model Score: {linear_reg_score:0.3f}")
print(f"Linear Regression Mean Absolute Error: {linear_reg_mean_absolute_error:0.3f}")
print(f"Linear Regression Mean Squared Error: {linear_reg_mean_squared_error:0.3f}")

regular_scores["precision"]["linear"] = linear_reg_score
regular_scores["time"]["linear"] = linear_reg_time

Linear Regression Model Time Spent: 0.044252634048461914
Linear Regression Model Score: 0.839
Linear Regression Mean Absolute Error: 2893.717
Linear Regression Mean Squared Error: 22534995.221


In [835]:
# poly = PolynomialFeatures(degree=1, include_bias=False)

# poly_train = poly.fit_transform(X_train.values.reshape(-1,1))
# poly_test = poly.fit_transform(X_test.values.reshape(-1,1))

# Crear un pipeline que combine preprocesamiento y modelo de regresión lineal
poly_reg_model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('poly', PolynomialFeatures(degree=3, include_bias=False)),
    ('regressor', LinearRegression())
])

poly_reg_start_time = time.time()

# Entrenar en el set de entrenamiento
poly_reg_model.fit(X_train, y_train)

# Predecir en el set de testeo
poly_reg_y_pred = poly_reg_model.predict(X_test)

poly_reg_end_time = time.time()

# Darle puntaje al modelo
poly_reg_score = poly_reg_model.score(X_test, y_test)
poly_reg_mean_absolute_error = mean_absolute_error(y_test, poly_reg_y_pred)
poly_reg_mean_squared_error = mean_squared_error(y_test, poly_reg_y_pred)

# Calcular tiempo transcurrido
poly_reg_time = poly_reg_end_time - poly_reg_start_time

print(f"Poly Regression Model Time Spent: {poly_reg_time}")
print(f'Poly Regression Model Score: {poly_reg_score:0.3f}')
print(f'Poly Regression Mean Absolute Error: {poly_reg_mean_absolute_error:0.3f}')
print(f'Poly Regression Mean Squared Error: {poly_reg_mean_squared_error:0.3f}')

regular_scores["precision"]["poly"] = poly_reg_score
regular_scores["time"]["poly"] = poly_reg_time

Poly Regression Model Time Spent: 8.844347476959229
Poly Regression Model Score: 0.761
Poly Regression Mean Absolute Error: 3320.608
Poly Regression Mean Squared Error: 33494519.870


In [836]:
# Crear un pipeline que combine preprocesamiento y modelo de regresión lasso
lasso_model = Pipeline(
    steps=[("preprocessor", preprocessor), ("regressor", Lasso(alpha=20))]
)

lasso_start_time = time.time()

# Entrenar en el set de entrenamiento
lasso_model.fit(X_train, y_train)

# Predecir en el set de testeo
lasso_y_pred = lasso_model.predict(X_test)

lasso_end_time = time.time()

# Darle puntaje al modelo
lasso_score = lasso_model.score(X_test, y_test)
lasso_mean_absolute_error = mean_absolute_error(y_test, lasso_y_pred)
lasso_mean_squared_error = mean_squared_error(y_test, lasso_y_pred)

# Calcular tiempo transcurrido
lasso_time = lasso_end_time - lasso_start_time

print(f"Lasso Regression Model Time Spent: {lasso_time}")
print(f"Lasso Model Score: {lasso_score:0.3f}")
print(f"Lasso Mean Absolute Error: {lasso_mean_absolute_error:0.3f}")
print(f"Lasso Mean Absolute Error: {lasso_mean_squared_error:0.3f}")

regular_scores["precision"]["lasso"] = lasso_score
regular_scores["time"]["lasso"] = lasso_time

Lasso Regression Model Time Spent: 0.08952546119689941
Lasso Model Score: 0.846
Lasso Mean Absolute Error: 2505.418
Lasso Mean Absolute Error: 21569104.458


In [837]:
# Crear un pipeline que combine preprocesamiento y modelo de regresión ridge
ridge_model = Pipeline(
    steps=[("preprocessor", preprocessor), ("regressor", Ridge(alpha=4))]
)

ridge_start_time = time.time()

# Entrenar en el set de entrenamiento
ridge_model.fit(X_train, y_train)

# Predecir en el set de testeo
ridge_y_pred = ridge_model.predict(X_test)

ridge_end_time = time.time()

# Darle puntaje al modelo
ridge_score = ridge_model.score(X_test, y_test)
ridge_mean_absolute_error = mean_absolute_error(y_test, ridge_y_pred)
ridge_mean_squared_error = mean_squared_error(y_test, ridge_y_pred)

# Calcular tiempo transcurrido
ridge_time = ridge_end_time - ridge_start_time

print(f"Ridge Regression Model Time Spent: {ridge_time}")
print(f"Ridge Model Score: {ridge_score:0.3f}")
print(f"Ridge Mean Absolute Error: {ridge_mean_absolute_error:0.3f}")
print(f"Ridge Mean Absolute Error: {ridge_mean_squared_error:0.3f}")

regular_scores["precision"]["ridge"] = ridge_score
regular_scores["time"]["ridge"] = ridge_time

Ridge Regression Model Time Spent: 0.03755378723144531
Ridge Model Score: 0.835
Ridge Mean Absolute Error: 2646.455
Ridge Mean Absolute Error: 23085661.898


### Setup para los modelos con el df de los coeficientes de correlacion

In [838]:
X_corr = df_corr.drop("game_attendance", axis=1)
y_corr = df_corr.game_attendance

# Identificar columnas numéricas, descartar el resto
corr_numerical_features = X_corr.select_dtypes(include=["int64", "float64"]).columns

X_corr_train, X_corr_test, y_corr_train, y_corr_test = train_test_split(X_corr, y_corr, test_size=0.3)

# Preprocesamiento para variables numéricas (normalización) y categóricas (one-hot encoding)
preprocessor_corr = ColumnTransformer(
    transformers=[
        ("num", StandardScaler(), corr_numerical_features),
    ]
)

corr_scores = {
    "efficiency": {
        "linear": 0,
        "poly": 0,
        "lasso": 0,
        "ridge": 0,
    },
    "time": {
        "linear": 0,
        "poly": 0,
        "lasso": 0,
        "ridge": 0,
    },
    "precision": {
        "linear": 0,
        "poly": 0,
        "lasso": 0,
        "ridge": 0,
    },
}

### Modelos entrenados con el df con los coeficientes de correlacion

In [839]:
# # Crear un pipeline que combine preprocesamiento y modelo de regresión lineal
# linear_reg_model_corr = Pipeline(
#     steps=[("preprocessor", preprocessor_corr), ("regressor", LinearRegression())]
# )

# linear_reg_start_time_corr = time.time()

# # Entrenar en el set de entrenamiento
# linear_reg_model_corr.fit(X_corr_train, y_corr_train)

# # Predecir en el set de testeo
# linear_reg_y_pred_corr = linear_reg_model_corr.predict(X_corr_test)

# linear_reg_end_time_corr = time.time()

# # Darle puntaje al modelo
# linear_reg_score_corr = linear_reg_model_corr.score(X_corr_test, y_corr_test)
# linear_reg_mean_absolute_error_corr = mean_absolute_error(y_corr_test, linear_reg_y_pred_corr)
# linear_reg_mean_squared_error_corr = mean_squared_error(y_corr_test, linear_reg_y_pred_corr)

# # Calcular tiempo transcurrido
# linear_reg_time_corr = linear_reg_end_time_corr - linear_reg_start_time_corr

# print(f"Linear Regression Model Time Spent: {linear_reg_time_corr}")
# print(f"Linear Regression Model Score: {linear_reg_score_corr:0.3f}")
# print(f"Linear Regression Mean Absolute Error: {linear_reg_mean_absolute_error_corr:0.3f}")
# print(f"Linear Regression Mean Squared Error: {linear_reg_mean_squared_error_corr:0.3f}")

# corr_scores["precision"]["linear"] = linear_reg_score_corr
# corr_scores["time"]["linear"] = linear_reg_time_corr

In [840]:

# linear_reg_end_time_corr = time.time()

# # Darle puntaje al modelo
# linear_reg_score_corr = linear_reg_model_corr.score(X_corr_test, y_corr_test)
# linear_reg_mean_absolute_error_corr = mean_absolute_error(y_corr_test, linear_reg_y_pred_corr)
# linear_reg_mean_squared_error_corr = mean_squared_error(y_corr_test, linear_reg_y_pred_corr)

# # Calcular tiempo transcurrido
# linear_reg_time_corr = linear_reg_end_time_corr - linear_reg_start_time_corr

# print(f"Linear Regression Model Time Spent: {linear_reg_time_corr}")
# print(f"Linear Regression Model Score: {linear_reg_score_corr:0.3f}")
# print(f"Linear Regression Mean Absolute Error: {linear_reg_mean_absolute_error_corr:0.3f}")
# print(f"Linear Regression Mean Squared Error: {linear_reg_mean_squared_error_corr:0.3f}")

# corr_scores["precision"]["linear"] = linear_reg_score_corr
# corr_scores["time"]["linear"] = linear_reg_time_corr

In [841]:
# # Crear un pipeline que combine preprocesamiento y modelo de regresión ridge
# poly_reg_model_corr = Pipeline(
#     steps=[("preprocessor", preprocessor_corr),
#            ('poly', PolynomialFeatures(degree=3, include_bias=False)),
#            ("regressor", LinearRegression())]
# )

# poly_reg_start_time_corr = time.time()

# # Entrenar en el set de entrenamiento
# poly_reg_model_corr.fit(X_corr_train, y_corr_train)

# # Predecir en el set de testeo
# poly_reg_y_pred_corr = poly_reg_model_corr.predict(X_corr_test)

# poly_reg_end_time_corr = time.time()

# # Darle puntaje al modelo
# poly_reg_score_corr = poly_reg_model_corr.score(X_corr_test, y_corr_test)
# poly_reg_mean_absolute_error_corr = mean_absolute_error(y_corr_test, poly_reg_y_pred_corr)
# poly_reg_mean_squared_erro_corrr = mean_squared_error(y_corr_test, poly_reg_y_pred_corr)

# # Calcular tiempo transcurrido
# poly_reg_time_corr = poly_reg_end_time_corr - poly_reg_start_time_corr

# print(f"Poly Regression Model Time Spent: {poly_reg_time_corr}")
# print(f"Poly Regression Model Score: {poly_reg_score_corr:0.3f}")
# print(f"Poly Regression Mean Absolute Error: {poly_reg_mean_absolute_error_corr:0.3f}")
# print(f"Poly Regression Mean Squared Error: {poly_reg_mean_squared_erro_corrr:0.3f}")

# corr_scores["precision"]["poly"] = poly_reg_score_corr
# corr_scores["time"]["poly"] = poly_reg_time_corr

In [842]:
# # Crear un pipeline que combine preprocesamiento y modelo de regresión lineal
# lasso_model_corr = Pipeline(
#     steps=[("preprocessor", preprocessor_corr), ("regressor", Lasso())]
# )

# lasso_start_time_corr = time.time()

# # Entrenar en el set de entrenamiento
# lasso_model_corr.fit(X_corr_train, y_corr_train)

# # Predecir en el set de testeo
# lasso_y_pred_corr = lasso_model_corr.predict(X_corr_test)

# lasso_end_time_corr = time.time()

# # Darle puntaje al modelo
# lasso_score_corr = lasso_model_corr.score(X_corr_test, y_corr_test)
# lasso_mean_absolute_error_corr = mean_absolute_error(y_corr_test, lasso_y_pred_corr)
# lasso_mean_squared_error_corr = mean_squared_error(y_corr_test, lasso_y_pred_corr)

# # Calcular tiempo transcurrido
# lasso_time_corr = lasso_end_time_corr - lasso_start_time_corr

# print(f"Lasso Regression Model Time Spent: {lasso_time_corr}")
# print(f"Lasso Regression Model Score: {lasso_score_corr:0.3f}")
# print(f"Lasso Regression Mean Absolute Error: {lasso_mean_absolute_error_corr:0.3f}")
# print(f"Lasso Regression Mean Squared Error: {lasso_mean_squared_error_corr:0.3f}")

# corr_scores["precision"]["lasso"] = lasso_score_corr
# corr_scores["time"]["lasso"] = lasso_time_corr

In [843]:
# # Crear un pipeline que combine preprocesamiento y modelo de regresión lineal
# ridge_model_corr = Pipeline(
#     steps=[("preprocessor", preprocessor_corr), ("regressor", Ridge())]
# )

# ridge_start_time_corr = time.time()

# # Entrenar en el set de entrenamiento
# ridge_model_corr.fit(X_corr_train, y_corr_train)

# # Predecir en el set de testeo
# ridge_y_pred_corr = ridge_model_corr.predict(X_corr_test)

# ridge_end_time_corr = time.time()

# # Darle puntaje al modelo
# ridge_score_corr = ridge_model_corr.score(X_corr_test, y_corr_test)
# ridge_mean_absolute_error_corr = mean_absolute_error(y_corr_test, ridge_y_pred_corr)
# ridge_mean_squared_error_corr = mean_squared_error(y_corr_test, ridge_y_pred_corr)

# # Calcular tiempo transcurrido
# ridge_time_corr = ridge_end_time_corr - ridge_start_time_corr

# print(f"Ridge Regression Model Time Spent: {ridge_time_corr}")
# print(f"Ridge Regression Model Score: {ridge_score_corr:0.3f}")
# print(f"Ridge Regression Mean Absolute Error: {ridge_mean_absolute_error_corr:0.3f}")
# print(f"Ridge Regression Mean Squared Error: {ridge_mean_squared_error_corr:0.3f}")

# corr_scores["precision"]["ridge"] = ridge_score_corr
# corr_scores["time"]["ridge"] = ridge_time_corr

### Calcular variables de evaluacion

In [844]:
print(regular_scores)
# print(corr_scores)

# Normalizar la variable de evaluacion de la rapidez, a partir del tiempo de ejecucion

max_time_regular = max(regular_scores["time"], key=regular_scores["time"].get)
for model in regular_scores["time"]:
    regular_scores["time"][model] = regular_scores["time"][model] / regular_scores["time"][max_time_regular]

# max_time_corr = max(corr_scores["time"], key=corr_scores["time"].get)
# for model in corr_scores["time"]:
#     corr_scores["time"][model] = corr_scores["time"][model] / corr_scores["time"][max_time_corr]

# Calcular la eficiencia final de cada modelo

for model in regular_scores["efficiency"]:
    regular_scores["efficiency"][model] = (1 - regular_scores["time"][model]) * 0.2 + (regular_scores["precision"][model]) * 0.8

# for model in corr_scores["efficiency"]:
#     corr_scores["efficiency"][model] = (1 - corr_scores["time"][model]) * 0.2 + (corr_scores["precision"][model]) * 0.8

print("\n",regular_scores)
# print(corr_scores)

fields=[regular_scores["efficiency"]["linear"], regular_scores["time"]["linear"], regular_scores["precision"]["linear"], regular_scores["efficiency"]["poly"], regular_scores["time"]["poly"], regular_scores["precision"]["poly"], regular_scores["efficiency"]["lasso"], regular_scores["time"]["lasso"], regular_scores["precision"]["lasso"], regular_scores["efficiency"]["ridge"], regular_scores["time"]["ridge"], regular_scores["precision"]["ridge"]]

# Guardo registro de los resultados de cada ejecucion
# Esta desactivado porque ya se corrio 50 veces, y el archivo ya tiene suficientes registros
# with open(r"efficiency_registry.csv", "a", newline="") as f:
#     writer = csv.writer(f)
#     writer.writerow(fields)



{'efficiency': {'linear': 0, 'poly': 0, 'lasso': 0, 'ridge': 0}, 'time': {'linear': 0.044252634048461914, 'poly': 8.844347476959229, 'lasso': 0.08952546119689941, 'ridge': 0.03755378723144531}, 'precision': {'linear': 0.8392600703598627, 'poly': 0.7610868467333038, 'lasso': 0.8461496752506843, 'ridge': 0.8353322185037645}}

 {'efficiency': {'linear': 0.8704073576119564, 'poly': 0.6088694773866431, 'lasso': 0.8590146479611676, 'ridge': 0.8607550173567226}, 'time': {'linear': 0.005003493379669474, 'poly': 1.0, 'lasso': 0.08952546119689941, 'ridge': 0.03755378723144531}, 'precision': {'linear': 0.8392600703598627, 'poly': 0.7610868467333038, 'lasso': 0.8461496752506843, 'ridge': 0.8353322185037645}}


### Analizo la eficiencia promedio de cada modelo

In [876]:
efficiency_df = pd.read_csv("./csv_files/engineered/efficiency_registry.csv")

efficiency_df.drop(inplace=True, axis=1, columns=["linear_model_time", "linear_model_precision", "poly_model_time", "poly_model_precision", "lasso_model_time", "lasso_model_precision", "ridge_model_time", "ridge_model_precision"])

print(efficiency_df.describe())

# El promedio de eficiencia mas alto es el del modelo lasso

       linear_model_efficiency  poly_model_efficiency  lasso_model_efficiency  \
count                50.000000              50.000000               50.000000   
mean                  0.864184               0.633419                0.881660   
std                   0.043928               0.053509                0.047845   
min                   0.683922               0.437914                0.691104   
25%                   0.845605               0.599753                0.860093   
50%                   0.873456               0.649747                0.890980   
75%                   0.892674               0.672316                0.918188   
max                   0.923940               0.698956                0.937186   

       ridge_model_efficiency  
count               50.000000  
mean                 0.878381  
std                  0.047976  
min                  0.686176  
25%                  0.859655  
50%                  0.890967  
75%                  0.912302  
max           

### Usar datos propios para generar predicciones

No se utiliza la matriz de correlacion, porque, en caso de ingresar valor nuevos para las variables categoricas (por ejemplo, una nueva fecha), el valor de correlacion a la asistencia no estaria calculado, y tampoco se podria calcular.

Hago las predicciones solamente con el modelo lasso, porque parece ser el mas eficiente. Igualmente, se podrian hacer con cualquiera de los otros

In [845]:
sample_week = 1
sample_home_team = "Chiefs"
sample_away_team = "Ravens"
sample_date = "2024-09-05"
sample_time = "21:40:00"
sample_pts_win = 27
sample_pts_loss = 20
sample_home_team_previous_year_performance = 3.7
sample_away_team_previous_year_performance = 13.2
sample_home_team_current_year_performance = 10.1
sample_away_team_current_year_performance = 4.8
sample_home_team_current_sos = 3.5
sample_away_team_current_sos = 0.2
sample_weather_condition = "Cloudy"

# Serie de diccionarios para mappear los variables no dadas a partir de las dadas (por ejemplo, el estadio y la ciudad a partir del equipo local)
days_week = {
    0: "Mon",
    1: "Tue",
    2: "Wed",
    3: "Thu",
    4: "Fri",
    5: "Sat",
    6: "Sun",
}

home_team_names = {
    "Chiefs": "Kansas City Chiefs",
    "Raiders": "Oakland Raiders",
    "Falcons": "Atlanta Falcons",
    "Bills": "Buffalo Bills",
    "Steelers": "Pittsburgh Steelers",
    "Ravens": "Baltimore Ravens",
    "Eagles": "Philadelphia Eagles",
    "Lions": "Detroit Lions",
    "Jaguars": "Jacksonville Jaguars",
    "Rams": "Los Angeles Rams",
    "Panthers": "Carolina Panthers",
    "Packers": "Green Bay Packers",
    "Cowboys": "Dallas Cowboys",
    "Vikings": "Minnesota Vikings",
    "Broncos": "Denver Broncos",
    "Texans": "Houston Texans",
    "Cardinals": "Arizona Cardinals",
    "Patriots": "New England Patriots",
    "Buccaneers": "Tampa Bay Buccaneers",
    "Titans": "Tennessee Titans",
    "Dolphins": "Miami Dolphins",
    "Seahawks": "Seattle Seahawks",
    "Redskins": "Washington Redskins",
    "Commanders": "Washington Redskins",
    "Jets": "New York Jets",
    "Colts": "Indianapolis Colts",
    "Bears": "Chicago Bears",
    "Saints": "New Orleans Saints",
    "49ers": "San Francisco 49ers",
    "Browns": "Cleveland Browns",
    "Bengals": "Cincinnati Bengals",
    "Chargers": "Los Angeles Chargers",
    "Giants": "New York Giants",
}

stadium_names = {
    "Bears": "Soldier Field",
    "Packers": "Lambeau Field",
    "Chiefs": "Arrowhead",
    "Bills": "Highmark",
    "Saints": "Caesars Superdome",
    "Dolphins": "Hard Rock",
    "Jaguars": "EverBank",
    "Panthers": "Bank of America",
    "Redskins": "Northwest",
    "Ravens": "M&T Bank",
    "Buccaneers": "Raymond James",
    "Browns": "Huntington Bank",
    "Titans": "Nissan",
    "Bengals": "Paycor",
    "Broncos": "Empower Field at Mile High",
    "Steelers": "Acrisure",
    "Lions": "Ford Field",
    "Texans": "NRG",
    "Patriots": "Gillette",
    "Seahawks": "Lumen Field",
    "Eagles": "Lincoln Financial Field",
    "Cardinals": "State Farm",
    "Colts": "Lucas Oil",
    "Cowboys": "AT&T",
    "Jets": "MetLife",
    "Giants": "MetLife",
    "49ers": "Levi's",
    "Vikings": "US Bank",
    "Falcons": "Mercedes-Benz",
    "Raiders": "Oakland Coliseum",
    "Rams": "Los Angeles Memorial Coliseum",
    "Chargers": "Dignity Health Sports Park",
}

stadium_max_capacities = {
    "Bears": 66944,
    "Packers": 81441,
    "Chiefs": 76416,
    "Bills": 80290,
    "Saints": 76468,
    "Dolphins": 70000,
    "Jaguars": 82000,
    "Panthers": 74867,
    "Redskins": 67617,
    "Ravens": 70745,
    "Buccaneers": 74512,
    "Browns": 54147,
    "Titans": 69143,
    "Bengals": 67260,
    "Broncos": 76125,
    "Steelers": 68400,
    "Lions": 70000,
    "Texans": 75000,
    "Patriots": 71000,
    "Seahawks": 72000,
    "Eagles": 75000,
    "Cardinals": 72200,
    "Colts": 70000,
    "Cowboys": 85000,
    "Jets": 83367,
    "Giants": 83367,
    "49ers": 75000,
    "Vikings": 73000,
    "Falcons": 79330,
    "Raiders": 63132,
    "Rams": 93607,
    "Chargers": 27000,
}

stadium_regular_capacities = {
    "Bears": 62500,
    "Packers": 79704,
    "Chiefs": 76416,
    "Bills": 74000,
    "Saints": 73208,
    "Dolphins": 64767,
    "Jaguars": 67814,
    "Panthers": 74867,
    "Redskins": 67617,
    "Ravens": 70745,
    "Buccaneers": 69218,
    "Browns": 50805,
    "Titans": 69143,
    "Bengals": 65515,
    "Broncos": 76125,
    "Steelers": 68400,
    "Lions": 65000,
    "Texans": 72220,
    "Patriots": 65878,
    "Seahawks": 68740,
    "Eagles": 69879,
    "Cardinals": 63400,
    "Colts": 63000,
    "Cowboys": 80000,
    "Jets": 82500,
    "Giants": 82500,
    "49ers": 68500,
    "Vikings": 66655,
    "Falcons": 71000,
    "Raiders": 53200,
    "Rams": 77500,
    "Chargers": 27000,
}

cities = {
    "Patriots": "Foxborough,MA",
    "Packers": "Green_Bay,WI",
    "Chiefs": "Kansas_City,MO",
    "Cowboys": "Arlington,TX",
    "Rams": "Inglewood,CA",
    "Buccaneers": "Tampa,FL",
    "49ers": "Santa_Clara,CA",
    "Bears": "Chicago,IL",
    "Dolphins": "Miami_Gardens,FL",
    "Jets": "East_Rutherford,NJ",
    "Giants": "East_Rutherford,NJ",
    "Raiders": "Paradise,NV",
    "Broncos": "Denver,CO",
    "Seahawks": "Seattle,WA",
    "Cardinals": "Glendale,AZ",
    "Eagles": "Philadelphia,PA",
    "Bengals": "Cincinnati,OH",
    "Browns": "Cleveland,OH",
    "Steelers": "Pittsburgh,PA",
    "Ravens": "Baltimore,MD",
    "Falcons": "Atlanta,GA",
    "Saints": "New_Orleans,LA",
    "Panthers": "Charlotte,NC",
    "Vikings": "Minneapolis,MN",
    "Lions": "Detroit,MI",
    "Colts": "Indianapolis,IN",
    "Texans": "Houston,TX",
    "Titans": "Nashville,TN",
    "Jaguars": "Jacksonville,FL",
    "Bills": "Orchard_Park,NY",
    "Commanders": "Landover,MD",
    "Redskins": "Landover,MD",
    "Chargers": "Inglewood,CA",
}

sample_year = datetime.strptime(sample_date, "%Y-%m-%d").year
sample_day = days_week[datetime.strptime(sample_date, "%Y-%m-%d").weekday()]
sample_winner = home_team_names[sample_home_team]
sample_stadium_name = stadium_names[sample_home_team]
sample_stadium_max_capacity = stadium_max_capacities[sample_home_team]
sample_stadium_regular_capacity = stadium_regular_capacities[sample_home_team]
sample_home_city = cities[sample_home_team]

sample_data = pd.DataFrame(
    [[sample_year, sample_week, sample_winner, sample_day, sample_date, sample_time, sample_pts_win, sample_pts_loss, sample_home_team, sample_away_team, sample_home_team_previous_year_performance, sample_away_team_previous_year_performance, sample_home_team_current_year_performance, sample_away_team_current_year_performance, sample_home_team_current_sos, sample_away_team_current_sos, sample_stadium_name,sample_stadium_max_capacity,sample_stadium_regular_capacity,sample_home_city, sample_weather_condition]],
    columns=["year","week","winner","day","date","time","pts_win","pts_loss","home_team_name","away_team_name","home_team_previous_year_performance","away_team_previous_year_performance","home_team_current_year_performance","away_team_current_year_performance","home_team_current_sos","away_team_current_sos","stadium_name","stadium_max_capacity","stadium_regular_capacity","home_city","weather_condition"])

results = []

# Hace 100 iteraciones de la prediccion para calcular el promedio
for i in range(100):
    # Identificar columnas numéricas y categóricas
    numerical_features = sample_data.select_dtypes(include=["int64", "float64"]).columns
    categorical_features = sample_data.select_dtypes(include=["object"]).columns

    # Volver a calcular sets de entrenamiento y testeo
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

    # Entrenar en el set de entrenamiento
    lasso_model.fit(X_train, y_train)

    # Predecir en el set de testeo
    lasso_sample_data_prediction = lasso_model.predict(sample_data)

    results.append(lasso_sample_data_prediction)

results = np.array(results)
mean_result = np.mean(results)

print(mean_result)
print(results)

