In [2]:
import pandas as pd
import numpy as np
import warnings
from sklearn.preprocessing import OrdinalEncoder, StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error

In [3]:
df = pd.read_csv(r'./NBA_Temp2022_2023.csv') # importar el archivo csv
df = df.iloc[:, 0:6] # seleccionar solo las primeras 6 columnas del archivo csv
df.head() # imprimir las primeras 5 filas

Unnamed: 0,Team 1,Team 2,Team 1 Points,Team 2 Points,Team 1 Location,Team 2 Location
0,Boston,Philadelphia,126,117,1,0
1,Boston,Miami,111,104,0,1
2,Boston,Orlando,126,120,0,1
3,Boston,Chicago,102,120,0,1
4,Boston,Cleveland,123,132,1,0


In [4]:
# dividimos el dataset en 2, train y test
train_set, test_set = train_test_split(df, test_size=0.2, random_state=42)
X = train_set.drop("Team 2 Points", axis=1)
y = train_set["Team 2 Points"].copy()
X[["Team 1 Points"]] = X[["Team 1 Points"]].astype(int)
X.head()

Unnamed: 0,Team 1,Team 2,Team 1 Points,Team 1 Location,Team 2 Location
273,Denver,Milwaukee,129,1,0
259,Denver,Cleveland,115,0,1
30,Boston,Orlando,92,1,0
22,Boston,Miami,116,1,0
277,Denver,Golden State,112,1,0


In [19]:
# necesitamos convertir el texto a número
# para esto usamos OrdinalEncoder
# así pasamos de p. ej. "Miami" a 0
encoder_team_01 = OrdinalEncoder()
encoder_team_02 = OrdinalEncoder()
X[["Team 1"]] = encoder_team_01.fit_transform(X[["Team 1"]])
X[["Team 2"]] = encoder_team_02.fit_transform(X[["Team 2"]])

# y también necesitamos normalizar los valores (valores entre 0 - 1)
scaler = MinMaxScaler()
# los puntos ahora están en un rango de 0 a 1
X[["Team 1 Points"]] = scaler.fit_transform(X[["Team 1 Points"]])
X.head()

Unnamed: 0,Team 1,Team 2,Team 1 Points,Team 1 Location,Team 2 Location
273,1.0,16.0,0.711864,1,0
259,1.0,5.0,0.474576,0,1
30,0.0,21.0,0.084746,1,0
22,0.0,15.0,0.491525,1,0
277,1.0,9.0,0.423729,1,0


In [6]:
# entrenar primer modelo - LinearRegression (regresión lineal)
lin_reg = LinearRegression()
lin_reg.fit(X, y)

# probar con filas aleatorias del training set
data = X.iloc[:5]
labels = y.iloc[:5]
predictions_lin = lin_reg.predict(data)

lin_metric = mean_squared_error(labels, predictions_lin)
lin_metric_s = np.sqrt(lin_metric)

print("Puntos Equipo 2 (Predicciones): ", predictions_lin)
print("Puntos Equipo 2 (Originales): ", list(labels))
print("Error Promedio: ", lin_metric_s)

Puntos Equipo 2 (Predicciones):  [112.90404719 113.43185953 101.35991236 108.81828498 108.0208285 ]
Puntos Equipo 2 (Originales):  [106, 109, 95, 120, 110]
Error Promedio:  6.880459322098543


In [7]:
# entrenar segundo modelo - DecisionTreeRegressor
tree_reg = DecisionTreeRegressor()
tree_reg.fit(X, y)

# probar con las mismas filas aleatorias del training set
data = X.iloc[:5]
labels = y.iloc[:5]
predictions_tree = tree_reg.predict(data)

tree_metric = mean_squared_error(labels, predictions_tree)
tree_metric_s = np.sqrt(tree_metric)

print("Puntos Equipo 2 (Predicciones): ", predictions_tree)
print("Puntos Equipo 2 (Originales): ", list(labels))
print("Error Promedio: ", tree_metric_s) # error muy bajo, posible overfitting

Puntos Equipo 2 (Predicciones):  [106.  109.   95.  121.5 110. ]
Puntos Equipo 2 (Originales):  [106, 109, 95, 120, 110]
Error Promedio:  0.6708203932499369


In [8]:
# medir error con cross-validation
scores = cross_val_score(tree_reg, X, y, scoring="neg_mean_squared_error", cv=10)
tree_metric_s = np.sqrt(-scores)
print("Error Promedio: ", tree_metric_s.mean()) # el error real es de 13.87 puntos

Error Promedio:  14.102343786741793


In [15]:
# último paso, evaluar ambos modelos con el test set
X_test = test_set.drop("Team 2 Points", axis=1)
y_test = test_set["Team 2 Points"].copy()

# tambien necesitamos modificar X_test para 
# poder hacer predicciones con esos datos
encoder_test_01 = OrdinalEncoder()
encoder_test_02 = OrdinalEncoder()
X_test[["Team 1"]] = encoder_test_01.fit_transform(X_test[["Team 1"]])
X_test[["Team 2"]] = encoder_test_02.fit_transform(X_test[["Team 2"]])
scaler = MinMaxScaler()
X_test[["Team 1 Points"]] = scaler.fit_transform(X_test[["Team 1 Points"]])

final_predictions_lin = lin_reg.predict(X_test) # predicciones con modelo LinearRegression
final_predictions_tree = tree_reg.predict(X_test) # predicciones con modelo DecisionTreeRegressor

# calculamos los errores de las útlimas predicciones
lin_test_metric = mean_squared_error(y_test, final_predictions_lin)
lin_test_metric = np.sqrt(lin_test_metric)
print("Error Promedio Modelo (LinearRegression): ", lin_test_metric)

scores = cross_val_score(tree_reg, X_test, y_test, scoring="neg_mean_squared_error", cv=10)
tree_metric_s = np.sqrt(-scores)
print("Error Promedio Modelo (DecisionTreeRegressor): ", tree_metric_s.mean())

Error Promedio Modelo (LinearRegression):  28.043524216905162
Error Promedio Modelo (DecisionTreeRegressor):  23.587163738641856


In [18]:
warnings.filterwarnings('ignore') # ignorar advertencias
# ahora es momento de predecir el resultado de las finales...
# Team 1: Denver = 1, Miami = 2
# Team 2: Denver = 7, Miami = 15
denver_games = pd.read_csv(r'./Finals.csv') # importar el archivo csv
denver_games = denver_games.drop("Team 2 Points", axis=1)
original_points = denver_games[["Team 1 Points"]].to_numpy()

scaler = MinMaxScaler()
denver_games[["Team 1 Points"]] = scaler.fit_transform(denver_games[["Team 1 Points"]])
points_predictions = lin_reg.predict(denver_games)

denver = 0
miami = 0
print("------------ Resultados -------------")
print("---------- Denver vs Miami ----------")
for i in range(0, len(points_predictions)):
    if denver <= 3 and miami <= 3:
        if original_points[i][0] > int(points_predictions[i]):
            denver += 1
        else:
            miami += 1
        print(f"Juego {i + 1}: Denver {original_points[i][0]} - {int(points_predictions[i])} Miami")
    else:
        break
print("-------------------------------------")

------------ Resultados -------------
---------- Denver vs Miami ----------
Juego 1: Denver 114 - 112 Miami
Juego 2: Denver 109 - 105 Miami
Juego 3: Denver 110 - 103 Miami
Juego 4: Denver 108 - 100 Miami
-------------------------------------
