## Primera parte: extracción de los datos, transformación y limpieza

In [1]:
# Importar librerias

import pandas as pd
import kaggle
from kaggle.api.kaggle_api_extended import KaggleApi

In [2]:
# descargar archivo comprimido con los datasets

!kaggle datasets download -d mujinjo/stephen-curry-stats-20092021-in-nba

stephen-curry-stats-20092021-in-nba.zip: Skipping, found more recently modified local copy (use --force to force download)


In [3]:
# extraer los datasets del archivo comprimido

from zipfile import ZipFile
dataset = './stephen-curry-stats-20092021-in-nba.zip'

with ZipFile(dataset,'r') as z:
    z.extractall()

In [4]:
# asignar variable al dataset con las stats de la temporada regular y ver primeras 5 filas

regularseason = pd.read_csv('Stephen Curry Regularseason stats.csv')
regularseason.head()

Unnamed: 0,Season_year,Season_div,Date,OPP,Result,T Score,O Score,MIN,FG,FGM,...,FTM,FTA,FT%,REB,AST,BLK,STL,PF,TO,PTS
0,2009-2010,Regular,Wed 4/14,POR,W,122,116,48.0,13-25,13,...,12,12,100.0,9,8,1,2,0,2,42
1,2009-2010,Regular,Tue 4/13,UTA,L,94,103,41.0,5-15,5,...,6,6,100.0,5,6,0,2,4,2,17
2,2009-2010,Regular,Sun 4/11,OKC,W,120,117,35.0,9-16,9,...,4,6,66.7,7,7,0,1,2,5,25
3,2009-2010,Regular,Sat 4/10,LAC,L,104,107,41.0,10-19,10,...,4,4,100.0,9,4,0,3,5,3,29
4,2009-2010,Regular,Wed 4/7,MIN,W,116,107,44.0,12-22,12,...,0,0,0.0,8,14,0,7,4,4,27


In [5]:
# ver tipos de datos de la tabla

regularseason.dtypes

Season_year     object
Season_div      object
Date            object
OPP             object
Result          object
T Score          int64
O Score          int64
MIN            float64
FG              object
FGM              int64
FGA              int64
FG%            float64
3PT             object
3PTM             int64
3PTA             int64
3P%            float64
FT              object
FTM              int64
FTA              int64
FT%            float64
REB              int64
AST              int64
BLK              int64
STL              int64
PF               int64
TO               int64
PTS              int64
dtype: object

In [6]:
regularseason.tail()

Unnamed: 0,Season_year,Season_div,Date,OPP,Result,T Score,O Score,MIN,FG,FGM,...,FTM,FTA,FT%,REB,AST,BLK,STL,PF,TO,PTS
761,2020-2021,Regular,Fri 5/21,MEM,L,112,117,47.0,13-28,13,...,7,7,100.0,4,5,0,3,5,7,39
762,2020-2021,Regular,Wed 5/19,LAL,L,100,103,41.0,12-23,12,...,7,8,87.5,7,3,0,0,2,6,37
763,2020-2021,Regular,Thu 12/17,SAC,W,113,105,29.0,9-23,9,...,5,5,100.0,6,3,0,2,2,1,29
764,2020-2021,Regular,Tue 12/15,SAC,L,113,114,28.0,11-21,11,...,2,3,66.7,4,4,1,3,0,4,29
765,2020-2021,Regular,Sat 12/12,DEN,W,107,105,21.0,3-10,3,...,2,2,100.0,1,3,0,2,1,3,10


In [7]:
# transformar las columnas de fechas y años, dividir la columna de season year en primer y segundo año

seasons = regularseason["Season_year"].str.split('-',expand=True)
seasons.columns = ['season_year1', 'season_year2']
regularseason1 = pd.concat([regularseason, seasons], axis=1)
regularseason1.head()

Unnamed: 0,Season_year,Season_div,Date,OPP,Result,T Score,O Score,MIN,FG,FGM,...,FT%,REB,AST,BLK,STL,PF,TO,PTS,season_year1,season_year2
0,2009-2010,Regular,Wed 4/14,POR,W,122,116,48.0,13-25,13,...,100.0,9,8,1,2,0,2,42,2009,2010
1,2009-2010,Regular,Tue 4/13,UTA,L,94,103,41.0,5-15,5,...,100.0,5,6,0,2,4,2,17,2009,2010
2,2009-2010,Regular,Sun 4/11,OKC,W,120,117,35.0,9-16,9,...,66.7,7,7,0,1,2,5,25,2009,2010
3,2009-2010,Regular,Sat 4/10,LAC,L,104,107,41.0,10-19,10,...,100.0,9,4,0,3,5,3,29,2009,2010
4,2009-2010,Regular,Wed 4/7,MIN,W,116,107,44.0,12-22,12,...,0.0,8,14,0,7,4,4,27,2009,2010


In [8]:
# transformar columna date y dejarla en tipo fecha, primero la separo en 3 y luego concateno al dataframe

dates = regularseason["Date"].str.split('[ /]',expand=True)
dates.columns = ['day_name', 'month', 'day']
regularseason2 = pd.concat([regularseason1, dates], axis=1)
regularseason2.head()

Unnamed: 0,Season_year,Season_div,Date,OPP,Result,T Score,O Score,MIN,FG,FGM,...,BLK,STL,PF,TO,PTS,season_year1,season_year2,day_name,month,day
0,2009-2010,Regular,Wed 4/14,POR,W,122,116,48.0,13-25,13,...,1,2,0,2,42,2009,2010,Wed,4,14
1,2009-2010,Regular,Tue 4/13,UTA,L,94,103,41.0,5-15,5,...,0,2,4,2,17,2009,2010,Tue,4,13
2,2009-2010,Regular,Sun 4/11,OKC,W,120,117,35.0,9-16,9,...,0,1,2,5,25,2009,2010,Sun,4,11
3,2009-2010,Regular,Sat 4/10,LAC,L,104,107,41.0,10-19,10,...,0,3,5,3,29,2009,2010,Sat,4,10
4,2009-2010,Regular,Wed 4/7,MIN,W,116,107,44.0,12-22,12,...,0,7,4,4,27,2009,2010,Wed,4,7


In [9]:
regularseason2.dtypes
regularseason2.shape

(766, 32)

In [10]:
# transformo las columnas 'season_year1', 'season_year2', 'month' y 'day' a formato (int)

regularseason2 = regularseason2.astype({'season_year1': 'int32','season_year2': 'int32','month': 'int32','day': 'int32'})
regularseason2.dtypes
regularseason2.shape

(766, 32)

In [11]:
# unir las columnas de day y month con su respectivo año, si el mes es menor o igual a 6, va con season_year2, si no va con
# season_year1
# unir 'month', 'day' y 'season_year' correspondiente

mask = regularseason2['month'] <= 6 # booleano que define meses menores o iguales a 6
regularseason2['year'] = regularseason2['season_year2'] # columna nueva "year"
regularseason2.loc[~mask, 'year'] = regularseason2['season_year1'] # si el booleano es True, será year2, sino year1

date_str = regularseason2['year'].astype(str) + '/' + regularseason2['month'].astype(str) + '/' + regularseason2['day'].astype(str)
regularseason2['date_final'] = pd.to_datetime(date_str)

In [12]:
regularseason2

Unnamed: 0,Season_year,Season_div,Date,OPP,Result,T Score,O Score,MIN,FG,FGM,...,PF,TO,PTS,season_year1,season_year2,day_name,month,day,year,date_final
0,2009-2010,Regular,Wed 4/14,POR,W,122,116,48.0,13-25,13,...,0,2,42,2009,2010,Wed,4,14,2010,2010-04-14
1,2009-2010,Regular,Tue 4/13,UTA,L,94,103,41.0,5-15,5,...,4,2,17,2009,2010,Tue,4,13,2010,2010-04-13
2,2009-2010,Regular,Sun 4/11,OKC,W,120,117,35.0,9-16,9,...,2,5,25,2009,2010,Sun,4,11,2010,2010-04-11
3,2009-2010,Regular,Sat 4/10,LAC,L,104,107,41.0,10-19,10,...,5,3,29,2009,2010,Sat,4,10,2010,2010-04-10
4,2009-2010,Regular,Wed 4/7,MIN,W,116,107,44.0,12-22,12,...,4,4,27,2009,2010,Wed,4,7,2010,2010-04-07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
761,2020-2021,Regular,Fri 5/21,MEM,L,112,117,47.0,13-28,13,...,5,7,39,2020,2021,Fri,5,21,2021,2021-05-21
762,2020-2021,Regular,Wed 5/19,LAL,L,100,103,41.0,12-23,12,...,2,6,37,2020,2021,Wed,5,19,2021,2021-05-19
763,2020-2021,Regular,Thu 12/17,SAC,W,113,105,29.0,9-23,9,...,2,1,29,2020,2021,Thu,12,17,2020,2020-12-17
764,2020-2021,Regular,Tue 12/15,SAC,L,113,114,28.0,11-21,11,...,0,4,29,2020,2021,Tue,12,15,2020,2020-12-15


In [13]:
regularseason2.dtypes

Season_year             object
Season_div              object
Date                    object
OPP                     object
Result                  object
T Score                  int64
O Score                  int64
MIN                    float64
FG                      object
FGM                      int64
FGA                      int64
FG%                    float64
3PT                     object
3PTM                     int64
3PTA                     int64
3P%                    float64
FT                      object
FTM                      int64
FTA                      int64
FT%                    float64
REB                      int64
AST                      int64
BLK                      int64
STL                      int64
PF                       int64
TO                       int64
PTS                      int64
season_year1             int32
season_year2             int32
day_name                object
month                    int32
day                      int32
year    

In [14]:
# Eliminar columnas innecesarias para el analisis

regularseason2 = regularseason2.drop(['Season_div','Date','season_year1','season_year2','day_name'], axis=1)                     
regularseason2.shape

(766, 29)

In [15]:
regularseason2.dtypes

Season_year            object
OPP                    object
Result                 object
T Score                 int64
O Score                 int64
MIN                   float64
FG                     object
FGM                     int64
FGA                     int64
FG%                   float64
3PT                    object
3PTM                    int64
3PTA                    int64
3P%                   float64
FT                     object
FTM                     int64
FTA                     int64
FT%                   float64
REB                     int64
AST                     int64
BLK                     int64
STL                     int64
PF                      int64
TO                      int64
PTS                     int64
month                   int32
day                     int32
year                    int32
date_final     datetime64[ns]
dtype: object

In [16]:
# Eliminamos tambien las columnas 'FG' y '3PT'
regularseason2 = regularseason2.drop(['FG','3PT'], axis=1)                     
regularseason2.shape

(766, 27)

In [17]:
regularseason2.dtypes

Season_year            object
OPP                    object
Result                 object
T Score                 int64
O Score                 int64
MIN                   float64
FGM                     int64
FGA                     int64
FG%                   float64
3PTM                    int64
3PTA                    int64
3P%                   float64
FT                     object
FTM                     int64
FTA                     int64
FT%                   float64
REB                     int64
AST                     int64
BLK                     int64
STL                     int64
PF                      int64
TO                      int64
PTS                     int64
month                   int32
day                     int32
year                    int32
date_final     datetime64[ns]
dtype: object

In [18]:
# Guardamos el dataset en un csv

regularseason2.to_csv('./StephCurry-RegularSeasonClean.csv',index=False)

## Prediccion de números de Steph en la próxima temporada

In [19]:
# Importar librerias faltantes

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

In [20]:
# Guardamos el dataset en un dataframe "df"

df = regularseason2
df.head()

Unnamed: 0,Season_year,OPP,Result,T Score,O Score,MIN,FGM,FGA,FG%,3PTM,...,AST,BLK,STL,PF,TO,PTS,month,day,year,date_final
0,2009-2010,POR,W,122,116,48.0,13,25,52.0,4,...,8,1,2,0,2,42,4,14,2010,2010-04-14
1,2009-2010,UTA,L,94,103,41.0,5,15,33.3,1,...,6,0,2,4,2,17,4,13,2010,2010-04-13
2,2009-2010,OKC,W,120,117,35.0,9,16,56.3,3,...,7,0,1,2,5,25,4,11,2010,2010-04-11
3,2009-2010,LAC,L,104,107,41.0,10,19,52.6,5,...,4,0,3,5,3,29,4,10,2010,2010-04-10
4,2009-2010,MIN,W,116,107,44.0,12,22,54.5,3,...,14,0,7,4,4,27,4,7,2010,2010-04-07


In [21]:
# Asignamos variables dummy a Result

dummy_result = pd.get_dummies(df["Result"], prefix = "Result")
dummy_result.head()

Unnamed: 0,Result_L,Result_W
0,0,1
1,1,0
2,0,1
3,1,0
4,0,1


In [22]:
# Asignamos variables dummy a OPP

dummy_OPP = pd.get_dummies(df["OPP"], prefix = "OPP")
dummy_OPP.head()

Unnamed: 0,OPP_ATL,OPP_BKN,OPP_BOS,OPP_CHA,OPP_CHI,OPP_CLE,OPP_DAL,OPP_DEN,OPP_DET,OPP_HOU,...,OPP_OKC,OPP_ORL,OPP_PHI,OPP_PHX,OPP_POR,OPP_SAC,OPP_SAS,OPP_TOR,OPP_UTA,OPP_WSH
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
2,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [23]:
# Guardamos en una lista las columnas que tenemos actualmente para luego unirlas a las columnas con variables dummy

column_names = df.columns.values.tolist()
column_names

['Season_year',
 'OPP',
 'Result',
 'T Score',
 'O Score',
 'MIN',
 'FGM',
 'FGA',
 'FG%',
 '3PTM',
 '3PTA',
 '3P%',
 'FT',
 'FTM',
 'FTA',
 'FT%',
 'REB',
 'AST',
 'BLK',
 'STL',
 'PF',
 'TO',
 'PTS',
 'month',
 'day',
 'year',
 'date_final']

In [24]:
# Unimos las columnas con las de variables dummy

df_new = df[column_names].join(dummy_result)
column_names = df_new.columns.values.tolist()
df_new.head()

Unnamed: 0,Season_year,OPP,Result,T Score,O Score,MIN,FGM,FGA,FG%,3PTM,...,STL,PF,TO,PTS,month,day,year,date_final,Result_L,Result_W
0,2009-2010,POR,W,122,116,48.0,13,25,52.0,4,...,2,0,2,42,4,14,2010,2010-04-14,0,1
1,2009-2010,UTA,L,94,103,41.0,5,15,33.3,1,...,2,4,2,17,4,13,2010,2010-04-13,1,0
2,2009-2010,OKC,W,120,117,35.0,9,16,56.3,3,...,1,2,5,25,4,11,2010,2010-04-11,0,1
3,2009-2010,LAC,L,104,107,41.0,10,19,52.6,5,...,3,5,3,29,4,10,2010,2010-04-10,1,0
4,2009-2010,MIN,W,116,107,44.0,12,22,54.5,3,...,7,4,4,27,4,7,2010,2010-04-07,0,1


In [25]:
# Unimos las columnas con las de variables dummy

df_new = df_new[column_names].join(dummy_OPP)
df_new.head()

Unnamed: 0,Season_year,OPP,Result,T Score,O Score,MIN,FGM,FGA,FG%,3PTM,...,OPP_OKC,OPP_ORL,OPP_PHI,OPP_PHX,OPP_POR,OPP_SAC,OPP_SAS,OPP_TOR,OPP_UTA,OPP_WSH
0,2009-2010,POR,W,122,116,48.0,13,25,52.0,4,...,0,0,0,0,1,0,0,0,0,0
1,2009-2010,UTA,L,94,103,41.0,5,15,33.3,1,...,0,0,0,0,0,0,0,0,1,0
2,2009-2010,OKC,W,120,117,35.0,9,16,56.3,3,...,1,0,0,0,0,0,0,0,0,0
3,2009-2010,LAC,L,104,107,41.0,10,19,52.6,5,...,0,0,0,0,0,0,0,0,0,0
4,2009-2010,MIN,W,116,107,44.0,12,22,54.5,3,...,0,0,0,0,0,0,0,0,0,0


In [26]:
df_new.dtypes

Season_year            object
OPP                    object
Result                 object
T Score                 int64
O Score                 int64
MIN                   float64
FGM                     int64
FGA                     int64
FG%                   float64
3PTM                    int64
3PTA                    int64
3P%                   float64
FT                     object
FTM                     int64
FTA                     int64
FT%                   float64
REB                     int64
AST                     int64
BLK                     int64
STL                     int64
PF                      int64
TO                      int64
PTS                     int64
month                   int32
day                     int32
year                    int32
date_final     datetime64[ns]
Result_L                uint8
Result_W                uint8
OPP_ATL                 uint8
OPP_BKN                 uint8
OPP_BOS                 uint8
OPP_CHA                 uint8
OPP_CHI   

In [27]:
# Vamos a predecir la cantidad de puntos que va a meter Steph la proxima temporada, primero definimos variables predictoras

feature_cols = ["MIN", "FGM", "FGA", 
                "3PTM", "3PTA", "FTM", 
                "FTA", "REB", "AST", 
                "BLK", "STL", "TO",
                "month", "day","year",
                "Result_L", "Result_W",                
                "OPP_ATL","OPP_BKN" ,"OPP_BOS",
                "OPP_CHA","OPP_CHI","OPP_CLE",
                "OPP_DAL","OPP_DEN","OPP_DET",
                "OPP_HOU", "OPP_IND","OPP_LAC",
                "OPP_LAL","OPP_MEM" ,"OPP_MIA",
                "OPP_MIL","OPP_MIN","OPP_NOP",
                "OPP_NYK","OPP_OKC" ,"OPP_ORL",
                "OPP_PHI" ,"OPP_PHX","OPP_POR",
                "OPP_SAC","OPP_SAS" ,"OPP_TOR",
                "OPP_UTA" ,"OPP_WSH"]

In [28]:
# Definimos las variables X e Y

X = df_new[feature_cols]
y = df_new["PTS"]

In [29]:
# Creamos el modelo

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [30]:
# Crear modelo de regresión lineal

model = LinearRegression()

In [31]:
# Ajustar modelo a los datos de entrenamiento

regresion_lineal = model.fit(X_train, y_train)

In [32]:
# Evaluar el modelo en los datos de test

score = model.score(X_test, y_test)
print("El coeficiente de determinación R^2 en la muestra de test es:", score)

El coeficiente de determinación R^2 en la muestra de test es: 1.0


In [33]:
# Hacer predicciones sobre los datos de test

y_pred = model.predict(X_test)

In [34]:
# Comparar las predicciones con las etiquetas reales

for pred, real in zip(y_pred, y_test):
    print("Predicción:", pred, "- Etiqueta real:", real)

Predicción: 14.000000000000007 - Etiqueta real: 14
Predicción: 40.99999999999997 - Etiqueta real: 41
Predicción: 16.000000000000025 - Etiqueta real: 16
Predicción: 23.000000000000004 - Etiqueta real: 23
Predicción: 15.000000000000018 - Etiqueta real: 15
Predicción: 37.99999999999996 - Etiqueta real: 38
Predicción: 19.000000000000007 - Etiqueta real: 19
Predicción: 29.999999999999975 - Etiqueta real: 30
Predicción: 20.000000000000007 - Etiqueta real: 20
Predicción: 13.000000000000028 - Etiqueta real: 13
Predicción: 26.000000000000004 - Etiqueta real: 26
Predicción: 24.999999999999996 - Etiqueta real: 25
Predicción: 40.99999999999994 - Etiqueta real: 41
Predicción: 7.000000000000037 - Etiqueta real: 7
Predicción: 19.999999999999996 - Etiqueta real: 20
Predicción: 20.000000000000007 - Etiqueta real: 20
Predicción: 22.999999999999996 - Etiqueta real: 23
Predicción: 25.00000000000001 - Etiqueta real: 25
Predicción: 37.999999999999986 - Etiqueta real: 38
Predicción: 30.999999999999975 - Etiq

In [35]:
# Calcular el error cuadrático medio (MSE) en los datos de test

mse = np.mean((y_pred - y_test) ** 2)
print("El error cuadrático medio (MSE) en la muestra de test es:", mse)

El error cuadrático medio (MSE) en la muestra de test es: 5.4062465601307715e-28


In [36]:
# Hacemos una validación cruzada del modelo
# Realizar validación cruzada con 5 iteraciones

from sklearn.model_selection import cross_val_score

scores = cross_val_score(model, X, y, cv=5)

In [37]:
# Imprimir los puntajes obtenidos en cada iteración

print("Puntajes de validación cruzada:", scores)

Puntajes de validación cruzada: [1. 1. 1. 1. 1.]


In [38]:
# Imprimir el puntaje promedio y la desviación estándar de los puntajes

print("Puntaje promedio:", np.mean(scores))
print("Desviación estándar de los puntajes:", np.std(scores))

Puntaje promedio: 1.0
Desviación estándar de los puntajes: 0.0


In [39]:
# Guardo el modelo creado

import pickle

with open('linear_regression_model.pkl', 'wb') as file:
    pickle.dump(model, file)

In [40]:
# Creo un csv con el dataset sin la columna de puntos

df_no_PTS = df_new.drop(['PTS','Season_year','OPP','Result','T Score','O Score','date_final','FG%','3P%','FT%',
                         'FT','FT%','PF'], axis=1)
df_no_PTS.shape

(766, 46)

In [41]:
df_no_PTS.to_csv('./StephCurry-RegularSeason_noPTS.csv',index=False)

In [42]:
# Cargo el dataset sin los PTS

test_data = pd.read_csv('./StephCurry-RegularSeason_noPTS.csv')
test_data.shape

(766, 46)

In [43]:
# Obtenemos los coeficientes y el término independiente de la función de regresión lineal

coeficientes = regresion_lineal.coef_
intercepto = regresion_lineal.intercept_

In [44]:
# Imprimimos los resultados

print("Coeficientes:", coeficientes)
print("Término independiente:", intercepto)

Coeficientes: [ 2.46171371e-16  2.00000000e+00 -3.83058457e-16  1.00000000e+00
 -2.11461766e-15  1.00000000e+00 -2.44434590e-15 -5.04530706e-18
 -4.18841226e-16  9.34258915e-16  2.97339643e-17  8.48071604e-17
  3.04289251e-16  4.85360847e-16 -5.45220271e-16  5.22811158e-04
  5.22811158e-04  3.69053822e-05  3.69053822e-05  3.69053822e-05
  3.69053822e-05  3.69053822e-05  3.69053822e-05  3.69053822e-05
  3.69053822e-05  3.69053822e-05  3.69053822e-05  3.69053822e-05
  3.69053822e-05  3.69053822e-05  3.69053822e-05  3.69053822e-05
  3.69053822e-05  3.69053822e-05  3.69053822e-05  3.69053822e-05
  3.69053822e-05  3.69053822e-05  3.69053822e-05  3.69053822e-05
  3.69053822e-05  3.69053822e-05  3.69053822e-05  3.69053822e-05
  3.69053822e-05  3.69053822e-05]
Término independiente: -0.000559716539466848


In [45]:
# Predecir los PTS en el dataset sin la columna PTS

X = test_data[feature_cols]

test_data["prediction_PTS"] = model.predict(pd.DataFrame(test_data[feature_cols]))

In [46]:
test_data

Unnamed: 0,MIN,FGM,FGA,3PTM,3PTA,FTM,FTA,REB,AST,BLK,...,OPP_ORL,OPP_PHI,OPP_PHX,OPP_POR,OPP_SAC,OPP_SAS,OPP_TOR,OPP_UTA,OPP_WSH,prediction_PTS
0,48.0,13,25,4,6,12,12,9,8,1,...,0,0,0,1,0,0,0,0,0,42.0
1,41.0,5,15,1,4,6,6,5,6,0,...,0,0,0,0,0,0,0,1,0,17.0
2,35.0,9,16,3,6,4,6,7,7,0,...,0,0,0,0,0,0,0,0,0,25.0
3,41.0,10,19,5,10,4,4,9,4,0,...,0,0,0,0,0,0,0,0,0,29.0
4,44.0,12,22,3,5,0,0,8,14,0,...,0,0,0,0,0,0,0,0,0,27.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
761,47.0,13,28,6,15,7,7,4,5,0,...,0,0,0,0,0,0,0,0,0,39.0
762,41.0,12,23,6,9,7,8,7,3,0,...,0,0,0,0,0,0,0,0,0,37.0
763,29.0,9,23,6,13,5,5,6,3,0,...,0,0,0,0,1,0,0,0,0,29.0
764,28.0,11,21,5,13,2,3,4,4,1,...,0,0,0,0,1,0,0,0,0,29.0


### Existe un posible sobreajuste en el modelo, tiene un score de "1", analizar más a profundidad.-