In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import ElasticNet
from sklearn.metrics import mean_squared_error
import pickle

In [5]:
df = pd.read_csv('data_ml.csv')

Primero separamos la variable predecir.

In [6]:
X = df.drop('price', axis=1)
y = df['price']

Instanciamos el modelo

In [7]:
model = ElasticNet()

Creamos la grilla de parámetros.

In [8]:
param_grid = {"alpha" : [0.9, 1.0],
             "l1_ratio" : [0.4, 0.5, 0.6,],
              "max_iter" : [800, 1000]}

Instanciamos el método GridSearchCV.

In [9]:
grid_search = GridSearchCV(model, param_grid, scoring='neg_mean_squared_error', cv=5)

Fiteamos el modelo.

In [10]:
grid_search.fit(X, y)

In [11]:
best_params = grid_search.best_params_
best_params

{'alpha': 0.9, 'l1_ratio': 0.4, 'max_iter': 800}

In [12]:
best_mse = grid_search.best_score_
rmse = np.sqrt(-best_mse)

In [13]:
rmse

10.113098070567762

Entrenamos el modelo con los mejores hiperparametros.

In [15]:
modelo_elastic = ElasticNet(alpha=0.9, l1_ratio=0.4, max_iter=800)

In [16]:
modelo_elastic.fit(X, y)

Guardamos el modelo como archivo pickle.

In [14]:
with open('modelo_elastic.pkl', 'wb') as model:
    pickle.dump(elastic, model)

Vamos a preparar un df con algunos valores seteados en 0, para luego pasarle los datos para hacer la predicción.

In [20]:
x_prediccion = X.head(1)
lista_columnas = X.columns.to_list()
x_prediccion

Unnamed: 0,early_access,sentiment,publisher_cat,developer_cat,year,month_2,month_3,month_4,month_5,month_6,...,Warhammer 40K,Web Publishing,Werewolves,Western,Word Game,World War I,World War II,Wrestling,Zombies,e-sports
0,False,0.0,1,1,2018,False,False,False,False,False,...,0,0,0,0,0,0,0,0,0,0


In [21]:
lista_columnas[:17]

['early_access',
 'sentiment',
 'publisher_cat',
 'developer_cat',
 'year',
 'month_2',
 'month_3',
 'month_4',
 'month_5',
 'month_6',
 'month_7',
 'month_8',
 'month_9',
 'month_10',
 'month_11',
 'month_12',
 "1990's"]

In [22]:
lista_features = lista_columnas[16:]
lista_features[0]

"1990's"

In [23]:
x_prediccion.loc[:,lista_features] = 0
x_prediccion

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


Unnamed: 0,early_access,sentiment,publisher_cat,developer_cat,year,month_2,month_3,month_4,month_5,month_6,...,Warhammer 40K,Web Publishing,Werewolves,Western,Word Game,World War I,World War II,Wrestling,Zombies,e-sports
0,False,0.0,1,1,2018,False,False,False,False,False,...,0,0,0,0,0,0,0,0,0,0


In [24]:
x_prediccion.columns = [column.lower() for column in x_prediccion.columns]
x_prediccion

Unnamed: 0,early_access,sentiment,publisher_cat,developer_cat,year,month_2,month_3,month_4,month_5,month_6,...,warhammer 40k,web publishing,werewolves,western,word game,world war i,world war ii,wrestling,zombies,e-sports
0,False,0.0,1,1,2018,False,False,False,False,False,...,0,0,0,0,0,0,0,0,0,0


In [25]:
with open('x_prediccion.pkl', 'wb') as x_pred:
    pickle.dump(x_prediccion, x_pred)