In [16]:
from basepc_mal import *

from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import mean_absolute_error
from sklearn.compose import ColumnTransformer

import xgboost

In [17]:
df_train = pd.read_csv('./data/train_limpio_pipeline.csv')

In [18]:
train_set, validation_set = train_test_split(df_train, test_size= 0.2, random_state= 42)

x_train = train_set.drop(['Price_euros', 'id'], axis= 1)
y_train = train_set['Price_euros']

x_validation = validation_set.drop(['Price_euros', 'id'], axis= 1)
y_validation = validation_set['Price_euros']

y_train = np.log10(y_train)

In [19]:
categoricas = ['Company', 'Product', 'TypeName', 'ScreenResolution', 'Memory', 'Gpu', 'OpSys']
numericas = ['Inches', 'Cpu', 'Ram', 'Weight']

preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(handle_unknown= 'ignore'), categoricas),
        ('num', StandardScaler(), numericas)
        ])

In [20]:
model_xgboost = xgboost.XGBRegressor()

In [21]:
pipeline = Pipeline(steps=[
('preprocessor', preprocessor),
('model', model_xgboost)
])

In [22]:
param_grid = {
    'model__n_estimators': [i for i in range(20, 61, 10)],  
    'model__max_depth': [10, 15, 20],  
    'model__learning_rate': [i for i in np.arange(0.01, 0.31, 0.05)],
    'model__reg_alpha': [i for i in np.arange(0.1, 1, 0.1)],  
    'model__min_samples_leaf': [4, 6, 8],  
}

In [23]:
grid_search = GridSearchCV(pipeline, param_grid, cv=5, n_jobs= -1, scoring= 'neg_mean_absolute_error')
grid_search.fit(x_train, y_train)

Parameters: { "min_samples_leaf" } are not used.



In [24]:
best_model = grid_search.best_estimator_

In [25]:
y_pred = best_model.predict(x_validation)
y_pred = np.power(10, y_pred)

mean_absolute_error(y_validation, y_pred)

170.9694953559917

In [26]:
y_pred

array([1899.9778 , 1101.2534 ,  781.1772 ,  805.3022 , 1642.9869 ,
        453.2106 ,  410.8594 ,  218.16756,  425.76102,  863.1835 ,
        294.57043, 1580.0961 ,  638.18616, 1550.9849 ,  377.26648,
       1385.5929 ,  447.93286, 1502.8616 ,  223.0761 ,  867.1127 ,
        496.18094, 1052.0435 , 1314.5548 ,  440.55927, 1011.0645 ,
        546.122  , 1360.083  , 1339.8859 , 1242.1919 , 1776.6274 ,
       1030.6266 , 2604.531  ,  298.00668,  891.3827 ,  270.6123 ,
        388.98248,  248.73775, 2842.58   , 1221.5901 , 3023.208  ,
       1109.8304 ,  821.4088 , 2466.0012 ,  353.03418,  472.95877,
        298.0777 , 1548.663  ,  477.43558,  864.0008 , 1247.458  ,
       1558.7191 , 1861.0068 ,  803.6403 , 1254.9333 ,  829.8793 ,
        811.265  ,  373.01788,  499.60965,  913.5653 , 1731.4996 ,
       1753.3557 ,  541.34753,  857.5953 , 1019.7739 , 1835.6772 ,
       1214.7148 , 1600.0024 ,  437.91238, 1058.0997 ,  512.3577 ,
        285.83426,  729.5231 , 1313.149  , 1091.8026 ,  304.36