In [None]:
[["a2","a4","b2","b4","c2","c4","g2","l2","l4"]]

## ITA 2021

<br>

Dicionário de Dados:

* n: número de agentes
* p: fração de traders
* f: grau de interesse dos traders
* x, y, z: dimensões do espaço aéreo
* a1, a2: média e desvio padrão do coeficiente do preço fundamental dos consumidores
* a3, a4: idem para os traders
* b1, b2: média e desvio padrão do coeficiente do preço de mercado dos consumidores
* b3, b4: idem para os traders
* c1, c2: média e desvio padrão do coeficiente do preço aleatório dos consumidores
* c3, c4: idem para os traders
* g1, g2: média e desvio padrão do grau de agressividade dos consumidores
* l1, l2: média e desvio padrão do coeficiente de desvalorização para os consumidores
* l3, l4: idem para os traders
* e1, e2: variabilidade no preço fundamental dos consumidores e traders, respectivamente
* cent_price_cor: correlação entre o preço final e centralidade das permissões de vôo
* cent_trans_cor: idem para o número de transações

In [1]:
# Importando Ferramentas Básicas
import pandas                  as pd
import matplotlib.pyplot       as plt
import numpy                   as np
import                            os
from   datetime            import datetime

In [4]:
# Importando Ferramentas de Limpeza
from sklearn.decomposition    import PCA
from sklearn.preprocessing    import StandardScaler

In [5]:
# Importando Ferramentas de Modelo
from sklearn.svm              import SVR
from xgboost                  import XGBRegressor
from sklearn.model_selection  import train_test_split
from sklearn.model_selection  import GridSearchCV, RandomizedSearchCV
from sklearn.metrics          import accuracy_score, mean_absolute_error
from sklearn.linear_model     import LinearRegression, LogisticRegression, Lasso

In [6]:
# Execucao do programa

# Importando os dados
train = pd.read_csv('./../Dados/train.csv')
test = pd.read_csv('./../Dados/test.csv')

dataframes = [train, test]

for df in dataframes:
    df['volume']  = df.x * df.y * df.z
    df['densidade'] = df.volume / df.n
    
X = train.drop(columns = ['cent_price_cor', 'cent_trans_cor'], axis = 1)

In [7]:
y_price = train.cent_price_cor
y_trans = train.cent_trans_cor

X_train, X_test, y_price_train, y_price_test = train_test_split(X,y_price,
                                                    test_size = 0.25,
                                                    random_state = 0)

X_train, X_test, y_trans_train, y_trans_test = train_test_split(X,y_trans,
                                                    test_size = 0.25,
                                                    random_state = 0)

In [26]:
scaler = StandardScaler()

transf_X = scaler.fit_transform(X)
transf_X_test = scaler.fit_transform(X_test)

In [27]:
# Coletanea de parametros para o GridSearch
params_grid = [

#Linear Regression
{'normalize': ['True', 'False'],
'fit_intercept': ['True', 'False']},
 
#SVR teste
# {'C': [0.1], 
#  'coef0': [0.01], 
#  'degree': [3], 
#  'epsilon': [0.1], 
#  'gamma': ['auto'], 
#  'kernel': ['rbf']}
    
 #SVR RBF
# {'kernel': ['rbf'],
# 'C':[0.1, 0.5, 1, 5, 10],
# 'degree': [3,8],
# 'coef0': [0.01,10,0.5],
#  'gamma': ('auto','scale'),
#  'epsilon': [0.1,0.2]}
    
 #SVR POLY
 #,{'kernel': ['poly'],
# 'C':[0.1, 0.5, 1, 5, 10],
 #'degree': [3,8],
 #'coef0': [0.01,10,0.5],
# 'gamma': ('auto','scale'),
 #'epsilon': [0.1,0.2]}
    
#Lasso
#,{'alpha':[0.02, 0.024, 0.025, 0.026, 0.03]}  
    
# #XGBoost
{'nthread':[4], #when use hyperthread, xgboost may become slower
'objective':['reg:linear'],
'learning_rate': [.03, 0.05, .07], #so called `eta` value
'max_depth': [5, 6, 7],
'min_child_weight': [4],
'silent': [1],
'subsample': [0.7],
'colsample_bytree': [0.7],
'n_estimators': [500]}
 ]

In [28]:
def prever(X_train, X_test, y_train, y_test, target_name, components = [20,21,22,23,24,25,26,27,28]):
    
    lista_scores = []
    lista_PCA = []
    lista_params = []
    lista_models = []
    
    models = [
        LinearRegression(),
        #SVR(),
        #SVR(),
        #Lasso()
        XGBRegressor()
        ]
     
    for n in components:
        
        pca = PCA(n_components = n)
        X_train_PCA = pca.fit_transform(X_train)
        X_test_PCA = pca.transform(X_test)
            
        for i, model in enumerate(models):

            print(f"\n\nModelo: {model}\nComponent: {n}\n\n" + str(X_train_PCA.shape) + str(X_test_PCA.shape))

            clf = GridSearchCV(model, param_grid = params_grid[i],
                               scoring = 'neg_mean_absolute_error', #destaque Ã  mÃ©trica pedida
                               n_jobs=-1, refit=True, cv=5, verbose=6,
                               pre_dispatch='2*n_jobs', error_score='raise', 
                               return_train_score=True)
            
            clf.fit(X_train_PCA, y_train)

            pred_cv = clf.predict(X_test_PCA)
            score_cv = mean_absolute_error(y_test, pred_cv)
            print(f"Melhores parametros: {clf.best_params_}")
            print(f"\nScore Grid: {score_cv}")
            
            lista_params.append(clf.best_params_)
            lista_models.append(model)
            lista_scores.append(round(score_cv,15))
            lista_PCA.append(n)

    print("Exportando DataFrame de Scores\n")

    df_scores = pd.DataFrame()
    
    df_scores.insert(loc=0, column='PCA', value= pd.Series(lista_PCA))
    df_scores.insert(loc=0, column='Scores', value= pd.Series(lista_scores))
    df_scores.insert(loc=0, column='Params', value= pd.Series(lista_params))
    df_scores.insert(loc=0, column='Model', value= pd.Series(lista_models))
    df_scores.to_csv(f"./../Resultados/{target_name}_scores_"+"{}.csv".format(datetime.now().strftime("%d-%m-%Y_%Hh%Mm%Ss")))
            
    return df_scores

In [22]:
X_medias = X.drop(["a2","a4","b2","b4","c2","c4","g2","l2","l4"], axis = 1)

X_test_medias = X_test.drop(["a2","a4","b2","b4","c2","c4","g2","l2","l4"], axis = 1)

df_scores_trans = prever(X_medias, X_test_medias, y_trans, y_trans_test, "trans",[10,11,12,13,14,15,16,17,18,19])

#df_scores_trans = prever(X, X_test, y_trans, y_trans_test, "trans")



Modelo: LinearRegression()
Component: 10

(11940, 10)(2985, 10)
Fitting 5 folds for each of 4 candidates, totalling 20 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 out of  20 | elapsed:    1.9s remaining:    5.8s
[Parallel(n_jobs=-1)]: Done   9 out of  20 | elapsed:    1.9s remaining:    2.3s
[Parallel(n_jobs=-1)]: Done  13 out of  20 | elapsed:    1.9s remaining:    1.0s
[Parallel(n_jobs=-1)]: Done  17 out of  20 | elapsed:    1.9s remaining:    0.3s
[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:    2.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.


Melhores parametros: {'fit_intercept': 'True', 'normalize': 'True'}

Score Grid: 0.09016819877206027


Modelo: XGBRegressor(base_score=None, booster=None, colsample_bylevel=None,
             colsample_bynode=None, colsample_bytree=None, gamma=None,
             gpu_id=None, importance_type='gain', interaction_constraints=None,
             learning_rate=None, max_delta_step=None, max_depth=None,
             min_child_weight=None, missing=nan, monotone_constraints=None,
             n_estimators=100, n_jobs=None, num_parallel_tree=None,
             random_state=None, reg_alpha=None, reg_lambda=None,
             scale_pos_weight=None, subsample=None, tree_method=None,
             validate_parameters=None, verbosity=None)
Component: 10

(11940, 10)(2985, 10)
Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=-1)]: Done  30 out of  45 | elapsed:   31.9s remaining:   15.9s
[Parallel(n_jobs=-1)]: Done  38 out of  45 | elapsed:   39.3s remaining:    7.2s
[Parallel(n_jobs=-1)]: Done  45 out of  45 | elapsed:   41.9s finished


Parameters: { "silent" } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Melhores parametros: {'colsample_bytree': 0.7, 'learning_rate': 0.03, 'max_depth': 5, 'min_child_weight': 4, 'n_estimators': 500, 'nthread': 4, 'objective': 'reg:linear', 'silent': 1, 'subsample': 0.7}

Score Grid: 0.07896500452000092


Modelo: LinearRegression()
Component: 11

(11940, 11)(2985, 11)
Fitting 5 folds for each of 4 candidates, totalling 20 fits
Melhores parametros: {'fit_intercept': 'True', 'normalize': 'True'}

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   9 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  13 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  17 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.




Score Grid: 0.09016640495073881


Modelo: XGBRegressor(base_score=None, booster=None, colsample_bylevel=None,
             colsample_bynode=None, colsample_bytree=None, gamma=None,
             gpu_id=None, importance_type='gain', interaction_constraints=None,
             learning_rate=None, max_delta_step=None, max_depth=None,
             min_child_weight=None, missing=nan, monotone_constraints=None,
             n_estimators=100, n_jobs=None, num_parallel_tree=None,
             random_state=None, reg_alpha=None, reg_lambda=None,
             scale_pos_weight=None, subsample=None, tree_method=None,
             validate_parameters=None, verbosity=None)
Component: 11

(11940, 11)(2985, 11)
Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=-1)]: Done  30 out of  45 | elapsed:   31.8s remaining:   15.8s
[Parallel(n_jobs=-1)]: Done  38 out of  45 | elapsed:   38.8s remaining:    7.1s
[Parallel(n_jobs=-1)]: Done  45 out of  45 | elapsed:   41.4s finished


Parameters: { "silent" } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Melhores parametros: {'colsample_bytree': 0.7, 'learning_rate': 0.03, 'max_depth': 5, 'min_child_weight': 4, 'n_estimators': 500, 'nthread': 4, 'objective': 'reg:linear', 'silent': 1, 'subsample': 0.7}

Score Grid: 0.07901644268115761


Modelo: LinearRegression()
Component: 12

(11940, 12)(2985, 12)
Fitting 5 folds for each of 4 candidates, totalling 20 fits
Melhores parametros: {'fit_intercept': 'True', 'normalize': 'True'}

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   9 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  13 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  17 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.




Score Grid: 0.09015308508174778


Modelo: XGBRegressor(base_score=None, booster=None, colsample_bylevel=None,
             colsample_bynode=None, colsample_bytree=None, gamma=None,
             gpu_id=None, importance_type='gain', interaction_constraints=None,
             learning_rate=None, max_delta_step=None, max_depth=None,
             min_child_weight=None, missing=nan, monotone_constraints=None,
             n_estimators=100, n_jobs=None, num_parallel_tree=None,
             random_state=None, reg_alpha=None, reg_lambda=None,
             scale_pos_weight=None, subsample=None, tree_method=None,
             validate_parameters=None, verbosity=None)
Component: 12

(11940, 12)(2985, 12)
Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=-1)]: Done  30 out of  45 | elapsed:   34.8s remaining:   17.4s
[Parallel(n_jobs=-1)]: Done  38 out of  45 | elapsed:   44.7s remaining:    8.1s
[Parallel(n_jobs=-1)]: Done  45 out of  45 | elapsed:   46.3s finished


Parameters: { "silent" } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Melhores parametros: {'colsample_bytree': 0.7, 'learning_rate': 0.03, 'max_depth': 5, 'min_child_weight': 4, 'n_estimators': 500, 'nthread': 4, 'objective': 'reg:linear', 'silent': 1, 'subsample': 0.7}

Score Grid: 0.07827996617373909


Modelo: LinearRegression()
Component: 13

(11940, 13)(2985, 13)
Fitting 5 folds for each of 4 candidates, totalling 20 fits
Melhores parametros: {'fit_intercept': 'True', 'normalize': 'True'}

Score Grid: 0.09015496830345414


Modelo: XGBRegressor(base_score=None, booster=None, colsample_bylevel=None,
             colsample_bynode=None, colsample_bytree=None, gamma=None,
             gpu_id=None, importance_type='gain', interaction_constraints=None,
             learning_rat

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   9 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  13 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  17 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  30 out of  45 | elapsed:   38.0s remaining:   18.9s
[Parallel(n_jobs=-1)]: Done  38 out of  45 | elapsed:   46.0s remaining:    8.4s
[Parallel(n_jobs=-1)]: Done  45 out of  45 | elapsed:   49.9s finished


Parameters: { "silent" } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Melhores parametros: {'colsample_bytree': 0.7, 'learning_rate': 0.03, 'max_depth': 5, 'min_child_weight': 4, 'n_estimators': 500, 'nthread': 4, 'objective': 'reg:linear', 'silent': 1, 'subsample': 0.7}

Score Grid: 0.07803325629589745


Modelo: LinearRegression()
Component: 14

(11940, 14)(2985, 14)
Fitting 5 folds for each of 4 candidates, totalling 20 fits
Melhores parametros: {'fit_intercept': 'True', 'normalize': 'True'}

Score Grid: 0.09015764879883158


Modelo: XGBRegressor(base_score=None, booster=None, colsample_bylevel=None,
             colsample_bynode=None, colsample_bytree=None, gamma=None,
             gpu_id=None, importance_type='gain', interaction_constraints=None,
             learning_rat

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   9 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  13 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  17 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  30 out of  45 | elapsed:   38.0s remaining:   18.9s
[Parallel(n_jobs=-1)]: Done  38 out of  45 | elapsed:   46.5s remaining:    8.5s
[Parallel(n_jobs=-1)]: Done  45 out of  45 | elapsed:   50.5s finished


Parameters: { "silent" } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Melhores parametros: {'colsample_bytree': 0.7, 'learning_rate': 0.03, 'max_depth': 5, 'min_child_weight': 4, 'n_estimators': 500, 'nthread': 4, 'objective': 'reg:linear', 'silent': 1, 'subsample': 0.7}

Score Grid: 0.07783726094463962


Modelo: LinearRegression()
Component: 15

(11940, 15)(2985, 15)
Fitting 5 folds for each of 4 candidates, totalling 20 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   9 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  13 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  17 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:    0.0s finished


Melhores parametros: {'fit_intercept': 'True', 'normalize': 'True'}

Score Grid: 0.09014761032496964


Modelo: XGBRegressor(base_score=None, booster=None, colsample_bylevel=None,
             colsample_bynode=None, colsample_bytree=None, gamma=None,
             gpu_id=None, importance_type='gain', interaction_constraints=None,
             learning_rate=None, max_delta_step=None, max_depth=None,
             min_child_weight=None, missing=nan, monotone_constraints=None,
             n_estimators=100, n_jobs=None, num_parallel_tree=None,
             random_state=None, reg_alpha=None, reg_lambda=None,
             scale_pos_weight=None, subsample=None, tree_method=None,
             validate_parameters=None, verbosity=None)
Component: 15

(11940, 15)(2985, 15)
Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  30 out of  45 | elapsed:   41.1s remaining:   20.5s
[Parallel(n_jobs=-1)]: Done  38 out of  45 | elapsed:   50.2s remaining:    9.2s
[Parallel(n_jobs=-1)]: Done  45 out of  45 | elapsed:   53.8s finished


Parameters: { "silent" } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Melhores parametros: {'colsample_bytree': 0.7, 'learning_rate': 0.03, 'max_depth': 5, 'min_child_weight': 4, 'n_estimators': 500, 'nthread': 4, 'objective': 'reg:linear', 'silent': 1, 'subsample': 0.7}

Score Grid: 0.0776632755437688


Modelo: LinearRegression()
Component: 16

(11940, 16)(2985, 16)
Fitting 5 folds for each of 4 candidates, totalling 20 fits
Melhores parametros: {'fit_intercept': 'True', 'normalize': 'True'}

Score Grid: 0.09012950056941228


Modelo: XGBRegressor(base_score=None, booster=None, colsample_bylevel=None,
             colsample_bynode=None, colsample_bytree=None, gamma=None,
             gpu_id=None, importance_type='gain', interaction_constraints=None,
             learning_rate

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   9 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  13 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  17 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  30 out of  45 | elapsed:   44.6s remaining:   22.2s
[Parallel(n_jobs=-1)]: Done  38 out of  45 | elapsed:   54.2s remaining:    9.9s
[Parallel(n_jobs=-1)]: Done  45 out of  45 | elapsed:   57.8s finished


Parameters: { "silent" } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Melhores parametros: {'colsample_bytree': 0.7, 'learning_rate': 0.03, 'max_depth': 5, 'min_child_weight': 4, 'n_estimators': 500, 'nthread': 4, 'objective': 'reg:linear', 'silent': 1, 'subsample': 0.7}

Score Grid: 0.07737602377480998


Modelo: LinearRegression()
Component: 17

(11940, 17)(2985, 17)
Fitting 5 folds for each of 4 candidates, totalling 20 fits
Melhores parametros: {'fit_intercept': 'True', 'normalize': 'True'}

Score Grid: 0.09017790276717755


Modelo: XGBRegressor(base_score=None, booster=None, colsample_bylevel=None,
             colsample_bynode=None, colsample_bytree=None, gamma=None,
             gpu_id=None, importance_type='gain', interaction_constraints=None,
             learning_rat

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   9 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  13 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  17 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  30 out of  45 | elapsed:   45.2s remaining:   22.5s
[Parallel(n_jobs=-1)]: Done  38 out of  45 | elapsed:   56.6s remaining:   10.3s
[Parallel(n_jobs=-1)]: Done  45 out of  45 | elapsed:  1.0min finished


Parameters: { "silent" } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Melhores parametros: {'colsample_bytree': 0.7, 'learning_rate': 0.03, 'max_depth': 5, 'min_child_weight': 4, 'n_estimators': 500, 'nthread': 4, 'objective': 'reg:linear', 'silent': 1, 'subsample': 0.7}

Score Grid: 0.07722517608581875


Modelo: LinearRegression()
Component: 18

(11940, 18)(2985, 18)
Fitting 5 folds for each of 4 candidates, totalling 20 fits
Melhores parametros: {'fit_intercept': 'True', 'normalize': 'True'}

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   9 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  13 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  17 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:    0.0s finished




Score Grid: 0.09017834678825122


Modelo: XGBRegressor(base_score=None, booster=None, colsample_bylevel=None,
             colsample_bynode=None, colsample_bytree=None, gamma=None,
             gpu_id=None, importance_type='gain', interaction_constraints=None,
             learning_rate=None, max_delta_step=None, max_depth=None,
             min_child_weight=None, missing=nan, monotone_constraints=None,
             n_estimators=100, n_jobs=None, num_parallel_tree=None,
             random_state=None, reg_alpha=None, reg_lambda=None,
             scale_pos_weight=None, subsample=None, tree_method=None,
             validate_parameters=None, verbosity=None)
Component: 18

(11940, 18)(2985, 18)
Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  30 out of  45 | elapsed:   50.4s remaining:   25.2s
[Parallel(n_jobs=-1)]: Done  38 out of  45 | elapsed:  1.1min remaining:   11.5s
[Parallel(n_jobs=-1)]: Done  45 out of  45 | elapsed:  1.1min finished


Parameters: { "silent" } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Melhores parametros: {'colsample_bytree': 0.7, 'learning_rate': 0.03, 'max_depth': 5, 'min_child_weight': 4, 'n_estimators': 500, 'nthread': 4, 'objective': 'reg:linear', 'silent': 1, 'subsample': 0.7}

Score Grid: 0.0769452017771339


Modelo: LinearRegression()
Component: 19

(11940, 19)(2985, 19)
Fitting 5 folds for each of 4 candidates, totalling 20 fits
Melhores parametros: {'fit_intercept': 'True', 'normalize': 'True'}

Score Grid: 0.09017040586954322


Modelo: XGBRegressor(base_score=None, booster=None, colsample_bylevel=None,
             colsample_bynode=None, colsample_bytree=None, gamma=None,
             gpu_id=None, importance_type='gain', interaction_constraints=None,
             learning_rate

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   9 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  13 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  17 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.



Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=-1)]: Done  30 out of  45 | elapsed:   50.2s remaining:   25.1s
[Parallel(n_jobs=-1)]: Done  38 out of  45 | elapsed:  1.0min remaining:   11.5s
[Parallel(n_jobs=-1)]: Done  45 out of  45 | elapsed:  1.1min finished


Parameters: { "silent" } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Melhores parametros: {'colsample_bytree': 0.7, 'learning_rate': 0.03, 'max_depth': 5, 'min_child_weight': 4, 'n_estimators': 500, 'nthread': 4, 'objective': 'reg:linear', 'silent': 1, 'subsample': 0.7}

Score Grid: 0.0767880470459585
Exportando DataFrame de Scores



In [23]:
X_medias = X.drop(["a2","a4","b2","b4","c2","c4","g2","l2","l4"], axis = 1)

X_test_medias = X_test.drop(["a2","a4","b2","b4","c2","c4","g2","l2","l4"], axis = 1)

df_scores_price = prever(X_medias, X_test_medias, y_price, y_price_test, "price",[10,11,12,13,14,15,16,17,18,19])



Modelo: LinearRegression()
Component: 10

(11940, 10)(2985, 10)
Fitting 5 folds for each of 4 candidates, totalling 20 fits
Melhores parametros: {'fit_intercept': 'True', 'normalize': 'True'}

Score Grid: 0.09399396637407219


Modelo: XGBRegressor(base_score=None, booster=None, colsample_bylevel=None,
             colsample_bynode=None, colsample_bytree=None, gamma=None,
             gpu_id=None, importance_type='gain', interaction_constraints=None,
             learning_rate=None, max_delta_step=None, max_depth=None,
             min_child_weight=None, missing=nan, monotone_constraints=None,
             n_estimators=100, n_jobs=None, num_parallel_tree=None,
             random_state=None, reg_alpha=None, reg_lambda=None,
             scale_pos_weight=None, subsample=None, tree_method=None,
             validate_parameters=None, verbosity=None)
Component: 10

(11940, 10)(2985, 10)
Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   9 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  13 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  17 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  30 out of  45 | elapsed:   31.4s remaining:   15.7s
[Parallel(n_jobs=-1)]: Done  38 out of  45 | elapsed:   40.0s remaining:    7.3s
[Parallel(n_jobs=-1)]: Done  45 out of  45 | elapsed:   41.9s finished


Parameters: { "silent" } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Melhores parametros: {'colsample_bytree': 0.7, 'learning_rate': 0.03, 'max_depth': 5, 'min_child_weight': 4, 'n_estimators': 500, 'nthread': 4, 'objective': 'reg:linear', 'silent': 1, 'subsample': 0.7}

Score Grid: 0.08265644933404635


Modelo: LinearRegression()
Component: 11

(11940, 11)(2985, 11)
Fitting 5 folds for each of 4 candidates, totalling 20 fits
Melhores parametros: {'fit_intercept': 'True', 'normalize': 'True'}

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   9 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  13 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  17 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.




Score Grid: 0.09399441891328775


Modelo: XGBRegressor(base_score=None, booster=None, colsample_bylevel=None,
             colsample_bynode=None, colsample_bytree=None, gamma=None,
             gpu_id=None, importance_type='gain', interaction_constraints=None,
             learning_rate=None, max_delta_step=None, max_depth=None,
             min_child_weight=None, missing=nan, monotone_constraints=None,
             n_estimators=100, n_jobs=None, num_parallel_tree=None,
             random_state=None, reg_alpha=None, reg_lambda=None,
             scale_pos_weight=None, subsample=None, tree_method=None,
             validate_parameters=None, verbosity=None)
Component: 11

(11940, 11)(2985, 11)
Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=-1)]: Done  30 out of  45 | elapsed:   31.0s remaining:   15.4s
[Parallel(n_jobs=-1)]: Done  38 out of  45 | elapsed:   40.0s remaining:    7.3s
[Parallel(n_jobs=-1)]: Done  45 out of  45 | elapsed:   41.3s finished


Parameters: { "silent" } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Melhores parametros: {'colsample_bytree': 0.7, 'learning_rate': 0.03, 'max_depth': 5, 'min_child_weight': 4, 'n_estimators': 500, 'nthread': 4, 'objective': 'reg:linear', 'silent': 1, 'subsample': 0.7}

Score Grid: 0.08272216887537957


Modelo: LinearRegression()
Component: 12

(11940, 12)(2985, 12)
Fitting 5 folds for each of 4 candidates, totalling 20 fits
Melhores parametros: {'fit_intercept': 'True', 'normalize': 'True'}

Score Grid: 0.0940017435798522


Modelo: XGBRegressor(base_score=None, booster=None, colsample_bylevel=None,
             colsample_bynode=None, colsample_bytree=None, gamma=None,
             gpu_id=None, importance_type='gain', interaction_constraints=None,
             learning_rate

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   9 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  13 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  17 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  30 out of  45 | elapsed:   33.3s remaining:   16.6s
[Parallel(n_jobs=-1)]: Done  38 out of  45 | elapsed:   41.8s remaining:    7.6s
[Parallel(n_jobs=-1)]: Done  45 out of  45 | elapsed:   44.1s finished


Parameters: { "silent" } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Melhores parametros: {'colsample_bytree': 0.7, 'learning_rate': 0.03, 'max_depth': 5, 'min_child_weight': 4, 'n_estimators': 500, 'nthread': 4, 'objective': 'reg:linear', 'silent': 1, 'subsample': 0.7}

Score Grid: 0.08206592565826834


Modelo: LinearRegression()
Component: 13

(11940, 13)(2985, 13)
Fitting 5 folds for each of 4 candidates, totalling 20 fits
Melhores parametros: {'fit_intercept': 'True', 'normalize': 'True'}

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   9 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  13 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  17 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.




Score Grid: 0.09401215967137432


Modelo: XGBRegressor(base_score=None, booster=None, colsample_bylevel=None,
             colsample_bynode=None, colsample_bytree=None, gamma=None,
             gpu_id=None, importance_type='gain', interaction_constraints=None,
             learning_rate=None, max_delta_step=None, max_depth=None,
             min_child_weight=None, missing=nan, monotone_constraints=None,
             n_estimators=100, n_jobs=None, num_parallel_tree=None,
             random_state=None, reg_alpha=None, reg_lambda=None,
             scale_pos_weight=None, subsample=None, tree_method=None,
             validate_parameters=None, verbosity=None)
Component: 13

(11940, 13)(2985, 13)
Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=-1)]: Done  30 out of  45 | elapsed:   36.9s remaining:   18.4s
[Parallel(n_jobs=-1)]: Done  38 out of  45 | elapsed:   46.1s remaining:    8.4s
[Parallel(n_jobs=-1)]: Done  45 out of  45 | elapsed:   48.8s finished


Parameters: { "silent" } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Melhores parametros: {'colsample_bytree': 0.7, 'learning_rate': 0.03, 'max_depth': 5, 'min_child_weight': 4, 'n_estimators': 500, 'nthread': 4, 'objective': 'reg:linear', 'silent': 1, 'subsample': 0.7}

Score Grid: 0.08196659550798599


Modelo: LinearRegression()
Component: 14

(11940, 14)(2985, 14)
Fitting 5 folds for each of 4 candidates, totalling 20 fits
Melhores parametros: {'fit_intercept': 'True', 'normalize': 'True'}

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   9 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  13 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  17 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.




Score Grid: 0.09401592839365935


Modelo: XGBRegressor(base_score=None, booster=None, colsample_bylevel=None,
             colsample_bynode=None, colsample_bytree=None, gamma=None,
             gpu_id=None, importance_type='gain', interaction_constraints=None,
             learning_rate=None, max_delta_step=None, max_depth=None,
             min_child_weight=None, missing=nan, monotone_constraints=None,
             n_estimators=100, n_jobs=None, num_parallel_tree=None,
             random_state=None, reg_alpha=None, reg_lambda=None,
             scale_pos_weight=None, subsample=None, tree_method=None,
             validate_parameters=None, verbosity=None)
Component: 14

(11940, 14)(2985, 14)
Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=-1)]: Done  30 out of  45 | elapsed:   36.6s remaining:   18.3s
[Parallel(n_jobs=-1)]: Done  38 out of  45 | elapsed:   45.6s remaining:    8.3s
[Parallel(n_jobs=-1)]: Done  45 out of  45 | elapsed:   49.3s finished


Parameters: { "silent" } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Melhores parametros: {'colsample_bytree': 0.7, 'learning_rate': 0.03, 'max_depth': 5, 'min_child_weight': 4, 'n_estimators': 500, 'nthread': 4, 'objective': 'reg:linear', 'silent': 1, 'subsample': 0.7}

Score Grid: 0.08178999133699104


Modelo: LinearRegression()
Component: 15

(11940, 15)(2985, 15)
Fitting 5 folds for each of 4 candidates, totalling 20 fits
Melhores parametros: {'fit_intercept': 'True', 'normalize': 'True'}

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   9 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  13 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  17 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.




Score Grid: 0.09401127750415346


Modelo: XGBRegressor(base_score=None, booster=None, colsample_bylevel=None,
             colsample_bynode=None, colsample_bytree=None, gamma=None,
             gpu_id=None, importance_type='gain', interaction_constraints=None,
             learning_rate=None, max_delta_step=None, max_depth=None,
             min_child_weight=None, missing=nan, monotone_constraints=None,
             n_estimators=100, n_jobs=None, num_parallel_tree=None,
             random_state=None, reg_alpha=None, reg_lambda=None,
             scale_pos_weight=None, subsample=None, tree_method=None,
             validate_parameters=None, verbosity=None)
Component: 15

(11940, 15)(2985, 15)
Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=-1)]: Done  30 out of  45 | elapsed:   40.4s remaining:   20.2s
[Parallel(n_jobs=-1)]: Done  38 out of  45 | elapsed:   50.6s remaining:    9.2s
[Parallel(n_jobs=-1)]: Done  45 out of  45 | elapsed:   53.6s finished


Parameters: { "silent" } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Melhores parametros: {'colsample_bytree': 0.7, 'learning_rate': 0.03, 'max_depth': 5, 'min_child_weight': 4, 'n_estimators': 500, 'nthread': 4, 'objective': 'reg:linear', 'silent': 1, 'subsample': 0.7}

Score Grid: 0.08137313553038952


Modelo: LinearRegression()
Component: 16

(11940, 16)(2985, 16)
Fitting 5 folds for each of 4 candidates, totalling 20 fits
Melhores parametros: {'fit_intercept': 'True', 'normalize': 'True'}

Score Grid: 0.09401436144093704


Modelo: XGBRegressor(base_score=None, booster=None, colsample_bylevel=None,
             colsample_bynode=None, colsample_bytree=None, gamma=None,
             gpu_id=None, importance_type='gain', interaction_constraints=None,
             learning_rat

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   9 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  13 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  17 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  30 out of  45 | elapsed:   43.6s remaining:   21.8s
[Parallel(n_jobs=-1)]: Done  38 out of  45 | elapsed:   56.6s remaining:   10.3s
[Parallel(n_jobs=-1)]: Done  45 out of  45 | elapsed:   58.1s finished


Parameters: { "silent" } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Melhores parametros: {'colsample_bytree': 0.7, 'learning_rate': 0.03, 'max_depth': 5, 'min_child_weight': 4, 'n_estimators': 500, 'nthread': 4, 'objective': 'reg:linear', 'silent': 1, 'subsample': 0.7}

Score Grid: 0.08095656368173347


Modelo: LinearRegression()
Component: 17

(11940, 17)(2985, 17)
Fitting 5 folds for each of 4 candidates, totalling 20 fits
Melhores parametros: {'fit_intercept': 'True', 'normalize': 'True'}

Score Grid: 0.09397767145831261


Modelo: XGBRegressor(base_score=None, booster=None, colsample_bylevel=None,
             colsample_bynode=None, colsample_bytree=None, gamma=None,
             gpu_id=None, importance_type='gain', interaction_constraints=None,
             learning_rat

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   9 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  13 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  17 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  30 out of  45 | elapsed:   45.8s remaining:   22.9s
[Parallel(n_jobs=-1)]: Done  38 out of  45 | elapsed:   53.4s remaining:    9.8s
[Parallel(n_jobs=-1)]: Done  45 out of  45 | elapsed:   58.9s finished


Parameters: { "silent" } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Melhores parametros: {'colsample_bytree': 0.7, 'learning_rate': 0.03, 'max_depth': 5, 'min_child_weight': 4, 'n_estimators': 500, 'nthread': 4, 'objective': 'reg:linear', 'silent': 1, 'subsample': 0.7}

Score Grid: 0.08102677872803943


Modelo: LinearRegression()
Component: 18

(11940, 18)(2985, 18)
Fitting 5 folds for each of 4 candidates, totalling 20 fits
Melhores parametros: {'fit_intercept': 'True', 'normalize': 'True'}

Score Grid: 0.09399120889881327


Modelo: XGBRegressor(base_score=None, booster=None, colsample_bylevel=None,
             colsample_bynode=None, colsample_bytree=None, gamma=None,
             gpu_id=None, importance_type='gain', interaction_constraints=None,
             learning_rat

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   9 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  13 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  17 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  30 out of  45 | elapsed:   49.9s remaining:   24.9s
[Parallel(n_jobs=-1)]: Done  38 out of  45 | elapsed:  1.1min remaining:   11.8s
[Parallel(n_jobs=-1)]: Done  45 out of  45 | elapsed:  1.1min finished


Parameters: { "silent" } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Melhores parametros: {'colsample_bytree': 0.7, 'learning_rate': 0.03, 'max_depth': 5, 'min_child_weight': 4, 'n_estimators': 500, 'nthread': 4, 'objective': 'reg:linear', 'silent': 1, 'subsample': 0.7}

Score Grid: 0.08073055786810927


Modelo: LinearRegression()
Component: 19

(11940, 19)(2985, 19)
Fitting 5 folds for each of 4 candidates, totalling 20 fits
Melhores parametros: {'fit_intercept': 'True', 'normalize': 'True'}

Score Grid: 0.0939895182171829


Modelo: XGBRegressor(base_score=None, booster=None, colsample_bylevel=None,
             colsample_bynode=None, colsample_bytree=None, gamma=None,
             gpu_id=None, importance_type='gain', interaction_constraints=None,
             learning_rate

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   9 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  13 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  17 out of  20 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  30 out of  45 | elapsed:   49.6s remaining:   24.7s
[Parallel(n_jobs=-1)]: Done  38 out of  45 | elapsed:  1.1min remaining:   11.7s
[Parallel(n_jobs=-1)]: Done  45 out of  45 | elapsed:  1.1min finished


Parameters: { "silent" } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Melhores parametros: {'colsample_bytree': 0.7, 'learning_rate': 0.03, 'max_depth': 5, 'min_child_weight': 4, 'n_estimators': 500, 'nthread': 4, 'objective': 'reg:linear', 'silent': 1, 'subsample': 0.7}

Score Grid: 0.08049927850109048
Exportando DataFrame de Scores



In [14]:
df_scores_price = prever(X, X_test, y_price, y_price_test, "price")



Modelo: SVR()
Component: 20

(11940, 20)(2985, 20)
Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done   5 out of   5 | elapsed:   32.5s finished


Melhores parametros: {'C': 0.1, 'coef0': 0.01, 'degree': 3, 'epsilon': 0.1, 'gamma': 'auto', 'kernel': 'rbf'}

Score Grid: 0.07340112996089755


Modelo: SVR()
Component: 21

(11940, 21)(2985, 21)
Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done   5 out of   5 | elapsed:   33.0s finished


Melhores parametros: {'C': 0.1, 'coef0': 0.01, 'degree': 3, 'epsilon': 0.1, 'gamma': 'auto', 'kernel': 'rbf'}

Score Grid: 0.07340545735792878


Modelo: SVR()
Component: 22

(11940, 22)(2985, 22)
Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done   5 out of   5 | elapsed:   36.9s finished


Melhores parametros: {'C': 0.1, 'coef0': 0.01, 'degree': 3, 'epsilon': 0.1, 'gamma': 'auto', 'kernel': 'rbf'}

Score Grid: 0.07340937641843025


Modelo: SVR()
Component: 23

(11940, 23)(2985, 23)
Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done   5 out of   5 | elapsed:   36.2s finished


Melhores parametros: {'C': 0.1, 'coef0': 0.01, 'degree': 3, 'epsilon': 0.1, 'gamma': 'auto', 'kernel': 'rbf'}

Score Grid: 0.0734123720169559


Modelo: SVR()
Component: 24

(11940, 24)(2985, 24)
Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done   5 out of   5 | elapsed:   34.8s finished


Melhores parametros: {'C': 0.1, 'coef0': 0.01, 'degree': 3, 'epsilon': 0.1, 'gamma': 'auto', 'kernel': 'rbf'}

Score Grid: 0.07341799510140862


Modelo: SVR()
Component: 25

(11940, 25)(2985, 25)
Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done   5 out of   5 | elapsed:   34.3s finished


Melhores parametros: {'C': 0.1, 'coef0': 0.01, 'degree': 3, 'epsilon': 0.1, 'gamma': 'auto', 'kernel': 'rbf'}

Score Grid: 0.07342286630702807


Modelo: SVR()
Component: 26

(11940, 26)(2985, 26)
Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done   5 out of   5 | elapsed:   34.0s finished


Melhores parametros: {'C': 0.1, 'coef0': 0.01, 'degree': 3, 'epsilon': 0.1, 'gamma': 'auto', 'kernel': 'rbf'}

Score Grid: 0.0734280944597349


Modelo: SVR()
Component: 27

(11940, 27)(2985, 27)
Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done   5 out of   5 | elapsed:   35.8s finished


Melhores parametros: {'C': 0.1, 'coef0': 0.01, 'degree': 3, 'epsilon': 0.1, 'gamma': 'auto', 'kernel': 'rbf'}

Score Grid: 0.0734339078476797


Modelo: SVR()
Component: 28

(11940, 28)(2985, 28)
Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done   5 out of   5 | elapsed:   35.1s finished


Melhores parametros: {'C': 0.1, 'coef0': 0.01, 'degree': 3, 'epsilon': 0.1, 'gamma': 'auto', 'kernel': 'rbf'}

Score Grid: 0.0734392225418337
Exportando DataFrame de Scores



In [16]:
df_scores_price.Scores.min()

0.073401129960898

In [18]:
df_scores_price[df_scores_price.Scores == 0.073401129960898]

Unnamed: 0,Model,Params,Scores,PCA
0,SVR(),"{'C': 0.1, 'coef0': 0.01, 'degree': 3, 'epsilo...",0.073401,20


In [38]:
df_scores_trans.Scores.min()

0.071282454469712

In [16]:
df_scores_trans[df_scores_trans.Scores == 0.089959673362208]

Unnamed: 0,Model,Params,Scores,PCA
16,LinearRegression(),"{'fit_intercept': 'True', 'normalize': 'True'}",0.08996,28


In [17]:
# Resultado sem scaling

0.089947941686274 + 0.093949935453886
0.089959673362208 + 0.093937671406177

0.18389734476838498

## Melhores resultados:

Trans = 0.089947941686274 (com scaling) <br><br>
``LinearRegression()	{'fit_intercept': 'True', 'normalize': 'True'}	PCA = 25``

<br><br><br>
Price = 0.093937671406177 (sem scaling) <br><br>
``LinearRegression()	{'fit_intercept': 'True', 'normalize': 'True'}  PCA = 26``


In [46]:
0.089947941686274  + 0.093937671406177

0.183885613092451

In [17]:
0.071282454469712 + 0.073401129960898

0.14468358443061

In [None]:
# PRICE: PCA 19 - XGRegressor {'colsample_bytree': 0.7, 'learning_rate': 0.03, 'max_depth': 5, 'min_child_weight': 4, 'n_estimators': 500, 'nthread': 4, 'objective': 'reg:linear', 'silent': 1, 'subsample': 0.7}

In [33]:
def geral_resultados_submissao(test_price_pca, test_trans_pca, clf_price, clf_trans):

    # pca_price = PCA(n_components = n_price)
    # pca_trans = PCA(n_components = n_price)

    # test_price_PCA = pca_price.fit_transform(test.drop("id", axis=1))
    # test_trans_PCA = pca_trans.fit_transform(test.drop("id", axis=1))
    
    cent_price_cor = clf_price.predict(test_price_PCA)
    cent_trans_cor = clf_trans.predict(test_trans_PCA)


    df_sub = pd.DataFrame({"cent_price_cor": cent_price_cor, "cent_trans_cor": cent_trans_cor})
    
    df_sub.to_csv("./../Submissoes/df_sub_SVR_{}.csv".format(datetime.now().strftime("%d-%m-%Y_%Hh%Mm%Ss")), index=False)

    return df_sub

In [34]:
pca = PCA(n_components = 19)

train_PCA = pca.fit_transform(X)

clf_price = XGBRegressor(colsample_bytree = 0.7, 
                         learning_rate = 0.03, 
                         max_depth = 5, 
                          min_child_weight= 4, 
                          n_estimators= 500, 
                          nthread= 4, 
                          objective= 'reg:linear', 
                          silent= 1, 
                          subsample= 0.7)

clf_price.fit(train_PCA, y_price)

clf_trans = LinearRegression()

clf_trans.fit(train_PCA, y_trans)

test_price_PCA = pca.fit_transform(test.drop("id", axis=1))

test_trans_PCA = pca.fit_transform(test.drop("id", axis=1))


df_sub = geral_resultados_submissao(test_price_PCA, test_trans_PCA, clf_price, clf_trans)

print(df_sub)

print("\nPrograma executado com sucesso \n")

Parameters: { "silent" } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


      cent_price_cor  cent_trans_cor
0          -0.205326        0.371357
1          -0.241557        0.360156
2          -0.183218        0.357172
3          -0.226850        0.367688
4          -0.199653        0.362715
...              ...             ...
8054       -0.205482        0.362250
8055       -0.203140        0.363461
8056       -0.186810        0.361155
8057       -0.201581        0.364197
8058       -0.184401        0.366141

[8059 rows x 2 columns]

Programa executado com sucesso 





In [36]:
sub_06_05 = pd.read_csv("./../Submissoes/df_sub_06-05-2021_15h26m49s.csv") 

In [37]:
sub_06_05.head()

Unnamed: 0,cent_price_cor,cent_trans_cor
0,-0.195485,0.363419
1,-0.195772,0.369798
2,-0.210286,0.368865
3,-0.202342,0.361955
4,-0.208677,0.364952


In [39]:
sub_nova = pd.DataFrame()

sub_nova["cent_price_cor"] = df_sub["cent_price_cor"]

In [41]:
sub_nova["cent_trans_cor"] = sub_06_05["cent_trans_cor"]

In [42]:
sub_nova

Unnamed: 0,cent_price_cor,cent_trans_cor
0,-0.205326,0.363419
1,-0.241557,0.369798
2,-0.183218,0.368865
3,-0.226850,0.361955
4,-0.199653,0.364952
...,...,...
8054,-0.205482,0.360974
8055,-0.203140,0.362880
8056,-0.186810,0.373963
8057,-0.201581,0.356346


In [48]:
sub_nova.to_csv("sub_nova.csv", index=False)

In [23]:
pca_price = PCA(n_components = 26)
pca_trans = PCA(n_components = 25)

X_trans = scaler.fit_transform(X)

train_price_PCA = pca_price.fit_transform(X)
train_trans_PCA = pca_trans.fit_transform(X_trans)

clf_price = LinearRegression({'fit_intercept': 'True', 'normalize': 'True'})
clf_price.fit(train_price_PCA, y_price)

clf_trans = LinearRegression({'fit_intercept': 'True', 'normalize': 'True'})
clf_trans.fit(train_trans_PCA, y_trans)

test_price_PCA = pca_price.fit_transform(test.drop("id", axis=1))

test_trans = scaler.fit_transform(test.drop("id", axis=1))
test_trans_PCA = pca_trans.fit_transform(test)


# PRICE ==> PCA = 26 // Linear Reg

# TRANS ==> PCA = 25 // Linear Reg - com scaling

df_sub = geral_resultados_submissao(test_price_PCA, test_trans_PCA, clf_price, clf_trans)

print(df_sub)

print("\nPrograma executado com sucesso \n")

NameError: name 'scaler' is not defined

In [36]:
df_scores_price_scaling.Scores.min()

0.093949935453886

In [13]:
df_scores_trans_scaling.Scores.min()

0.071282454469712

In [38]:
0.089947941686274 + 0.093949935453886

0.18389787714015998

In [44]:
df_scores_price_scaling[df_scores_price_scaling.Scores == 0.093949935453886]

Unnamed: 0,Model,Params,Scores,PCA
2,LinearRegression(),"{'fit_intercept': 'True', 'normalize': 'True'}",0.09395,21


In [15]:
df_scores_trans_scaling[df_scores_trans_scaling.Scores == 0.071282454469712]

Unnamed: 0,Model,Params,Scores,PCA
0,SVR(),"{'C': 0.1, 'coef0': 0.01, 'degree': 3, 'epsilo...",0.071282,20


In [60]:
test.head()

Unnamed: 0,id,n,p,f,x,y,z,a1,a2,a3,...,g1,g2,l1,l2,l3,l4,e1,e2,volume,densidade
0,1,558,0.5,0.2,28,21,2,1.0,0.29,1.3,...,0.198,1.8,0.149,2.0,0.078,1.7,0.5,2.9,1176,2.107527
1,2,910,0.9,0.3,28,19,9,2.2,0.14,1.6,...,0.172,1.0,0.086,0.8,0.03,0.8,0.2,1.6,4788,5.261538
2,3,213,0.8,0.5,21,15,8,2.2,0.22,2.6,...,0.083,0.7,0.019,1.4,0.016,1.2,2.6,2.4,2520,11.830986
3,4,654,0.7,0.5,14,15,2,2.2,0.12,3.8,...,0.185,1.3,0.11,0.8,0.006,1.7,0.2,1.7,420,0.642202
4,5,672,0.7,0.5,24,10,5,3.7,0.2,3.7,...,0.158,0.9,0.148,1.9,0.038,1.3,1.1,2.8,1200,1.785714
