In [2]:
import time
import os


import numpy as np
import pandas as pd

import datetime as dt
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.model_selection import GridSearchCV,train_test_split



from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.externals import joblib

from sklearn.linear_model import LinearRegression
from sklearn import svm
from sklearn.ensemble import RandomForestRegressor

from sklearn.ensemble import GradientBoostingRegressor

from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor



In [3]:

models = {
            'linear' : {
                    'mod' : LinearRegression(),
                    'par' : {}
                    },  
    
            'gradient' : {
                    'mod' : GradientBoostingRegressor(warm_start = True),
                    'par' : {'loss' : ('ls', 'quantile'),
                             'max_depth' : [3, 4, 5, 6, 7]}
                        },
           'tree':{'mod': DecisionTreeRegressor(),
                     'par':{'splitter':('best','random'),
                            'max_depth': [None, 2,4,6],
                            'min_samples_leaf':[1,5,8]}},
            
        'RandomForest' : {
                    'mod' : RandomForestRegressor(n_estimators=200, random_state=39, max_depth=4, criterion = 'mse'),
                    'par' : {'max_depth' :[None, 2,4,6],
                            'min_samples_leaf':[1,5,8]}
                        },
        'Knn' : {
                    'mod' : KNeighborsRegressor(),
                    'par' : {'n_neighbors' :[5, 10, 15],
                            'leaf_size':[15,25,30]}
                        }, 
    
        }


In [4]:
def grid(x_name,n_proc, os_X_tt, os_Y_tt, X_test, y_test,  models, score = r2_score, cv = 7):
    
    # Gridsearch
    
    result = dict()
    bestmodels = models.copy()
    for name in models:
        print('*'*80)
        print("Model: " + name)
        t_beg = time.time()

        pipeline = Pipeline([('scaler', StandardScaler()), (name,  bestmodels[name]['mod'])])          
        parameters = {}          
        for par in bestmodels[name]['par']:
            aux = name + '__' +  par
            parameters[aux] = bestmodels[name]['par'][par]    
        
        aux = GridSearchCV(pipeline, parameters, n_jobs = n_proc,\
                          scoring = score, verbose=2, cv = cv)
        
        aux.fit(os_X_tt, os_Y_tt)
        y_true, y_pred = y_test , aux.predict(X_test)
        

        mse = mean_squared_error(y_test,y_pred)
        r2 = r2_score(y_test, y_pred, multioutput='uniform_average')
        
        bestmodels[name]['bestModel'] = aux.best_estimator_
        bestmodels[name][score] = aux.best_score_
        bestmodels[name]['cols_order'] = os_X_tt.columns.values
        selection_time = time.time() - t_beg

        bestmodels[name]['selection_time'] = selection_time

        sample_f_path = f'modelos/{x_name}' + f'{name}_{dt.datetime.now().strftime("%Y%m%d-%H%M")}.sav'

        print(f"Saving model at {sample_f_path}")    
        joblib.dump(bestmodels[name]['bestModel'], sample_f_path)

        print(f"El tiempo de seleccion fue: {selection_time:0.3f} s")
        print(f"El error {score} de la familia {name} es: {bestmodels[name][score]:0.3f}")
        print('*'*80)
    
    
        result[name] = {"mse": mse, "r2": r2}
        
    mod_name = None
    best_mae = -np.inf
    for name in models:
        if bestmodels[name][score] > best_mae:
            mod_name = name
            best_mae = bestmodels[name][score]

    print(f"best model: " + mod_name + f" with an error {score} of: " + str(best_mae))
    
    return bestmodels, result

In [5]:
#Bestmodels

def get_max(dictionary, key_val):
    auc_list = []
    auc_dict = {}

    for key in dictionary:
        for key2 in dictionary[key]:
            if key_val in key2:
                auc_list.append(dictionary[key][key_val])

    max_key = ''
    max_val = max(auc_list)

    for key in dictionary:
        for key2 in dictionary[key]:
            if max_val == dictionary[key][key_val]:
                max_key = key
                
    return max_key, max_val

In [6]:
#Bestmodels

def get_min(dictionary, key_val):
    auc_list = []
    auc_dict = {}

    for key in dictionary:
        for key2 in dictionary[key]:
            if key_val in key2:
                auc_list.append(dictionary[key][key_val])

    min_key = ''
    min_val = min(auc_list)

    for key in dictionary:
        for key2 in dictionary[key]:
            if min_val == dictionary[key][key_val]:
                min_key = key
                
    return min_key, min_val

# 1. Lectura de los datos

In [7]:
path = 'datanew.csv'
data = pd.read_csv(path, sep = ',', na_filter = False)
#data = data.set_index('id')

In [8]:
# variables seleccionadas según diferentes criterios
y = data[['y']].copy()
X = data.drop(columns = ['y']).copy()

In [9]:
# Definición del tamaño del test
test_size = 0.3

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size)

In [11]:

# variables base original
X0_train= X_train
X0_test= X_test

# variables con probabilidad de selección mayor al 50%
X1_train= X_train[['X1','X2','X4','X5','X8','X9','X11','X12','X13','X14','X16','X17','X18','X19','X24','X30','X32','X33','X35','X39','X44','X48']].copy()
X1_test = X_test[['X1','X2','X4','X5','X8','X9','X11','X12','X13','X14','X16','X17','X18','X19','X24','X30','X32','X33','X35','X39','X44','X48']].copy()

# variables con probabilidad de selección mayor al 60%
X2_train= X_train[['X1','X2','X4','X8','X11','X13','X14','X16','X17','X18','X19','X24','X33','X35','X39','X44']].copy()
X2_test= X_test[['X1','X2','X4','X8','X11','X13','X14','X16','X17','X18','X19','X24','X33','X35','X39','X44']].copy()

# variables con probabilidad de selección mayor al 70%
X3_train= X_train[['X1','X4','X11','X13','X16','X18','X19','X39']].copy()
X3_test= X_test[['X1','X4','X11','X13','X16','X18','X19','X39']].copy()

# variables con probabilidad de selección mayor al 90%
X4_train= X_train[['X11','X18','X39']].copy()
X4_test= X_test[['X11','X18','X39']].copy()



In [12]:
Bestmodels_X0, result_X0  = grid('X1', -1, X0_train, y_train.values, X0_test, y_test, models, score = 'r2', cv = 5)

********************************************************************************
Model: linear
Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    0.9s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.


Saving model at modelos/X1linear_20191012-1145.sav
El tiempo de seleccion fue: 0.957 s
El error r2 de la familia linear es: -0.554
********************************************************************************
********************************************************************************
Model: gradient
Fitting 5 folds for each of 10 candidates, totalling 50 fits


[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done  19 out of  50 | elapsed:    0.8s remaining:    1.3s
[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed:    1.5s finished
  y = column_or_1d(y, warn=True)
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  28 out of 120 | elapsed:    0.1s remaining:    0.3s


Saving model at modelos/X1gradient_20191012-1146.sav
El tiempo de seleccion fue: 1.563 s
El error r2 de la familia gradient es: -0.065
********************************************************************************
********************************************************************************
Model: tree
Fitting 5 folds for each of 24 candidates, totalling 120 fits
Saving model at modelos/X1tree_20191012-1146.sav
El tiempo de seleccion fue: 0.156 s
El error r2 de la familia tree es: -0.026
********************************************************************************
********************************************************************************
Model: RandomForest
Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=-1)]: Done 120 out of 120 | elapsed:    0.2s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed:    1.4s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed:    1.4s finished
  self._final_estimator.fit(Xt, y, **fit_params)


Saving model at modelos/X1RandomForest_20191012-1146.sav
El tiempo de seleccion fue: 1.577 s
El error r2 de la familia RandomForest es: 0.071
********************************************************************************
********************************************************************************
Model: Knn
Fitting 5 folds for each of 9 candidates, totalling 45 fits
Saving model at modelos/X1Knn_20191012-1146.sav
El tiempo de seleccion fue: 0.171 s
El error r2 de la familia Knn es: -0.011
********************************************************************************
best model: RandomForest with an error r2 of: 0.07058656637533871


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  14 out of  45 | elapsed:    0.0s remaining:    0.1s
[Parallel(n_jobs=-1)]: Done  45 out of  45 | elapsed:    0.2s finished


In [13]:
result_X0

{'linear': {'mse': 11.02991383923831, 'r2': 0.0773524648404681},
 'gradient': {'mse': 12.175649314103095, 'r2': -0.018487813446042223},
 'tree': {'mse': 12.05330119959762, 'r2': -0.008253446439627776},
 'RandomForest': {'mse': 11.173361334666971, 'r2': 0.06535314372054879},
 'Knn': {'mse': 11.836937699820323, 'r2': 0.009845266993459001}}

In [14]:
Bestmodels_X1, result_X1  = grid('X1', -1, X1_train, y_train.values, X1_test, y_test, models, score = 'r2', cv = 5)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.


********************************************************************************
Model: linear
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Saving model at modelos/X1linear_20191012-1146.sav
El tiempo de seleccion fue: 0.020 s
El error r2 de la familia linear es: -0.307
********************************************************************************
********************************************************************************
Model: gradient
Fitting 5 folds for each of 10 candidates, totalling 50 fits


[Parallel(n_jobs=-1)]: Done  19 out of  50 | elapsed:    0.2s remaining:    0.3s
[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed:    0.9s finished
  y = column_or_1d(y, warn=True)
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  28 out of 120 | elapsed:    0.1s remaining:    0.4s


Saving model at modelos/X1gradient_20191012-1146.sav
El tiempo de seleccion fue: 0.909 s
El error r2 de la familia gradient es: -0.101
********************************************************************************
********************************************************************************
Model: tree
Fitting 5 folds for each of 24 candidates, totalling 120 fits


[Parallel(n_jobs=-1)]: Done 120 out of 120 | elapsed:    0.2s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.


Saving model at modelos/X1tree_20191012-1146.sav
El tiempo de seleccion fue: 0.219 s
El error r2 de la familia tree es: -0.012
********************************************************************************
********************************************************************************
Model: RandomForest
Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed:    1.1s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed:    1.1s finished
  self._final_estimator.fit(Xt, y, **fit_params)
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.


Saving model at modelos/X1RandomForest_20191012-1146.sav
El tiempo de seleccion fue: 1.213 s
El error r2 de la familia RandomForest es: 0.054
********************************************************************************
********************************************************************************
Model: Knn
Fitting 5 folds for each of 9 candidates, totalling 45 fits
Saving model at modelos/X1Knn_20191012-1146.sav
El tiempo de seleccion fue: 0.138 s
El error r2 de la familia Knn es: 0.094
********************************************************************************
best model: Knn with an error r2 of: 0.09375145026211877


[Parallel(n_jobs=-1)]: Done  14 out of  45 | elapsed:    0.1s remaining:    0.1s
[Parallel(n_jobs=-1)]: Done  45 out of  45 | elapsed:    0.1s finished


In [15]:
result_X1

{'linear': {'mse': 11.138857015744936, 'r2': 0.06823941511575438},
 'gradient': {'mse': 14.116814825147499, 'r2': -0.1808654711690234},
 'tree': {'mse': 13.1895171914119, 'r2': -0.10329742407496734},
 'RandomForest': {'mse': 11.486340488764494, 'r2': 0.039172572476455},
 'Knn': {'mse': 11.892945574403232, 'r2': 0.005160232442272261}}

In [16]:
Bestmodels_X2, result_X2  = grid('X2', -1, X2_train, y_train.values, X2_test, y_test, models, score = 'r2', cv = 5)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.


********************************************************************************
Model: linear
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Saving model at modelos/X2linear_20191012-1146.sav
El tiempo de seleccion fue: 0.019 s
El error r2 de la familia linear es: -0.111
********************************************************************************
********************************************************************************
Model: gradient
Fitting 5 folds for each of 10 candidates, totalling 50 fits


[Parallel(n_jobs=-1)]: Done  19 out of  50 | elapsed:    0.2s remaining:    0.3s
[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed:    1.0s finished
  y = column_or_1d(y, warn=True)
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  28 out of 120 | elapsed:    0.1s remaining:    0.4s


Saving model at modelos/X2gradient_20191012-1146.sav
El tiempo de seleccion fue: 1.032 s
El error r2 de la familia gradient es: -0.141
********************************************************************************
********************************************************************************
Model: tree
Fitting 5 folds for each of 24 candidates, totalling 120 fits


[Parallel(n_jobs=-1)]: Done 120 out of 120 | elapsed:    0.2s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.


Saving model at modelos/X2tree_20191012-1146.sav
El tiempo de seleccion fue: 0.200 s
El error r2 de la familia tree es: 0.040
********************************************************************************
********************************************************************************
Model: RandomForest
Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed:    1.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed:    1.0s finished
  self._final_estimator.fit(Xt, y, **fit_params)
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.


Saving model at modelos/X2RandomForest_20191012-1146.sav
El tiempo de seleccion fue: 1.105 s
El error r2 de la familia RandomForest es: 0.060
********************************************************************************
********************************************************************************
Model: Knn
Fitting 5 folds for each of 9 candidates, totalling 45 fits
Saving model at modelos/X2Knn_20191012-1146.sav
El tiempo de seleccion fue: 0.103 s
El error r2 de la familia Knn es: 0.098
********************************************************************************
best model: Knn with an error r2 of: 0.09750976008480833


[Parallel(n_jobs=-1)]: Done  14 out of  45 | elapsed:    0.1s remaining:    0.1s
[Parallel(n_jobs=-1)]: Done  45 out of  45 | elapsed:    0.1s finished


In [17]:
result_X2

{'linear': {'mse': 11.25791788030339, 'r2': 0.05828002514953068},
 'gradient': {'mse': 15.918936539056734, 'r2': -0.3316121752349046},
 'tree': {'mse': 12.80028722657763, 'r2': -0.07073850540173043},
 'RandomForest': {'mse': 11.614012622816988, 'r2': 0.028492853531322804},
 'Knn': {'mse': 12.002645159731856, 'r2': -0.004016090554142648}}

In [18]:
Bestmodels_X3, result_X3  = grid('X3', -1, X3_train, y_train.values, X3_test, y_test, models, score = 'r2', cv = 5)

********************************************************************************
Model: linear
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Saving model at modelos/X3linear_20191012-1146.sav
El tiempo de seleccion fue: 0.020 s
El error r2 de la familia linear es: 0.033
********************************************************************************
********************************************************************************
Model: gradient
Fitting 5 folds for each of 10 candidates, totalling 50 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  19 out of  50 | elapsed:    0.1s remaining:    0.2s
[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed:    0.9s finished
  y = column_or_1d(y, warn=True)
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  28 out of 120 | elapsed:    0.1s remaining:    0.3s


Saving model at modelos/X3gradient_20191012-1146.sav
El tiempo de seleccion fue: 0.934 s
El error r2 de la familia gradient es: -0.135
********************************************************************************
********************************************************************************
Model: tree
Fitting 5 folds for each of 24 candidates, totalling 120 fits
Saving model at modelos/X3tree_20191012-1146.sav
El tiempo de seleccion fue: 0.190 s
El error r2 de la familia tree es: 0.017
********************************************************************************
********************************************************************************
Model: RandomForest
Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=-1)]: Done 120 out of 120 | elapsed:    0.2s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed:    0.9s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed:    0.9s finished
  self._final_estimator.fit(Xt, y, **fit_params)
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.


Saving model at modelos/X3RandomForest_20191012-1146.sav
El tiempo de seleccion fue: 0.987 s
El error r2 de la familia RandomForest es: 0.041
********************************************************************************
********************************************************************************
Model: Knn
Fitting 5 folds for each of 9 candidates, totalling 45 fits
Saving model at modelos/X3Knn_20191012-1146.sav
El tiempo de seleccion fue: 0.095 s
El error r2 de la familia Knn es: 0.105
********************************************************************************
best model: Knn with an error r2 of: 0.10453541185319319


[Parallel(n_jobs=-1)]: Done  14 out of  45 | elapsed:    0.1s remaining:    0.1s
[Parallel(n_jobs=-1)]: Done  37 out of  45 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  45 out of  45 | elapsed:    0.1s finished


In [19]:
result_X3

{'linear': {'mse': 11.576046414519645, 'r2': 0.03166871047956843},
 'gradient': {'mse': 15.2424564087483, 'r2': -0.2750249041183266},
 'tree': {'mse': 11.266219866032124, 'r2': 0.05758556762418754},
 'RandomForest': {'mse': 11.622067520385498, 'r2': 0.027819064824006534},
 'Knn': {'mse': 12.371097828441073, 'r2': -0.03483699736831469}}

In [20]:
Bestmodels_X4, result_X4  = grid('X4', -1, X4_train, y_train.values, X4_test, y_test, models, score = 'r2', cv = 5)

********************************************************************************
Model: linear
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Saving model at modelos/X4linear_20191012-1146.sav
El tiempo de seleccion fue: 0.020 s
El error r2 de la familia linear es: 0.032
********************************************************************************
********************************************************************************
Model: gradient
Fitting 5 folds for each of 10 candidates, totalling 50 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  19 out of  50 | elapsed:    0.1s remaining:    0.2s
[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed:    0.9s finished
  y = column_or_1d(y, warn=True)
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  28 out of 120 | elapsed:    0.1s remaining:    0.3s
[Parallel(n_jobs=-1)]: Done 120 out of 120 | elapsed:    0.2s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.


Saving model at modelos/X4gradient_20191012-1146.sav
El tiempo de seleccion fue: 0.959 s
El error r2 de la familia gradient es: -0.330
********************************************************************************
********************************************************************************
Model: tree
Fitting 5 folds for each of 24 candidates, totalling 120 fits
Saving model at modelos/X4tree_20191012-1146.sav
El tiempo de seleccion fue: 0.157 s
El error r2 de la familia tree es: -0.002
********************************************************************************
********************************************************************************
Model: RandomForest
Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done  29 out of  60 | elapsed:    0.4s remaining:    0.5s
[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed:    0.8s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed:    0.8s finished
  self._final_estimator.fit(Xt, y, **fit_params)
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.


Saving model at modelos/X4RandomForest_20191012-1146.sav
El tiempo de seleccion fue: 0.913 s
El error r2 de la familia RandomForest es: -0.032
********************************************************************************
********************************************************************************
Model: Knn
Fitting 5 folds for each of 9 candidates, totalling 45 fits
Saving model at modelos/X4Knn_20191012-1146.sav
El tiempo de seleccion fue: 0.115 s
El error r2 de la familia Knn es: -0.001
********************************************************************************
best model: linear with an error r2 of: 0.032238121484270264


[Parallel(n_jobs=-1)]: Done  14 out of  45 | elapsed:    0.1s remaining:    0.1s
[Parallel(n_jobs=-1)]: Done  45 out of  45 | elapsed:    0.1s finished


In [21]:
result_X4

{'linear': {'mse': 11.094943346442458, 'r2': 0.07191277461178269},
 'gradient': {'mse': 12.917775461159124, 'r2': -0.08056634554869802},
 'tree': {'mse': 11.10463002158051, 'r2': 0.0711024884146032},
 'RandomForest': {'mse': 11.441695709434084, 'r2': 0.042907089010972954},
 'Knn': {'mse': 11.626972512490903, 'r2': 0.027408764350041914}}

In [22]:
# Forma en que vas a llamar la función

resultado = get_max(result_X0, 'r2')
best_X0 = {}
best_X0[resultado[0]] = resultado[1]
print('BestX0: ' + str(best_X0))

resultado = get_max(result_X1, 'r2')
best_X1 = {}
best_X1[resultado[0]] = resultado[1]
print('BestX1: ' + str(best_X1))


resultado = get_max(result_X2, 'r2')
best_X2 = {}
best_X2[resultado[0]] = resultado[1]
print('BestX2: ' + str(best_X2))

resultado = get_max(result_X3, 'r2')
best_X3 = {}
best_X3[resultado[0]] = resultado[1]
print('BestX3: ' + str(best_X3))


resultado = get_max(result_X4, 'r2')
best_X4 = {}
best_X4[resultado[0]] = resultado[1]
print('BestX4: ' + str(best_X4))






BestX0: {'linear': 0.0773524648404681}
BestX1: {'linear': 0.06823941511575438}
BestX2: {'linear': 0.05828002514953068}
BestX3: {'tree': 0.05758556762418754}
BestX4: {'linear': 0.07191277461178269}


In [23]:
# Forma en que vas a llamar la función

resultado = get_max(result_X0, 'mse')
best_X0 = {}
best_X0[resultado[0]] = resultado[1]
print('BestX0: ' + str(best_X0))

resultado = get_min(result_X1, 'mse')
best_X1 = {}
best_X1[resultado[0]] = resultado[1]
print('BestX1: ' + str(best_X1))


resultado = get_min(result_X2, 'mse')
best_X2 = {}
best_X2[resultado[0]] = resultado[1]
print('BestX2: ' + str(best_X2))

resultado = get_min(result_X3, 'mse')
best_X3 = {}
best_X3[resultado[0]] = resultado[1]
print('BestX3: ' + str(best_X3))


resultado = get_min(result_X4, 'mse')
best_X4 = {}
best_X4[resultado[0]] = resultado[1]
print('BestX4: ' + str(best_X4))

BestX0: {'gradient': 12.175649314103095}
BestX1: {'linear': 11.138857015744936}
BestX2: {'linear': 11.25791788030339}
BestX3: {'tree': 11.266219866032124}
BestX4: {'linear': 11.094943346442458}


In [28]:
Selected_model = Bestmodels_X4['linear']
Bestmodels_X4['linear']

{'mod': LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False),
 'par': {},
 'bestModel': Pipeline(memory=None,
          steps=[('scaler',
                  StandardScaler(copy=True, with_mean=True, with_std=True)),
                 ('linear',
                  LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
                                   normalize=False))],
          verbose=False),
 'r2': 0.032238121484270264,
 'cols_order': array(['X11', 'X18', 'X39'], dtype=object),
 'selection_time': 0.020483732223510742}

In [30]:
import pickle 
pickle.dump(Selected_model, open('modeleconomy2.sav', 'wb'))