## ITA 2021

<br>

Dicionário de Dados:

* n: número de agentes
* p: fração de traders
* f: grau de interesse dos traders
* x, y, z: dimensões do espaço aéreo
* a1, a2: média e desvio padrão do coeficiente do preço fundamental dos consumidores
* a3, a4: idem para os traders
* b1, b2: média e desvio padrão do coeficiente do preço de mercado dos consumidores
* b3, b4: idem para os traders
* c1, c2: média e desvio padrão do coeficiente do preço aleatório dos consumidores
* c3, c4: idem para os traders
* g1, g2: média e desvio padrão do grau de agressividade dos consumidores
* l1, l2: média e desvio padrão do coeficiente de desvalorização para os consumidores
* l3, l4: idem para os traders
* e1, e2: variabilidade no preço fundamental dos consumidores e traders, respectivamente
* cent_price_cor: correlação entre o preço final e centralidade das permissões de vôo
* cent_trans_cor: idem para o número de transações

In [None]:
import sys
!{sys.executable} -m pip install jupyternotify

In [1]:
%load_ext jupyternotify

<IPython.core.display.Javascript object>

In [21]:
# Importando Ferramentas Básicas
import pandas                  as pd
import matplotlib.pyplot       as plt
import numpy                   as np
import                            os
from   datetime            import datetime

In [32]:
# Importando Ferramentas de Limpeza
from sklearn.decomposition    import PCA
from sklearn.preprocessing    import StandardScaler
from sklearn.pipeline         import make_pipeline, Pipeline

In [23]:
# Importando Ferramentas de Modelo
from sklearn.svm              import SVR
from xgboost                  import XGBRegressor
from sklearn.model_selection  import train_test_split
from sklearn.model_selection  import GridSearchCV, RandomizedSearchCV
from sklearn.metrics          import accuracy_score, mean_absolute_error
from sklearn.linear_model     import LinearRegression, LogisticRegression, Lasso
from sklearn.base             import BaseEstimator

In [50]:
# Importando os dados
train = pd.read_csv('./../Dados/train.csv')
test = pd.read_csv('./../Dados/test.csv')

In [51]:
# Criando Features
dataframes = [train, test]

for df in dataframes:
    df['volume']  = df.x * df.y * df.z
    df['densidade'] = df.volume / df.n

In [None]:
train.head()

In [None]:
test.head()

In [None]:
train.isna().sum()/len(train)

In [None]:
test.isna().sum()/len(train)

In [None]:
train.cent_price_cor.describe()

In [None]:
train.cent_trans_cor.describe()

In [None]:
train.corr()["cent_price_cor"].abs().sort_values(ascending = True)

In [None]:
train.corr()["cent_trans_cor"].abs().sort_values(ascending = True)

In [None]:
y_1_pred = pipe_1.predict(X_test)

In [None]:
mean_absolute_error(y_1_test, y_1_pred)

## PCA

In [27]:
X = train.drop(columns = ['cent_price_cor', 'cent_trans_cor'], axis = 1)

scaler = StandardScaler()

transf_X_train = scaler.fit_transform(X_train)
transf_X_test = scaler.fit_transform(X_test)

In [28]:
y_price = train.cent_price_cor
y_trans = train.cent_trans_cor

X_train, X_test, y_price_train, y_price_test = train_test_split(X,y_price,
                                                    test_size = 0.25,
                                                    random_state = 0)

X_train, X_test, y_trans_train, y_trans_test = train_test_split(X,y_trans,
                                                    test_size = 0.25,
                                                    random_state = 0)

In [29]:
models = {'Linear Regression': LinearRegression(n_jobs = -1),
          'SVR': SVR(epsilon=0.2),
          'Lasso': Lasso(),
          'XGBoostRegressor': XGBRegressor()}

In [67]:
def fit_score_PCA(models,X_train,y_train,X_test,y_test,components):

    # Make a dict to keep model scores
    model_scores = {}
    
    for i in components:
        
        pca = PCA(n_components = i)
        X_train_PCA = pca.fit_transform(X_train)
        X_test_PCA = pca.transform(X_test)
        
        print(X_train_PCA.shape)
        print(X_test_PCA.shape)

        # Loop through models
        for name, model in models.items():

            # Fit the model to the data
            model.fit(X_train_PCA,y_train)
        
            y_pred = model.predict(X_test_PCA)

            #Evaluates the model and append its score to model_scores
            model_scores[name + '_' + str(i)] = mean_absolute_error(y_test, y_pred)

    return model_scores

In [68]:
model_scores_trans = fit_score_PCA(models,X_train,y_trans_train,X_test,y_trans_test, [0.95])
model_scores_price = fit_score_PCA(models,X_train,y_price_train,X_test,y_price_test, [0.95])

(8955, 1)
(2985, 1)
(8955, 1)
(2985, 1)


In [None]:
# Melhores scores para 0.95 (sem scaling)
0.0941312117033256 + 0.090252152275057

In [61]:
model_scores_trans = fit_score_PCA(models,transf_X_train,y_trans_train,transf_X_test,y_trans_test, [0.2])
model_scores_price = fit_score_PCA(models,transf_X_train,y_price_train,transf_X_test,y_price_test, [1])

(8955, 4)
(8955, 1)


In [None]:
# Melhores scores para 0.95 (com scaling)
0.0941305091243686 + 0.09025552474334281

In [74]:
model_scores_trans = fit_score_PCA(models,transf_X_train,y_trans_train,transf_X_test,y_trans_test, [0.1])
model_scores_price = fit_score_PCA(models,transf_X_train,y_price_train,transf_X_test,y_price_test, [1])

(8955, 2)
(2985, 2)
(8955, 1)
(2985, 1)


In [89]:
model_scores_trans = fit_score_PCA(models,transf_X_train,y_trans_train,transf_X_test,y_trans_test, [0.8,0.85,0.9,0.95])
#model_scores_price = fit_score_PCA(models,transf_X_train,y_price_train,transf_X_test,y_price_test, [0.91,0.95,1])

(8955, 21)
(2985, 21)
(8955, 22)
(2985, 22)
(8955, 24)
(2985, 24)
(8955, 25)
(2985, 25)


In [17]:
#'Linear Regression_0.95': 0.09020619481829613

sorted(model_scores_trans, key = model_scores_trans.get)

['Linear Regression_0.95',
 'Linear Regression_0.91',
 'Lasso_0.91',
 'Lasso_0.95',
 'Lasso_1',
 'Linear Regression_1',
 'SVR_1',
 'XGBoostRegressor_1',
 'XGBoostRegressor_0.91',
 'XGBoostRegressor_0.95',
 'SVR_0.91',
 'SVR_0.95']

In [18]:
# 'Lasso_0.91': 0.09411110306791731,
sorted(model_scores_price, key = model_scores_price.get)

['Lasso_0.91',
 'Lasso_0.95',
 'Lasso_1',
 'Linear Regression_1',
 'Linear Regression_0.95',
 'Linear Regression_0.91',
 'SVR_1',
 'XGBoostRegressor_1',
 'SVR_0.95',
 'SVR_0.91',
 'XGBoostRegressor_0.95',
 'XGBoostRegressor_0.91']

In [None]:
model_scores_trans = fit_score_PCA(models,transf_X_train,y_trans_train,transf_X_test,y_trans_test, [0.8,0.85,0.9,0.95])
model_scores_price = fit_score_PCA(models,transf_X_train,y_price_train,transf_X_test,y_price_test, [0.8,0.85,0.9,0.95])

In [19]:
{k: v for k, v in sorted(model_scores_price.items(), key=lambda item: item[1])}

{'Lasso_0.91': 0.09411110306791731,
 'Lasso_0.95': 0.09411110306791731,
 'Lasso_1': 0.09411110306791731,
 'Linear Regression_1': 0.09413049877635919,
 'Linear Regression_0.95': 0.09417792238315124,
 'Linear Regression_0.91': 0.09417835915003633,
 'SVR_1': 0.09434654907849423,
 'XGBoostRegressor_1': 0.09705142031202166,
 'SVR_0.95': 0.09830355128110657,
 'SVR_0.91': 0.09886755546696292,
 'XGBoostRegressor_0.95': 0.10000986378128596,
 'XGBoostRegressor_0.91': 0.10054150387570636}

In [20]:
{k: v for k, v in sorted(model_scores_trans.items(), key=lambda item: item[1])}

{'Linear Regression_0.95': 0.09020619481829613,
 'Linear Regression_0.91': 0.09021593345191478,
 'Lasso_0.91': 0.09025387237696017,
 'Lasso_0.95': 0.09025387237696017,
 'Lasso_1': 0.09025387237696017,
 'Linear Regression_1': 0.09025552931887561,
 'SVR_1': 0.0931376154572009,
 'XGBoostRegressor_1': 0.09335223579586452,
 'XGBoostRegressor_0.91': 0.09602769537295529,
 'XGBoostRegressor_0.95': 0.09618455696513305,
 'SVR_0.91': 0.10137796527210743,
 'SVR_0.95': 0.10148067436569513}

## GridSearch

In [80]:
# Execucao do programa

# Importando os dados
train = pd.read_csv('./../Dados/train.csv')
test = pd.read_csv('./../Dados/test.csv')

dataframes = [train, test]

for df in dataframes:
    df['volume']  = df.x * df.y * df.z
    df['densidade'] = df.volume / df.n

X = train.drop(columns = ['cent_price_cor', 'cent_trans_cor'], axis = 1)

y_price = train.cent_price_cor
y_trans = train.cent_trans_cor

X_train, X_test, y_price_train, y_price_test = train_test_split(X,y_price,
                                                    test_size = 0.25,
                                                    random_state = 0)

X_train, X_test, y_trans_train, y_trans_test = train_test_split(X,y_trans,
                                                    test_size = 0.25,
                                                    random_state = 0)

In [118]:
# Coletanea de parametros para o GridSearch
params_grid = [

#Linear Regression
{'normalize': ['True', 'False'],
'fit_intercept': ['True', 'False']},
    
 #SVR RBF
 #{'kernel': ['rbf'],
 #'C':[0.1, 0.5, 1, 5, 10],
 #'degree': [3,8],
 #'coef0': [0.01,10,0.5],
 #'gamma': ('auto','scale'),
 #'epsilon': [0.1,0.2]},
    
 #SVR POLY
 #{'kernel': ['poly'],
 #'C':[0.1, 0.5, 1, 5, 10],
 #'degree': [3,8],
 #'coef0': [0.01,10,0.5],
 #'gamma': ('auto','scale'),
 #'epsilon': [0.1,0.2]},
    
#Lasso
{'alpha':[0.02, 0.024, 0.025, 0.026, 0.03]}  
    
# #XGBoost
# ,{'nthread':[4], #when use hyperthread, xgboost may become slower
# 'objective':['reg:linear'],
# 'learning_rate': [.03, 0.05, .07], #so called `eta` value
# 'max_depth': [5, 6, 7],
# 'min_child_weight': [4],
# 'silent': [1],
# 'subsample': [0.7],
# 'colsample_bytree': [0.7],
# 'n_estimators': [500]}
 ]

In [115]:
def prever(X_train, X_test, y_train, y_test, target_name, components = [20,21,22,23,24,25,26,27,28]):
    
    lista_scores = []
    lista_PCA = []
    lista_params = []
    lista_models = []
    
    models = [
        LinearRegression(),
        #SVR(),
        #SVR(),
        Lasso()
        #XGBRegressor()
        ]
     
    for n in components:
        
        pca = PCA(n_components = n)
        X_train_PCA = pca.fit_transform(X_train)
        X_test_PCA = pca.transform(X_test)
            
        for i, model in enumerate(models):

            print(f"\n\nModelo: {model}\nComponent: {n}\n\n" + str(X_train_PCA.shape) + str(X_test_PCA.shape))

            clf = GridSearchCV(model, param_grid = params_grid[i],
                               scoring = 'neg_mean_absolute_error', #destaque Ã  mÃ©trica pedida
                               n_jobs=2, refit=True, cv=5, verbose=5,
                               pre_dispatch='2*n_jobs', error_score='raise', 
                               return_train_score=True)
            
            clf.fit(X_train_PCA, y_train)

            pred_cv = clf.predict(X_test_PCA)
            score_cv = mean_absolute_error(y_test, pred_cv)
            print(f"Melhores parametros: {clf.best_params_}")
            print(f"\nScore Grid: {score_cv}")
            
            lista_params.append(clf.best_params_)
            lista_models.append(model)
            lista_scores.append(round(score_cv,15))
            lista_PCA.append(n)

    print("Exportando DataFrame de Scores\n")

    df_scores = pd.DataFrame()
    
    df_scores.insert(loc=0, column='PCA', value= pd.Series(lista_PCA))
    df_scores.insert(loc=0, column='Scores', value= pd.Series(lista_scores))
    df_scores.insert(loc=0, column='Params', value= pd.Series(lista_params))
    df_scores.insert(loc=0, column='Model', value= pd.Series(lista_models))
    df_scores.to_csv(f"./../Resultados/{target_name}_scores_"+"{}.csv".format(datetime.now().strftime("%d-%m-%Y_%Hh%Mm%Ss")))
            
    return df_scores

In [117]:
df_scores_trans = prever(X, X_test, y_trans, y_trans_test, "trans")



Modelo: LinearRegression()
Component: 20

(11940, 20)(2985, 20)
Fitting 5 folds for each of 4 candidates, totalling 20 fits


[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  14 tasks      | elapsed:    2.2s
[Parallel(n_jobs=2)]: Done  20 out of  20 | elapsed:    3.4s finished
[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.


Melhores parametros: {'fit_intercept': 'True', 'normalize': 'True'}

Score Grid: 0.08999589299732849


Modelo: SVR()
Component: 20

(11940, 20)(2985, 20)
Fitting 5 folds for each of 120 candidates, totalling 600 fits


[Parallel(n_jobs=2)]: Done  14 tasks      | elapsed:   31.1s
[Parallel(n_jobs=2)]: Done  68 tasks      | elapsed:  2.2min
[Parallel(n_jobs=2)]: Done 158 tasks      | elapsed:  4.7min


KeyboardInterrupt: 

In [104]:
df_scores_trans

Unnamed: 0,Model,Params,Scores,PCA
0,LinearRegression(),"{'fit_intercept': 'True', 'normalize': 'True'}",0.089996,20
1,Lasso(),{'alpha': 0.03},0.090183,20
2,LinearRegression(),"{'fit_intercept': 'True', 'normalize': 'True'}",0.089999,21
3,Lasso(),{'alpha': 0.03},0.090183,21
4,LinearRegression(),"{'fit_intercept': 'True', 'normalize': 'True'}",0.090003,22
5,Lasso(),{'alpha': 0.03},0.090183,22
6,LinearRegression(),"{'fit_intercept': 'True', 'normalize': 'True'}",0.08997,23
7,Lasso(),{'alpha': 0.03},0.090183,23
8,LinearRegression(),"{'fit_intercept': 'True', 'normalize': 'True'}",0.089991,24
9,Lasso(),{'alpha': 0.03},0.090183,24


In [105]:
df_scores_price = prever(X, X_test, y_trans, y_trans_test, "price")



Modelo: LinearRegression()
Component: 20

(11940, 20)(2985, 20)
Fitting 5 folds for each of 4 candidates, totalling 20 fits


[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  14 tasks      | elapsed:    1.3s
[Parallel(n_jobs=2)]: Done  20 out of  20 | elapsed:    1.4s finished


Melhores parametros: {'fit_intercept': 'True', 'normalize': 'True'}

Score Grid: 0.08999589299732849


Modelo: Lasso()
Component: 20

(11940, 20)(2985, 20)
Fitting 5 folds for each of 5 candidates, totalling 25 fits


[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  14 tasks      | elapsed:    0.9s
[Parallel(n_jobs=2)]: Done  22 out of  25 | elapsed:    1.0s remaining:    0.1s
[Parallel(n_jobs=2)]: Done  25 out of  25 | elapsed:    1.0s finished


Melhores parametros: {'alpha': 0.03}

Score Grid: 0.09018283462097536


Modelo: LinearRegression()
Component: 21

(11940, 21)(2985, 21)
Fitting 5 folds for each of 4 candidates, totalling 20 fits


[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  20 out of  20 | elapsed:    0.4s finished
[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.


Melhores parametros: {'fit_intercept': 'True', 'normalize': 'True'}

Score Grid: 0.08999919611191273


Modelo: Lasso()
Component: 21

(11940, 21)(2985, 21)
Fitting 5 folds for each of 5 candidates, totalling 25 fits


[Parallel(n_jobs=2)]: Done  25 out of  25 | elapsed:    0.4s finished


Melhores parametros: {'alpha': 0.03}

Score Grid: 0.09018283462097536


Modelo: LinearRegression()
Component: 22

(11940, 22)(2985, 22)
Fitting 5 folds for each of 4 candidates, totalling 20 fits


[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  12 out of  20 | elapsed:    0.2s remaining:    0.1s
[Parallel(n_jobs=2)]: Done  20 out of  20 | elapsed:    0.2s finished


Melhores parametros: {'fit_intercept': 'True', 'normalize': 'True'}

Score Grid: 0.09000255719155117


Modelo: Lasso()
Component: 22

(11940, 22)(2985, 22)
Fitting 5 folds for each of 5 candidates, totalling 25 fits


[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  25 out of  25 | elapsed:    0.3s finished


Melhores parametros: {'alpha': 0.03}

Score Grid: 0.09018283462097536


Modelo: LinearRegression()
Component: 23

(11940, 23)(2985, 23)
Fitting 5 folds for each of 4 candidates, totalling 20 fits


[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  12 out of  20 | elapsed:    0.2s remaining:    0.1s
[Parallel(n_jobs=2)]: Done  20 out of  20 | elapsed:    0.5s finished
[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.


Melhores parametros: {'fit_intercept': 'True', 'normalize': 'True'}

Score Grid: 0.08997003169200213


Modelo: Lasso()
Component: 23

(11940, 23)(2985, 23)
Fitting 5 folds for each of 5 candidates, totalling 25 fits


[Parallel(n_jobs=2)]: Done  25 out of  25 | elapsed:    0.7s finished


Melhores parametros: {'alpha': 0.03}

Score Grid: 0.09018283462097536


Modelo: LinearRegression()
Component: 24

(11940, 24)(2985, 24)
Fitting 5 folds for each of 4 candidates, totalling 20 fits


[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  20 out of  20 | elapsed:    0.4s finished
[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.


Melhores parametros: {'fit_intercept': 'True', 'normalize': 'True'}

Score Grid: 0.08999134560616019


Modelo: Lasso()
Component: 24

(11940, 24)(2985, 24)
Fitting 5 folds for each of 5 candidates, totalling 25 fits


[Parallel(n_jobs=2)]: Done  25 out of  25 | elapsed:    0.3s finished
[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.


Melhores parametros: {'alpha': 0.03}

Score Grid: 0.09018283462097536


Modelo: LinearRegression()
Component: 25

(11940, 25)(2985, 25)
Fitting 5 folds for each of 4 candidates, totalling 20 fits


[Parallel(n_jobs=2)]: Done  12 out of  20 | elapsed:    0.1s remaining:    0.1s
[Parallel(n_jobs=2)]: Done  20 out of  20 | elapsed:    0.3s finished


Melhores parametros: {'fit_intercept': 'True', 'normalize': 'True'}

Score Grid: 0.08999480207423594


Modelo: Lasso()
Component: 25

(11940, 25)(2985, 25)
Fitting 5 folds for each of 5 candidates, totalling 25 fits


[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  14 tasks      | elapsed:    0.6s
[Parallel(n_jobs=2)]: Done  25 out of  25 | elapsed:    0.7s finished


Melhores parametros: {'alpha': 0.03}

Score Grid: 0.09018283462097536


Modelo: LinearRegression()
Component: 26

(11940, 26)(2985, 26)
Fitting 5 folds for each of 4 candidates, totalling 20 fits


[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  12 out of  20 | elapsed:    0.2s remaining:    0.1s
[Parallel(n_jobs=2)]: Done  20 out of  20 | elapsed:    0.3s finished


Melhores parametros: {'fit_intercept': 'True', 'normalize': 'True'}

Score Grid: 0.08996883544143594


Modelo: Lasso()
Component: 26

(11940, 26)(2985, 26)
Fitting 5 folds for each of 5 candidates, totalling 25 fits


[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  25 out of  25 | elapsed:    0.3s finished
[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.


Melhores parametros: {'alpha': 0.03}

Score Grid: 0.09018283462097536


Modelo: LinearRegression()
Component: 27

(11940, 27)(2985, 27)
Fitting 5 folds for each of 4 candidates, totalling 20 fits


[Parallel(n_jobs=2)]: Done  12 out of  20 | elapsed:    0.1s remaining:    0.1s
[Parallel(n_jobs=2)]: Done  20 out of  20 | elapsed:    0.2s finished


Melhores parametros: {'fit_intercept': 'True', 'normalize': 'True'}

Score Grid: 0.0899659254636646


Modelo: Lasso()
Component: 27

(11940, 27)(2985, 27)
Fitting 5 folds for each of 5 candidates, totalling 25 fits


[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  25 out of  25 | elapsed:    0.4s finished
[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.


Melhores parametros: {'alpha': 0.03}

Score Grid: 0.09018283462097536


Modelo: LinearRegression()
Component: 28

(11940, 28)(2985, 28)
Fitting 5 folds for each of 4 candidates, totalling 20 fits


[Parallel(n_jobs=2)]: Done  12 out of  20 | elapsed:    0.3s remaining:    0.1s
[Parallel(n_jobs=2)]: Done  20 out of  20 | elapsed:    0.4s finished
[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.


Melhores parametros: {'fit_intercept': 'True', 'normalize': 'True'}

Score Grid: 0.08995967336220755


Modelo: Lasso()
Component: 28

(11940, 28)(2985, 28)
Fitting 5 folds for each of 5 candidates, totalling 25 fits
Melhores parametros: {'alpha': 0.03}

Score Grid: 0.09018283462097536
Exportando DataFrame de Scores



[Parallel(n_jobs=2)]: Done  25 out of  25 | elapsed:    0.5s finished


In [111]:
df_scores_price

Unnamed: 0,Model,Params,Scores,PCA
0,LinearRegression(),"{'fit_intercept': 'True', 'normalize': 'True'}",0.089996,20
1,Lasso(),{'alpha': 0.03},0.090183,20
2,LinearRegression(),"{'fit_intercept': 'True', 'normalize': 'True'}",0.089999,21
3,Lasso(),{'alpha': 0.03},0.090183,21
4,LinearRegression(),"{'fit_intercept': 'True', 'normalize': 'True'}",0.090003,22
5,Lasso(),{'alpha': 0.03},0.090183,22
6,LinearRegression(),"{'fit_intercept': 'True', 'normalize': 'True'}",0.08997,23
7,Lasso(),{'alpha': 0.03},0.090183,23
8,LinearRegression(),"{'fit_intercept': 'True', 'normalize': 'True'}",0.089991,24
9,Lasso(),{'alpha': 0.03},0.090183,24


In [123]:
df_scores_price.Scores.min()

0.089959673362208

In [124]:
df_scores_price[df_scores_price.Scores == 0.089959673362208]

Unnamed: 0,Model,Params,Scores,PCA
16,LinearRegression(),"{'fit_intercept': 'True', 'normalize': 'True'}",0.08996,28


In [120]:
df_scores_trans.Scores.min()

0.089959673362208

In [125]:
df_scores_trans[df_scores_trans.Scores == 0.089959673362208]

Unnamed: 0,Model,Params,Scores,PCA
16,LinearRegression(),"{'fit_intercept': 'True', 'normalize': 'True'}",0.08996,28


In [127]:
def geral_resultados_submissao(test, clf_price, clf_trans):

    cent_price_cor = clf_price.predict(test.drop("id", axis=1))
    cent_trans_cor = clf_trans.predict(test.drop("id", axis=1))

    df_sub = pd.DataFrame({"cent_price_cor": cent_price_cor, "cent_trans_cor": cent_trans_cor})
    
    df_sub.to_csv("./../Submissoes/df_sub_{}.csv".format(datetime.now().strftime("%d-%m-%Y_%Hh%Mm%Ss")), index=False)

    return df_sub

In [129]:
print("\nPrevisao para o Price concluida \n")

clf_price = LinearRegression({'fit_intercept': 'True', 'normalize': 'True'})
clf_price.fit(X, y_price)

clf_trans = LinearRegression({'fit_intercept': 'True', 'normalize': 'True'})
clf_trans.fit(X, y_trans)

#print(X)
#print(test)

df_sub = geral_resultados_submissao(test, clf_price, clf_trans)

df_sub

#print("\nPrograma executado com sucesso \n")


Previsao para o Price concluida 





Unnamed: 0,cent_price_cor,cent_trans_cor
0,-0.194916,0.364345
1,-0.196930,0.369713
2,-0.209772,0.369003
3,-0.203155,0.360736
4,-0.208389,0.365105
...,...,...
8054,-0.197291,0.359655
8055,-0.195932,0.362658
8056,-0.212489,0.373609
8057,-0.201509,0.352812
