In [6]:

import sys
sys.path.append(r'C:\Users\gustavo\Documents\Data Science\08-GitHub\Portifolio/Regression/house_prices_single_model/src')

import os
import pandas as pd
import json
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import (
    RandomForestRegressor, 
    AdaBoostRegressor, 
    GradientBoostingRegressor, 
    HistGradientBoostingRegressor)
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import (
    root_mean_squared_error,
    r2_score    
    )
from sklearn.model_selection import GridSearchCV
import yaml
from sklearn.decomposition import PCA
import warnings
# Notebook-wide presentation settings (order is irrelevant, the three calls
# are independent of each other).
# NOTE(review): this silences *every* warning category for the whole kernel,
# including deprecation and convergence warnings raised by the libraries.
warnings.simplefilter('ignore')
# Render floats in DataFrame/Series output with four decimal places.
pd.set_option('display.float_format', '{:.4f}'.format)
# Use the ggplot style sheet for all matplotlib figures.
plt.style.use('ggplot')


# Load the project configuration (paths, target name, thresholds, ...).
# The location can be overridden with the HOUSE_PRICES_CONFIG environment
# variable so the notebook is not tied to a single machine; when the variable
# is unset the original absolute path is used unchanged.
yaml_path = os.environ.get(
    'HOUSE_PRICES_CONFIG',
    r'C:\Users\gustavo\Documents\Data Science\08-GitHub\Portifolio\Regression\house_prices_single_model\src\config.yaml')
with open(yaml_path, "r", encoding="utf-8") as f:
    config = yaml.safe_load(f)

# yaml.safe_load returns None for an empty document; fail here with a clear
# message instead of a "'NoneType' object is not subscriptable" further down.
if not isinstance(config, dict):
    raise ValueError(
        f"Expected a YAML mapping in {yaml_path!r}, got {type(config).__name__}")

In [7]:
# Sub-sections of the loaded YAML config that are read more than once below.
_feat_sel_cfg = config['feat_selection']
_feat_sel_params_cfg = config['feat_selection_params']
_model_sel_cfg = config['model_selection']
_reports_cfg = config['save_reports']

# Single lookup table for everything the rest of the notebook needs:
# dataset file locations, columns to discard, report/plot destinations,
# the scoring name, the target name and the fixed CV / seed settings.
params_ = {
    # Feature-selection artefacts: <feat_selection.path>/<file name>.
    'X_train_feat_sel': os.path.join(_feat_sel_cfg['path'], _feat_sel_cfg['X_train']),
    'X_val_feat_sel': os.path.join(_feat_sel_cfg['path'], _feat_sel_cfg['X_val']),
    'y_train_feat_sel': os.path.join(_feat_sel_cfg['path'], _feat_sel_cfg['y_train']),
    'y_val_feat_sel': os.path.join(_feat_sel_cfg['path'], _feat_sel_cfg['y_val']),
    # Values taken verbatim from the config file.
    'cols_2_drop': _model_sel_cfg['cols_2_drop'],
    'reports': _reports_cfg['path_reports'],
    'save_plot': _reports_cfg['path_plot'],
    'score': _model_sel_cfg['score'],
    'target': _feat_sel_params_cfg['target'],
    # Constants fixed in the notebook rather than in the config file.
    'random_state': 42,
    'cv': 5,
    'pca_threshold': _feat_sel_params_cfg['pca_threshold'],
}

In [8]:
# Read the four parquet files whose paths were assembled in params_,
# in the same order as before: train features, validation features,
# train target, validation target.
X_train, X_val, y_train, y_val = (
    pd.read_parquet(params_[path_key])
    for path_key in (
        'X_train_feat_sel',
        'X_val_feat_sel',
        'y_train_feat_sel',
        'y_val_feat_sel',
    )
)

In [9]:
# Show the column labels of the validation feature frame as loaded from
# parquet (no columns have been dropped from X_val at this point).
X_val.columns

Index(['numerical_pipe__MSSubClass', 'numerical_pipe__OverallCond',
       'numerical_pipe__OverallQual', 'numerical_pipe__YearBuilt',
       'numerical_pipe__YearRemodAdd', 'numerical_pipe__BsmtUnfSF',
       'numerical_pipe__GrLivArea', 'numerical_pipe__BsmtFullBath',
       'numerical_pipe__FullBath', 'numerical_pipe__HalfBath',
       'numerical_pipe__BedroomAbvGr', 'numerical_pipe__TotRmsAbvGrd',
       'numerical_pipe__Fireplaces', 'numerical_pipe__GarageYrBlt',
       'numerical_pipe__GarageCars', 'numerical_pipe__GarageArea',
       'numerical_pipe__MoSold', 'numerical_pipe__YrSold',
       'numerical_pipe__BsmtFinSF2', 'numerical_pipe__EnclosedPorch',
       'numerical_pipe__LotFrontage', 'numerical_pipe__LotArea',
       'numerical_pipe__MasVnrArea', 'numerical_pipe__BsmtFinSF1',
       'numerical_pipe__TotalBsmtSF', 'numerical_pipe__1stFlrSF',
       'numerical_pipe__2ndFlrSF', 'numerical_pipe__WoodDeckSF',
       'numerical_pipe__OpenPorchSF', 'categorical_pipe__MSZoning_RL

In [15]:
# Remove the columns listed under model_selection.cols_2_drop from the
# training features.
# The result is reassigned instead of using inplace=True, and missing labels
# are ignored, so re-running this cell is a no-op rather than a KeyError on
# columns that were already removed by a previous run.
# Caveat: errors='ignore' also hides misspelled names in cols_2_drop.
X_train = X_train.drop(
        columns=params_['cols_2_drop'],
        errors='ignore')
# NOTE(review): the same drop is still disabled for X_val, so X_train and
# X_val may end up with different feature sets. Confirm whether this is
# intentional before fitting/predicting, and enable the lines below if not.
# X_val = X_val.drop(
#         columns=params_['cols_2_drop'],
#         errors='ignore')

In [6]:
# Show the column labels of the training feature frame.
# NOTE(review): the stored output of this cell carries an older execution
# counter than the drop cell above it, and its saved labels use a
# 'num_pipe_2__' prefix where the saved X_val output uses 'numerical_pipe__'.
# Re-run the notebook from a fresh kernel and confirm both frames expose the
# same feature names.
X_train.columns

Index(['numerical_pipe__MSSubClass', 'numerical_pipe__OverallCond',
       'numerical_pipe__OverallQual', 'numerical_pipe__YearBuilt',
       'numerical_pipe__YearRemodAdd', 'numerical_pipe__BsmtUnfSF',
       'numerical_pipe__GrLivArea', 'numerical_pipe__BsmtFullBath',
       'numerical_pipe__FullBath', 'numerical_pipe__HalfBath',
       'numerical_pipe__BedroomAbvGr', 'numerical_pipe__TotRmsAbvGrd',
       'numerical_pipe__Fireplaces', 'numerical_pipe__GarageYrBlt',
       'numerical_pipe__GarageCars', 'numerical_pipe__GarageArea',
       'numerical_pipe__MoSold', 'numerical_pipe__YrSold',
       'numerical_pipe__BsmtFinSF2', 'numerical_pipe__EnclosedPorch',
       'num_pipe_2__LotFrontage', 'num_pipe_2__LotArea',
       'num_pipe_2__MasVnrArea', 'num_pipe_2__BsmtFinSF1',
       'num_pipe_2__TotalBsmtSF', 'num_pipe_2__1stFlrSF',
       'num_pipe_2__2ndFlrSF', 'num_pipe_2__WoodDeckSF',
       'num_pipe_2__OpenPorchSF', 'categorical_pipe__MSZoning_RL',
       'categorical_pipe__MSZonin