In [1]:
# Start a notebook-wide stopwatch. `start` is the handle later passed to
# stopwatch.stop() in the final cell to report the total execution time.
from tools import Stopwatch
stopwatch = Stopwatch()
start = stopwatch.start()

### Load libraries, functions, palette, theme

In [2]:
%run _libraries.ipynb

In [3]:
%run _functions.ipynb

In [189]:
# Location argument handed to every saveit() call in the "Save Data" cells.
save_files = 'files/Section6-HPP-evaluation'

In [190]:
# Image path for this section; not referenced by any other cell shown in this notebook.
save_img = 'docs/img/Section6-HPP-evaluation'

In [191]:
# Name passed to save_session() at the end of the notebook.
session_name = 'Section6-HPP-evaluation'

# Section VI. HPP evaluation

## Load Saved Section if it exists

## Load Data

In [5]:
# ---------------------------------------------------------------------------
# Objects persisted by earlier sections, grouped by the section they come from
# ---------------------------------------------------------------------------

# Section 2
groups_dict = loadit(
    'groups_dict',
    dir='Section2-Explore-and-Clean',
)

# Section 3
datasets_dict = loadit(
    'datasets_dict',
    dir='Section3-Feature-selection-and-Preprocessing',
)
transform_dict = loadit(
    'transform_dict',
    dir='Section3-Feature-selection-and-Preprocessing',
)

# Section 4
features_dict = loadit(
    'features_dict',
    dir='Section4-Linear-models-research',
)
simulation_datasets_dict = loadit(
    'simulation_datasets_dict',
    dir='Section4-Linear-models-research',
)
estimators_dict = loadit(
    'estimators_dict',
    dir='Section4-Linear-models-research',
)
evaluation_dict = loadit(
    'evaluation_dict',
    dir='Section4-Linear-models-research',
)

# Section 5 (stored under the name 'estimator_stack')
stack = loadit(
    'estimator_stack',
    dir='Section5-Residuals-prediction',
)

# ---------------------------------------------------------------------------
# Working copies of the datasets (copied so the loaded dict stays untouched)
# ---------------------------------------------------------------------------
train_raw = datasets_dict['train_raw'].copy()
train = datasets_dict['train'].copy()
train_cv = datasets_dict['train_cv'].copy()
test = datasets_dict['test'].copy()

# ---------------------------------------------------------------------------
# Previously stored estimator
# ---------------------------------------------------------------------------
lr = estimators_dict['lr']

# ---------------------------------------------------------------------------
# Feature name collections (copied) and the target (taken as is)
# ---------------------------------------------------------------------------
features = features_dict['features'].copy()
features_linear = features_dict['features_linear'].copy()
features_raw = features_dict['features_raw'].copy()

numeric = features_dict['numeric'].copy()
numeric_raw = features_dict['numeric_raw'].copy()

categorical = features_dict['categorical'].copy()
categorical_transform = features_dict['categorical_transform'].copy()
categorical_raw = features_dict['categorical_raw'].copy()

factor = features_dict['factor'].copy()
target = features_dict['target']

# ---------------------------------------------------------------------------
# Transformers stored in transform_dict
# ---------------------------------------------------------------------------
# overview stage
transformer_overview = transform_dict['transformer_overview']
transformer_imputer_median = transform_dict['transformer_imputer_median']
transformer_imputer_frequent = transform_dict['transformer_imputer_frequent']
transformer_label = transform_dict['transformer_label']

# feature selection stage
transformer_features_creator = transform_dict['transformer_features_creator']
transformer_features_logger = transform_dict['transformer_features_logger']

# preprocessing stage
# (both names are rebound to newly constructed objects further down,
#  before the cross-validation pipeline is assembled)
encoder = transform_dict['encoder']
scaler = transform_dict['scaler']

# ---------------------------------------------------------------------------
# Extra feature lists kept alongside the transformers (copied)
# ---------------------------------------------------------------------------
features_na = transform_dict['features_na'].copy()
features_log = transform_dict['features_log'].copy()

## HPP evaluation with K-Fold CV

In [6]:
# Number of folds handed to cross_validation() through its n_folds argument.
n_folds = 20

In [7]:
# Newly constructed ordinal encoder for the cross-validation pipeline.
# This rebinds `encoder`, replacing the object read from
# transform_dict['encoder'] in the loading cell.
encoder = OrdinalEncoder(
    variables=categorical_transform,
    encoding_method='ordered',
    unseen='encode',
    missing_values='ignore',
)

In [8]:
# Newly constructed scaler for the cross-validation pipeline. This rebinds `scaler`,
# replacing the object read from transform_dict['scaler'] in the loading cell.
scaler = StandardScaler()

In [9]:
# Preprocessing chain handed to cross_validation() as `pipeline_transform`:
# the encoder step runs first, the scaler step second.
pipeline_transform = Pipeline(
    steps=[('encoder', encoder), ('scaler', scaler)],
)

In [10]:
# Estimator under evaluation. The stack loaded from Section 5 is supplied as
# the residuals estimator; `features_linear` is supplied as the predictors.
hpp = HousePricePredictor(
    target=target,
    features=features,
    predictors=features_linear,
    residuals_estimator=stack,
)

In [11]:
# Cross-validate `hpp` on train_cv and time the run with the shared stopwatch.
# The recorded run of this cell took roughly 25 minutes.
st = stopwatch.start()
hpp_cv = cross_validation(
    data=train_cv,
    features=features,
    target=target,
    estimator=hpp,
    pipeline_transform=pipeline_transform,
    n_folds=n_folds,
)
print(f'Execution time: {stopwatch.stop(st)}')

Execution time: 0:25:36


In [12]:
# Average of the 'test_score' entries in the cross-validation result
# (presumably one entry per fold -- confirm against cross_validation()).
hpp_cv['test_score'].mean()

0.11006958103254331

In [13]:
# Standard deviation of the 'test_score' entries in the cross-validation result.
hpp_cv['test_score'].std()

0.021920503956610778

In [14]:
# Run the project's normality checks on the cross-validation test scores.
# NOTE(review): in the recorded output Kolmogorov-Smirnov reports 0.0 / "Not normal"
# while the other four tests report "Normal". That pattern would be expected if the
# KS test compares the raw (unstandardized) scores against a standard normal
# distribution -- confirm inside test_normality().
test_normality(hpp_cv['test_score'])

Unnamed: 0,Test,P or Statistic (s),Condition
0,Kolmogorov-Smirnov,0.0,Not normal
1,Anderson-Darling (s),0.5356,Normal
2,Shapiro-Wilk,0.1767,Normal
3,Jarque-Bera,0.4587,Normal
4,D’Agostino-Pearson,0.2953,Normal


### Save Data

In [15]:
# Fit the predictor on `train` and report how long the fit took.
# NOTE(review): cross-validation above used `train_cv` together with
# pipeline_transform, whereas this fit uses `train` with no pipeline applied --
# confirm that `train` is already in the form the estimator expects.
st = stopwatch.start()
hpp.fit(train[features], train[target])
print(f'Fit time: {stopwatch.stop(st)}')

Fit time: 0:01:19


In [16]:
# Record the frame and feature list used for the final fit under
# HPP-specific keys. Both objects are stored by reference, not copied.
simulation_datasets_dict.update({
    'train_hpp': train,
    'features_hpp': features,
})

In [17]:
# Register the predictor (fitted on `train` in the cell above) under the key 'hpp'.
estimators_dict['hpp'] = hpp

In [18]:
# Keep the cross-validation result next to the earlier evaluations under the key 'cv_hpp'.
evaluation_dict['cv_hpp'] = hpp_cv

In [19]:
# Persist the datasets dict, now including the 'train_hpp' and 'features_hpp' entries.
saveit(simulation_datasets_dict, 'simulation_datasets_dict', save_files)

In [20]:
# Persist the estimators dict, now including the 'hpp' entry.
saveit(estimators_dict, 'estimators_dict', save_files)

In [21]:
# Persist the evaluation dict, now including the 'cv_hpp' entry.
saveit(evaluation_dict, 'evaluation_dict', save_files)

In [22]:
# Persist the test frame under the name 'test_preprocessed'.
# NOTE(review): `test` is an unmodified copy of datasets_dict['test']; no cell in this
# notebook transforms it -- confirm it was already preprocessed upstream, otherwise
# the saved name is misleading.
saveit(test, 'test_preprocessed', save_files)

### Save Session

In [23]:
# Save the notebook session under the name defined in `session_name`.
save_session(session_name)

### Execution time

In [24]:
# Report the total time elapsed since the stopwatch handle `start` was taken in the first cell.
print(f'Execution time: {stopwatch.stop(start)}')

Execution time: 0:27:02
