In [2]:
import os
import time

import joblib
import numpy as np
import pandas as pd
import seaborn as sns
import sklearn
import yaml
from sklearn import metrics
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Lasso
from sklearn.model_selection import RandomizedSearchCV
from sklearn.svm import LinearSVR

In [33]:
def read_data_test(params):
    """Load the persisted test inputs and test targets.

    Parameters
    ----------
    params : mapping
        Configuration mapping. ``params['DUMP_TEST']`` is the path of the
        serialized test inputs and ``params['Y_PATH_TEST']`` the path of the
        serialized test targets.

    Returns
    -------
    tuple
        ``(x_test, y_test)`` exactly as stored on disk.

    Notes
    -----
    ``joblib.load`` unpickles the files, so only point these paths at
    artifacts produced by a trusted pipeline.
    """
    # Inputs are loaded before targets, matching the order of the returned pair.
    return joblib.load(params['DUMP_TEST']), joblib.load(params['Y_PATH_TEST'])

def evaluate(true, predicted):
    """Compute a fixed set of regression metrics for a set of predictions.

    Parameters
    ----------
    true : array-like
        Ground-truth target values.
    predicted : array-like
        Predicted target values, aligned with ``true``.

    Returns
    -------
    tuple
        ``(mae, mse, rmse, r2_square, mape, exp_var)`` in that order: mean
        absolute error, mean squared error, root mean squared error, R^2
        score, mean absolute percentage error and explained variance score.
    """
    mae = metrics.mean_absolute_error(true, predicted)
    mse = metrics.mean_squared_error(true, predicted)
    # RMSE is derived from the MSE computed above rather than recomputing it.
    rmse = np.sqrt(mse)
    r2_square = metrics.r2_score(true, predicted)
    mape = metrics.mean_absolute_percentage_error(true, predicted)
    exp_var = metrics.explained_variance_score(true, predicted)
    return mae, mse, rmse, r2_square, mape, exp_var


def prediction_score(x_test, y_test, model_fitted):
    """Score a fitted model on the given data and return the metrics by name.

    Parameters
    ----------
    x_test : object
        Inputs passed to ``model_fitted.predict``.
    y_test : object
        True targets the predictions are compared against.
    model_fitted : object
        Any object exposing a ``predict(x)`` method.

    Returns
    -------
    dict
        Metric values keyed by ``'mae'``, ``'mse'``, ``'rmse'``, ``'r2'``,
        ``'mape'`` and ``'exp_var'``, as produced by ``evaluate``.
    """
    metric_values = evaluate(y_test, model_fitted.predict(x_test))
    # Unpack explicitly so an unexpected number of metrics fails loudly.
    mae, mse, rmse, r2_square, mape, exp_var = metric_values
    return dict(mae=mae, mse=mse, rmse=rmse, r2=r2_square, mape=mape, exp_var=exp_var)

In [34]:
def main(params):
    """Evaluate the persisted best model on the test data and log the result.

    Loads the model name and the model object from the paths in
    ``params['MODEL_NAME']`` and ``params['BEST_MODEL']``, scores the model on
    the test data returned by ``read_data_test``, writes the resulting log
    dictionary to ``output/isrelated_test_log.pkl`` and prints a summary.

    Parameters
    ----------
    params : mapping
        Configuration mapping holding the artifact paths used here and by
        ``read_data_test``.

    Returns
    -------
    None
    """
    log_path = 'output/isrelated_test_log.pkl'

    model_name = joblib.load(params['MODEL_NAME'])
    print(f"Working on test data with {model_name} model")

    main_model = joblib.load(params['BEST_MODEL'])

    # Every value is a list so the log keeps the same shape as it is filled in.
    test_log_dict = {'model': [main_model],
                     'model_name': [model_name],
                     'model_score': []}

    x_test, y_test = read_data_test(params)
    score = prediction_score(x_test, y_test, main_model)
    test_log_dict['model_score'].append(score)

    # joblib.dump does not create missing directories; make sure the target
    # directory exists so a fresh checkout does not fail after scoring.
    os.makedirs(os.path.dirname(log_path), exist_ok=True)
    joblib.dump(test_log_dict, log_path)
    print(
        f"Model: {test_log_dict['model_name']},\n Score: {test_log_dict['model_score']},\n Model's parameter: {test_log_dict['model']}")
    

In [31]:
# Load the pipeline configuration. The context manager closes the file even if
# parsing fails; yaml.safe_load is equivalent to yaml.load with SafeLoader.
with open("src/params/preprocess_params.yaml", "r") as f:
    params = yaml.safe_load(f)

In [32]:
# Evaluate the saved model on the test data using the loaded configuration;
# main() prints a summary and writes the log under output/.
main(params)

Working on test data with RandomForestRegressor model
Model: ['RandomForestRegressor'],
 Score: [{'mae': 16892.570407534244, 'mse': 831830028.3032011, 'rmse': 28841.463699042757, 'r2': 0.8653912856536792}],
 Model's parameter: [RandomForestRegressor(n_estimators=1000, n_jobs=-1, random_state=0)]
