In [None]:
# Copyright 2021, Battelle Energy Alliance, LLC

import pandas as pd 
import numpy as np
import json
import os
import re

# Change working directory if not the project directory.
# When the kernel was started inside a folder named 'prediction', step up one
# level so that relative paths used later resolve from its parent directory.
current_dir = os.getcwd()
# Split on the platform's own path separator instead of a hard-coded '/'
# pattern, so the last component is found on Windows as well as POSIX.
folders = os.path.normpath(current_dir).split(os.sep)
if folders[-1] == 'prediction':
    os.chdir(os.path.dirname(current_dir))

# Load environment variables from .env file
# Shell out to pip to install python-dotenv, which supplies the IPython
# extension loaded on the next line.
# NOTE(review): `!pip` runs in a subshell and may not target the kernel's
# environment; `%pip install` with a pinned version would be safer — confirm.
!pip install python-dotenv
# Register the dotenv IPython extension so the %dotenv magic is available.
%load_ext dotenv
# Run the %dotenv magic; presumably it reads the .env file found from the
# current working directory — TODO confirm the lookup location.
%dotenv
# Project-local module; what it defines is not visible from this notebook.
import settings
# Show the working directory so the effect of the chdir above can be checked.
%pwd

In [None]:
# Read the adapter configuration into the module-level `data` dictionary.
# The file location comes from the ML_ADAPTER_OBJECT_LOCATION environment variable.
config_path = os.getenv("ML_ADAPTER_OBJECT_LOCATION")
with open(config_path, mode='r') as fp:
    data = json.load(fp)

In [None]:
def load_model():
    """
    Load the serialized model from disk.

    The file path is taken from the module-level ``data`` configuration at
    ``data["MODEL"]["model_serialization_file"]``.

    Returns:
        The object unpickled from that file.

    Raises:
        KeyError: if the configuration lacks the expected keys.
        OSError: if the file cannot be opened.

    Note: unpickling can execute arbitrary code, so the serialization file
    must come from a trusted source.
    """
    import pickle
    filename = data["MODEL"]["model_serialization_file"]
    # A context manager guarantees the handle is closed, even when
    # unpickling raises.
    with open(filename, 'rb') as model_file:
        loaded_model = pickle.load(model_file)
    return loaded_model

In [None]:
def standardize_mean_normalization(data, X_train_mean, X_train_std):
    """
    Convert data to z-scores.

    Each value is shifted by the supplied mean and then divided by the
    supplied standard deviation:
    z = (x - mean) / std

    Note: pass the mean and standard deviation of the training set only,
    so that no information from the test data set leaks into the scaling.
    """
    centered = data - X_train_mean
    return centered / X_train_std

In [None]:
def prediction(model, data):
    """
    Make a prediction for the incoming data with an existing model.

    Returns whatever ``model.predict(data)`` returns.
    """
    return model.predict(data)

In [None]:
def unstandardize_mean_normalization(yhat, y_train_mean, y_train_std):
    """
    Map standardized predictions back to the original scale.

    This inverts the z-score formula:
    x = (z * std) + mean

    Note: pass the mean and standard deviation of the training set only,
    matching the statistics that were used to standardize the data.
    """
    rescaled = yhat * y_train_std
    return rescaled + y_train_mean

In [None]:
def create_JSON_file(yhat, independent_variables, dependent_variables, tolerance=2):
    """
    Create a .json file of the machine learning results.

    Args:
        yhat: Predicted values. A NumPy array, or anything ``np.asarray``
            can convert to one (for example a plain list).
        independent_variables: Stored unchanged under "Independent Variables";
            must be JSON-serializable.
        dependent_variables: Stored unchanged under "Dependent Variables";
            must be JSON-serializable.
        tolerance: Number of decimal places the predictions are rounded to.

    The output path is read from the module-level ``data`` configuration at
    ``data["PREDICTION"]["output_file"]``; an existing file is overwritten.
    """
    # np.asarray is a no-op for arrays and lets list-like input be rounded too
    fitted = np.asarray(yhat).round(tolerance).tolist()

    # Create a dictionary of the machine learning results
    json_data = {
        "Independent Variables": independent_variables,
        "Dependent Variables": dependent_variables,
        "Fitted": {"test": fitted},
    }

    # Write the data to a JSON File; the with-block closes the file on exit
    location = data["PREDICTION"]["output_file"]
    with open(location, "w") as f:
        json.dump(json_data, f)

In [None]:
def make_prediction(independent_variables=None, dependent_variables=None):
    """
    Run the prediction pipeline and write the results to a JSON file.

    Steps: read 'data/test.csv', load the serialized model, read the training
    mean / standard deviation from the file named by
    data["PREDICTION"]["input_file"], standardize the inputs, predict,
    unstandardize the predictions and pass them to create_JSON_file.

    Args:
        independent_variables: Labels written under "Independent Variables".
            Defaults to the column names of 'data/test.csv'.
        dependent_variables: Labels written under "Dependent Variables".
            Defaults to an empty list.
    """
    # Retrieve Data
    test_data = pd.read_csv('data/test.csv')

    # NOTE(review): the labels to report are not defined anywhere in this
    # notebook. Falling back to the test-set column names and an empty list is
    # an assumption — confirm the intended values with the output's consumer.
    if independent_variables is None:
        independent_variables = test_data.columns.tolist()
    if dependent_variables is None:
        dependent_variables = []

    # Load the model from a file
    model = load_model()

    # Read standardization input file
    with open(data["PREDICTION"]["input_file"], 'r') as fp:
        model_info = json.load(fp)

    # Set standardization values
    X_train_mean = model_info['data']['mean']['X_train']
    X_train_std = model_info['data']['std']['X_train']
    y_train_mean = model_info['data']['mean']['y_train']
    y_train_std = model_info['data']['std']['y_train']

    # Standardize the data (kept under a new name so the raw frame stays available)
    test_data_standardized = standardize_mean_normalization(test_data, X_train_mean, X_train_std)

    # Make a prediction with the incoming data
    yhat = prediction(model, test_data_standardized)

    # Unstandardize the data
    yhat = unstandardize_mean_normalization(yhat, y_train_mean, y_train_std)

    # Generate JSON file of results; both label arguments are required by
    # create_JSON_file, so pass them explicitly.
    create_JSON_file(yhat, independent_variables, dependent_variables, tolerance=2)

In [None]:
# make_prediction()