In [None]:
from sklearn import metrics
import joblib
import torch
import torch.nn as nn

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
import pytz
from datetime import timedelta, datetime
# Local timezone used for InfluxDB-based time calculations.
local_tz = pytz.timezone('America/Toronto')
# Current date formatted as YYYY-MM-DD.
today = format(datetime.today(), '%Y-%m-%d')

#### Note: if you did not generate new data for model training, a sample dataset is provided in this repository.
#### To use it, make sure the next cell runs the `read_csv` line that loads `lt_results_2022-10-01.csv`, and that the line building the file name from today's date stays commented out.

In [None]:
# Alternative for freshly generated data (file name built from today's date):
#data = pd.read_csv('lt_results_'+today+'.csv', index_col='DateTime', parse_dates=True)
# Load the sample results, using the DateTime column as a parsed datetime index.
data = pd.read_csv(
    'lt_results_2022-10-01.csv',
    index_col='DateTime',
    parse_dates=True,
)

In [None]:
# Preview the first rows of the loaded results.
data.head()

In [None]:
# Remove the columns that are not used for training.
# errors='ignore' keeps this cell re-runnable: `data` is reassigned here, so a
# second execution would otherwise raise KeyError because the columns are already gone.
data = data.drop(columns=['req2xx', 'testDurationSeconds'], errors='ignore')

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the rows for testing; random_state pins the shuffle so the split is reproducible.
# The feature frames still contain the 'mean_tps' column at this point.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    data['mean_tps'],
    test_size=0.1,
    random_state=0,
)
(X_train.shape, X_test.shape)

In [None]:
# Name of the column the model predicts; strip it from both feature frames.
target_var = 'mean_tps'
X_train, X_test = (frame.drop(columns=target_var) for frame in (X_train, X_test))

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
# Separate scalers for the features and for the target, so that predictions can be
# mapped back to the original target units with targetStdScaler.inverse_transform.
stdScaler = StandardScaler()
targetStdScaler = StandardScaler()

In [None]:
# Fit both scalers on the training split only; the target is reshaped to a single
# column because StandardScaler expects 2-D input.
X_train_scaled = stdScaler.fit_transform(X_train.values)
y_train_scaled = targetStdScaler.fit_transform(y_train.values.reshape(-1,1))

In [None]:
# Apply the already-fitted scalers to the test split (transform only, no refit).
X_test_scaled = stdScaler.transform(X_test.values)
y_test_scaled = targetStdScaler.transform(y_test.values.reshape(-1,1))

In [None]:
# Seed PyTorch's global random number generator with a fixed value
torch.manual_seed(42)

In [None]:
# Convert the scaled arrays into float32 tensors for PyTorch.
X_train_scaled = torch.as_tensor(X_train_scaled, dtype=torch.float32)
y_train_scaled = torch.as_tensor(y_train_scaled, dtype=torch.float32)
X_test_scaled = torch.as_tensor(X_test_scaled, dtype=torch.float32)
y_test_scaled = torch.as_tensor(y_test_scaled, dtype=torch.float32)

In [None]:
class MLPMLasp(nn.Module):
    """Fully connected ReLU network: an input layer, `hidden` hidden layers and a linear output layer."""

    def __init__(self, input, layerSize, hidden, output):
        """Build the layer stack.

        input     -- number of input features
        layerSize -- width of the input layer and of every hidden layer
        hidden    -- number of extra Linear+ReLU pairs between input and output (0 for none)
        output    -- number of output values
        """
        super().__init__()

        # Input layer followed by its activation.
        layers = [nn.Linear(input, layerSize), nn.ReLU()]

        # Hidden layers, all of the same width.
        for _ in range(hidden):
            layers += [nn.Linear(layerSize, layerSize), nn.ReLU()]

        # Output layer (no activation).
        layers.append(nn.Linear(layerSize, output))

        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Run `x` through the sequential layer stack."""
        return self.net(x)

In [None]:
# Number of training epochs; read as a notebook global by createModel and createRemoteModel.
epochs = 1000

In [None]:
# Number of input features = number of columns of the scaled training matrix.
inputSize = X_train_scaled.shape[1]
# Column layout of the per-model results frame. The spellings ('TrainLoses', 'TestLoses') are
# used as keys by later cells. 'TrainPredictions' holds the de-scaled predictions for X_test.
colList = ['HiddenLayers', 'R2Score', 'MAE', 'MSE', 'MAPE', 'model', 'TrainLoses', 'TestLoses', 'TrainPredictions']

In [None]:
# This function is the reference for creating and training the models inside the Jupyter notebook pod. We will evaluate it against the Ray cluster distributed one
def createModel(testResultsFrame, inputSize, layerSize, loops, 
                y_train, X_train, y_test, X_test, 
                targetScaler, labelSet):
    """Train one MLPMLasp model per hidden-layer count and collect its test metrics.

    For every i in range(loops) a model with i hidden layers is trained full-batch
    for `epochs` epochs (notebook global) with Adam (lr=0.001) and MSE loss.

    Parameters
    ----------
    testResultsFrame : pandas.DataFrame
        Frame with the columns listed in the notebook global `colList`; the new
        rows are appended after its existing rows.
    inputSize : int
        Number of input features of the model.
    layerSize : int
        Width of the input layer and of every hidden layer.
    loops : int
        Number of models to train (hidden-layer counts 0 .. loops-1).
    y_train, X_train, y_test, X_test : torch.Tensor
        Passed unchanged to the model and the loss.
        NOTE(review): this notebook passes the standardized float tensors -
        confirm for any other caller.
    targetScaler
        Fitted scaler; its inverse_transform maps the model output back to the
        original target units.
    labelSet : str
        Only used in the progress message.

    Returns
    -------
    pandas.DataFrame
        `testResultsFrame` plus one row per trained model, re-indexed 0..n-1.

    Notes
    -----
    R2/MAE/MSE/MAPE are computed between `y_test` and the raw model output, i.e.
    in whatever scale `y_test` is supplied in. Only the stored predictions
    ('TrainPredictions' column) are inverse-transformed.
    """
    print(f'Creating models using layer size = {layerSize} on set = {labelSet}.\n')

    # The loss is stateless, so one instance serves all models.
    criterion = nn.MSELoss()
    # Collect the one-row result frames and concatenate once at the end instead
    # of re-copying the growing frame on every iteration.
    frames = [testResultsFrame]

    for i in range(loops):
        print(f'Create and training model with {i} hidden layers\n')
        model = MLPMLasp(inputSize, layerSize, i, 1)
        optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

        train_loses = np.zeros(epochs)
        test_loses = np.zeros(epochs)

        for epoch in range(epochs):
            # Forward pass and training loss (model(...) rather than .forward() so hooks run).
            y_pred_train = model(X_train)
            loss = criterion(y_pred_train, y_train)
            train_loses[epoch] = loss.item()

            # Backpropagation and parameter update.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Test loss is only recorded, never back-propagated, so skip autograd tracking.
            with torch.no_grad():
                test_loses[epoch] = criterion(model(X_test), y_test).item()

        with torch.no_grad():
            y_pred_scaled = model(X_test)

        # Evaluate metrics (in the scale of y_test, see Notes above).
        y_pred = targetScaler.inverse_transform(y_pred_scaled)
        r2_score = metrics.r2_score(y_test, y_pred_scaled)
        mae = metrics.mean_absolute_error(y_test, y_pred_scaled)
        mse = metrics.mean_squared_error(y_test, y_pred_scaled)
        mape = metrics.mean_absolute_percentage_error(y_test, y_pred_scaled)

        row = [i, r2_score, mae, mse, mape, model, train_loses, test_loses, y_pred]
        frames.append(pd.DataFrame(np.array(row, dtype=object).reshape(1, len(colList)), columns=colList))

    if len(frames) == 1:
        # Nothing was trained: hand back the caller's frame untouched.
        return testResultsFrame
    return pd.concat(frames, ignore_index=True)

In [None]:
%%time
testResDataFrame = pd.DataFrame(columns=colList)
layerSize = 64
loops = 15
testResDataFrame = createModel(testResDataFrame, inputSize, layerSize, loops, 
                               y_train_scaled, X_train_scaled,
                               y_test_scaled, X_test_scaled, 
                               targetStdScaler, 'all')

In [None]:
# Show the collected results, one row per trained model.
testResDataFrame

In [None]:
# Test-set MAE of every trained model against its number of hidden layers.
# The metric is computed by createModel from the tensors it is given, which in this
# notebook are the standardized ones, so the values are not in TPS units.
fig, ax = plt.subplots(figsize=(20,10))
ax.plot(testResDataFrame['HiddenLayers'].astype(int),
        testResDataFrame['MAE'].astype(float),
        label='MAE')
ax.set_title('MLP test-set MAE per model')
ax.set_ylabel('MAE (standardized target)')
ax.set_xlabel('Hidden layers')
ax.legend(loc='upper right')
plt.show()

In [None]:
# Index label of the row with the smallest MAE (the first one if several rows tie).
minMaeIDX = testResDataFrame.index[testResDataFrame['MAE'] == testResDataFrame['MAE'].min()][0]

In [None]:
# Show the result row of the best model. minMaeIDX is an index label used positionally
# here; the two coincide because createModel builds the frame with ignore_index=True.
testResDataFrame.iloc[minMaeIDX]

In [None]:
# De-scaled test-set predictions of the best model (stored under 'TrainPredictions').
y_pred_MLP_std = testResDataFrame['TrainPredictions'][minMaeIDX]
# Scatter the original test targets and the predictions, record by record.
fig, ax = plt.subplots(figsize=(20,10))
ax.scatter(range(y_test.shape[0]), y_test, label="Original Data", alpha=0.6, c='red')
ax.scatter(range(y_pred_MLP_std.shape[0]), y_pred_MLP_std, label="Predicted Data",
           alpha=0.6, c='black')
ax.set_ylabel('Mean TPS')
ax.set_xlabel('Test Records')
ax.set_title('MLP Std Model for X_test dataset prediction vs original')
ax.legend()
plt.show()

In [None]:
# Per-epoch loss histories recorded for the best model.
train_loses = testResDataFrame['TrainLoses'][minMaeIDX]
test_loses = testResDataFrame['TestLoses'][minMaeIDX]
# Train and test loss curves on one set of axes.
fig, ax = plt.subplots(figsize=(20,10))
ax.plot(train_loses)
ax.plot(test_loses)
ax.set_title('Test vs Train loss')
ax.set_ylabel('Loss')
ax.set_xlabel('Epoch')
ax.legend(['train', 'test'], loc='upper right')
plt.show()

In [None]:
# MLflow endpoint, passed by createRemoteModel as both tracking and registry URI.
MLFLOW_URI='http://mlflow-server.mlflow-strangiato.svc.cluster.local:8080/'
# Alternative endpoint for an MLflow server reachable on localhost:
#MLFLOW_URI='http://localhost:8080/'

In [None]:
import os
# Set before Ray is imported in the following cells.
# NOTE(review): presumably silences Ray's reporting of unhandled task errors - confirm against the Ray docs.
os.environ["RAY_IGNORE_UNHANDLED_ERRORS"] = "1"

In [None]:
import logging
import ray
from codeflare_sdk import TokenAuthentication, Cluster, ClusterConfiguration
from codeflare_sdk import generate_cert

In [None]:
# Credentials are read from the environment (or prompted for) instead of being
# hard-coded: a token committed in a notebook is readable by anyone who can see the file.
# Any token that was previously stored in this cell should be revoked.
#   OCP_TOKEN      - API token; obtain it with `oc whoami -t` on the authenticated cluster
#   OCP_API_SERVER - API server URL (defaults to the sandbox cluster used so far)
import getpass
import os

auth = TokenAuthentication(
    token = os.environ.get("OCP_TOKEN") or getpass.getpass("OpenShift API token: "),
    server = os.environ.get("OCP_API_SERVER",
                            "https://api.cluster-ffqgg.ffqgg.sandbox1386.opentlc.com:6443"),
    skip_tls = False
)
auth.login()

In [None]:
# Create the required TLS certificate and export the environment variables that enable TLS.
# NOTE(review): the two arguments are presumably (cluster name, namespace) - confirm against the codeflare_sdk docs.
generate_cert.generate_tls_cert('raycluster-complete', 'raycluster')
generate_cert.export_env('raycluster-complete', 'raycluster')

In [None]:
# Ray client address of the cluster head service.
ray_endpoint = 'ray://raycluster-complete-head-svc.raycluster.svc.cluster.local:10001' # ensure your ray cluster URL is correct
# Drop any connection left over from a previous run of this cell before connecting again.
ray.shutdown()
# Connect with ERROR-level logging only and without forwarding worker logs to the notebook.
ray.init(address=ray_endpoint, logging_level=logging.ERROR, log_to_driver=False)

In [None]:
import onnx
import onnxruntime

In [None]:
import mlflow

In [None]:
from ray.air.integrations.mlflow import setup_mlflow

In [None]:
@ray.remote
def createRemoteModel(inputSize, layerSize, hiddenSize, 
                y_train, X_train, y_test, X_test, 
                targetScaler, labelSet):
    """Ray task: train one MLPMLasp model, log it to MLflow and return its test metrics.

    The model is trained full-batch for `epochs` epochs (notebook global) with
    Adam (lr=0.001) and MSE loss. A prediction-vs-original scatter plot, the run
    parameters, the metrics and an ONNX export of the model are logged to the
    MLflow experiment 'mlasp-1-{labelSet}-{hiddenSize}' at `MLFLOW_URI`
    (notebook global).

    Parameters
    ----------
    inputSize : int
        Number of input features of the model.
    layerSize : int
        Width of the input layer and of every hidden layer.
    hiddenSize : int
        Number of hidden layers of this model.
    y_train, X_train, y_test, X_test : torch.Tensor
        Passed unchanged to the model and the loss.
        NOTE(review): this notebook passes the standardized float tensors -
        confirm for any other caller.
    targetScaler
        Fitted scaler; its inverse_transform maps values back to the original
        target units (used for the logged figure).
    labelSet : str
        Label used in the experiment name and the progress message.

    Returns
    -------
    list
        [hiddenSize, r2_score, mae, mse, mape]. The metrics are computed between
        `y_test` and the raw model output, i.e. in the scale `y_test` is supplied in.
    """
    mlflow_exp_name = f'mlasp-1-{labelSet}-{hiddenSize}'
    mlflow_ray_config = None
    mlflow_ray = setup_mlflow(config=mlflow_ray_config,
                              tracking_uri=MLFLOW_URI,
                              registry_uri=MLFLOW_URI,
                              create_experiment_if_not_exists=True,
                              rank_zero_only=False,
                              experiment_name=mlflow_exp_name)

    print(f'Create and training model with {hiddenSize} hidden layers using layer size = {layerSize} on set = {labelSet}.\n')
    model = MLPMLasp(inputSize, layerSize, hiddenSize, 1)

    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    train_loses = np.zeros(epochs)
    test_loses = np.zeros(epochs)

    for epoch in range(epochs):
        # Forward pass and training loss (model(...) rather than .forward() so hooks run).
        y_pred_train = model(X_train)
        loss = criterion(y_pred_train, y_train)
        train_loses[epoch] = loss.item()

        # Backpropagation and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Test loss is only recorded, never back-propagated, so skip autograd tracking.
        with torch.no_grad():
            test_loses[epoch] = criterion(model(X_test), y_test).item()

    with torch.no_grad():
        y_pred_scaled = model(X_test)

    # Evaluate metrics (in the scale of y_test); de-scaled values are used for the figure only.
    y_pred = targetScaler.inverse_transform(y_pred_scaled)
    y_test_orig = targetScaler.inverse_transform(y_test)
    r2_score = metrics.r2_score(y_test, y_pred_scaled)
    mae = metrics.mean_absolute_error(y_test, y_pred_scaled)
    mse = metrics.mean_squared_error(y_test, y_pred_scaled)
    mape = metrics.mean_absolute_percentage_error(y_test, y_pred_scaled)

    fig = plt.figure(figsize=(20,10))
    plt.scatter(range(y_test_orig.shape[0]), y_test_orig, label="Original Data", alpha=0.6, c='red')
    plt.scatter(range(y_pred.shape[0]), y_pred, label="Predicted Data",
                alpha=0.6, c='black')
    plt.ylabel('Mean TPS')
    plt.xlabel('Test Records')
    plt.title('MLP StdScaler Model for X_test dataset prediction vs original')
    plt.legend()

    mlflow_ray.log_figure(fig, f"{mlflow_exp_name}.png")
    # Release the figure so it is not kept alive in the worker after logging.
    plt.close(fig)

    # NOTE(review): the loop above trains full-batch and MLPMLasp does not set a custom
    # initializer; "batch_size" and "dense_kernel_initializer" are logged unchanged for
    # continuity with existing runs - confirm they are intended.
    mlflow_ray.log_param("batch_size", 32)
    mlflow_ray.log_param("layer_size", layerSize)
    # Log this model's own hidden-layer count, not the notebook-level number of models.
    mlflow_ray.log_param("hidden_layers", hiddenSize)
    mlflow_ray.log_param("activation_function", "relu")
    mlflow_ray.log_param("dense_kernel_initializer", "torchnormal")
    mlflow_ray.log_param("epochs", epochs)
    mlflow_ray.log_param("learning_rate", 0.001)
    mlflow_ray.log_param("optimizer", "adam")

    mlflow_ray.log_metric("mae", mae)
    mlflow_ray.log_metric("mse", mse)
    mlflow_ray.log_metric("mape", mape)
    mlflow_ray.log_metric("r2_score", r2_score)

    # Export with the first training record as example input and log the ONNX model.
    model_onnx = torch.onnx.dynamo_export(model, X_train[0])
    mlflow_ray.onnx.log_model(model_onnx.model_proto, f"model-{mlflow_exp_name}")

    # Only plain scalars are returned to the driver.
    return [hiddenSize, r2_score, mae, mse, mape]

In [None]:
%%time

# Submit one Ray task per hidden-layer count. Each .remote() call returns an object
# reference, so despite its name testResDataFrame2 is a plain list of references;
# the results themselves are fetched later with ray.get.
testResDataFrame2 = []
layerSize = 64
loops = 15

for i in range(loops):
    rowResult = createRemoteModel.remote(inputSize, layerSize, i, 
                        y_train_scaled, X_train_scaled,
                        y_test_scaled, X_test_scaled, 
                        targetStdScaler, labelSet='all_ray')
    testResDataFrame2.append(rowResult)

In [None]:
# Display the list of Ray object references returned by the submissions.
testResDataFrame2

In [None]:
%%time

# Fetch the result row of every remote task.
tResDF2 = ray.get(testResDataFrame2)

In [None]:
# Disconnect from the Ray cluster now that the results have been fetched.
ray.shutdown()

In [None]:
# Work on a copy of the column list.
colList2=colList.copy()

In [None]:
# The remote tasks return five fields per model, matching the first five column names.
df = pd.DataFrame(tResDF2, columns=colList2[0:5])

In [None]:
# Show the metrics of the Ray-trained models.
df

In [None]:
# Index label of the row with the smallest MAE (the first one if several rows tie).
minMaeIDX_ray = df.loc[df['MAE']==df['MAE'].min()].index[0]

In [None]:
# Show the metrics of the best Ray-trained model.
df.iloc[minMaeIDX_ray]

In [None]:
# Test-set MAE of every Ray-trained model against its number of hidden layers.
# The metric is computed by createRemoteModel from the tensors it is given, which in
# this notebook are the standardized ones, so the values are not in TPS units.
fig, ax = plt.subplots(figsize=(20,10))
ax.plot(df['HiddenLayers'], df['MAE'], label='MAE')
ax.set_title('MLP (Ray) test-set MAE per model')
ax.set_ylabel('MAE (standardized target)')
ax.set_xlabel('Hidden layers')
ax.legend(loc='upper right')
plt.show()

In [None]:
# Names of the feature columns.
X_test.columns

In [None]:
# One row of the data for reference.
data.head(1)

In [None]:
# Hand-written sample with eight values.
# NOTE(review): presumably ordered like X_test.columns - confirm.
record = [[True, 21, 277, 1712, 262, 7, 31, 5]]

In [None]:
# Scale the sample with the already-fitted feature scaler.
test_rec = stdScaler.transform(record)

In [None]:
# Convert to a float tensor, as done for the training and test data.
test_rec = torch.FloatTensor(test_rec)

In [None]:
test_rec

In [None]:
# Locally trained model with the lowest MAE.
ml_model = testResDataFrame['model'][minMaeIDX]

In [None]:
# Predict without gradient tracking, then map the output back to the original target scale.
with torch.no_grad():
    y_pred_scaled = ml_model.forward(test_rec)

y_pred = targetStdScaler.inverse_transform(y_pred_scaled)
y_pred

### Save the scalers and export the model for the inference calls

In [None]:
# Persist the fitted target scaler (used to de-scale predictions).
joblib.dump(targetStdScaler,'target_scaler.pkl')

In [None]:
# Persist the fitted feature scaler (used to scale inputs).
joblib.dump(stdScaler,'standard_scaler.pkl')

In [None]:
# Export the selected model to ONNX, with test_rec as the example input.
torch.onnx.export(ml_model, test_rec, 'torch_mlasp.onnx')