In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from sklearn.model_selection import train_test_split
from pyro.infer import Predictive
import torch
import seaborn as sns
import pyro
import pyro.distributions as dist
from pyro.contrib.autoguide import AutoDiagonalNormal, AutoMultivariateNormal
from pyro.infer import MCMC, NUTS, HMC, SVI, Trace_ELBO
from pyro.optim import ClippedAdam
from tqdm.autonotebook import tqdm
from IPython.display import clear_output
import os

In [None]:
%load_ext autoreload
%autoreload 2
from __init__ import root_dir, data_path, src_path

In [None]:
# Display the absolute form of root_dir so the path setup can be checked by eye
os.path.abspath(root_dir)

In [None]:
%load_ext autoreload
%autoreload 2
from src.models.models import compute_error, model
from src.visualization.visualize import true_vs_preds_plot

In [None]:
# Read the processed dataset from <data_path>/processed/data_processed.csv
processed_csv = os.path.join(data_path, 'processed', 'data_processed.csv')
data = pd.read_csv(processed_csv)

In [None]:
# List the column names of the loaded frame
data.columns

In [None]:
# Remove the 'Unnamed: 0' column and every row that contains a missing value.
# errors='ignore' together with rebinding (instead of inplace=True) keeps this
# cell idempotent: re-running it no longer raises KeyError once the column is gone.
data = (
    data
    .drop(columns=['Unnamed: 0'], errors='ignore')
    .dropna(axis=0)
)

In [None]:
# Parse the 'datetime' column into pandas datetime values
parsed_datetime = pd.to_datetime(data['datetime'])
data['datetime'] = parsed_datetime

Fit a linear regression with a single continuous target variable, `comp2_life` (the last column selected below):

In [None]:
# Columns used for the regression, grouped by kind. The final entry
# ('comp2_life') is the column later split off as the target.
telemetry_3h = [
    'voltmean_3h', 'rotatemean_3h', 'pressuremean_3h', 'vibrationmean_3h',
    'voltsd_3h', 'rotatesd_3h', 'pressuresd_3h', 'vibrationsd_3h',
]
telemetry_24h = [
    'voltmean_24h', 'rotatemean_24h', 'pressuremean_24h', 'vibrationmean_24h',
    'voltsd_24h', 'rotatesd_24h', 'pressuresd_24h', 'vibrationsd_24h',
]
error_counts = [
    'error1count', 'error2count', 'error3count', 'error4count', 'error5count',
]
machine_info = [
    'age', 'model_model1', 'model_model2', 'model_model3', 'model_model4',
]
target_col = ['comp2_life']

cols = telemetry_3h + telemetry_24h + error_counts + machine_info + target_col

# Restrict the frame to the selected columns, in the order listed above
data_tst = data[cols]

In [None]:
# Show (rows, columns) of the selected subset
data_tst.shape

In [None]:
# Convert the selected columns to one array, then split it column-wise:
# everything except the last column is the feature matrix X,
# the last column is the target y.
full_matrix = data_tst.to_numpy()
X, y = full_matrix[:, :-1], full_matrix[:, -1]

In [None]:
# Hold out 10% of the rows as a test set; random_state=0 pins the split
split_parts = train_test_split(X, y, test_size=0.1, random_state=0)
X_train_, X_test_, y_train_, y_test_ = split_parts

# Report the shape of each piece
for part_name, part in (
    ("X_train", X_train_),
    ("y_train", y_train_),
    ("X_test", X_test_),
    ("y_test", y_test_),
):
    print(f"{part_name}: {part.shape}")

In [None]:
# Standardization statistics, computed on the training split only so that
# no information from the test split enters the scaling.
X_mean = X_train_.mean(axis=0)
X_std = X_train_.std(axis=0)
# A feature that is constant in the training split has zero standard deviation;
# dividing by it would produce NaN/inf. Use 1 for those columns so they simply
# become all-zero after centering.
X_std = np.where(X_std == 0, 1.0, X_std)

y_std = y_train_.std()
y_mean = y_train_.mean()

In [None]:
# Standardize features and target with the training-split statistics
X_train, X_test = (
    (features - X_mean) / X_std for features in (X_train_, X_test_)
)
y_train, y_test = (
    (target - y_mean) / y_std for target in (y_train_, y_test_)
)

In [None]:
# Convert the standardized training arrays to float32 tensors for the Pyro model
X_train_torch = torch.tensor(X_train, dtype=torch.float32)
y_train_torch = torch.tensor(y_train, dtype=torch.float32)

for array_name, array in (("X_train", X_train), ("y_train", y_train)):
    print(f"{array_name} shape: {array.shape}")

In [None]:
# Seed Pyro's random number generators so the stochastic optimisation and the
# posterior sampling give the same results on every Restart & Run All
pyro.set_rng_seed(0)

# Reset parameter values left over from any earlier run
pyro.clear_param_store()

# Define guide function: a multivariate-normal autoguide built for `model`
guide = AutoMultivariateNormal(model)

In [None]:
# Number of optimization steps for the training loop
n_steps = 40000

# Optimizer: ClippedAdam with learning rate (lr) 1e-4
adam_params = dict(lr=0.0001)
optimizer = ClippedAdam(adam_params)

# Inference algorithm: SVI with a single-particle Trace_ELBO objective
elbo = Trace_ELBO(num_particles=1)
svi = SVI(model=model, guide=guide, optim=optimizer, loss=elbo)

In [None]:
# Do gradient steps: run n_steps SVI updates on the training tensors.
# svi.step returns an estimate of the loss (the negative ELBO), so it is kept
# under its own name instead of rebinding `elbo`, which holds the Trace_ELBO
# objective passed to SVI.
for step in range(n_steps):
    loss = svi.step(X_train_torch, y_train_torch)
    if step % 1000 == 0:
        # progress report every 1000 steps
        print("[%d] Loss (-ELBO): %.1f" % (step, loss))

In [None]:
# Draw 1000 samples of the listed sites using the fitted guide
posterior_sites = ("alpha", "beta", "sigma")
predictive = Predictive(
    model,
    guide=guide,
    num_samples=1000,
    return_sites=posterior_sites,
)
samples = predictive(X_train_torch, y_train_torch)

In [None]:
# Show the target's training-split std and mean (used to undo the scaling)
y_std, y_mean

In [None]:
# Pull the sampled intercepts and coefficients out as NumPy arrays
alpha_samples, beta_samples = (
    samples[site].detach().numpy() for site in ("alpha", "beta")
)


def _mean_prediction(features):
    """Return alpha + features @ beta averaged along axis 1.

    NOTE(review): assumes the first axis of the sampled arrays indexes the
    draws, so axis 1 of the sum is the draw axis - confirm against the model.
    """
    per_draw = alpha_samples.T + np.dot(features, beta_samples[:, 0].T)
    return np.mean(per_draw, axis=1)


def _to_original_scale(values):
    """Undo the target standardization (multiply by y_std, add y_mean)."""
    return values * y_std + y_mean


y_hat_train = _mean_prediction(X_train)
y_hat = _mean_prediction(X_test)

# convert back to the original scale
preds = _to_original_scale(y_hat)
preds_train = _to_original_scale(y_hat_train)
y_true = _to_original_scale(y_test)

In [None]:
# Score the test predictions with the third argument set to None.
# The arrays returned by compute_error replace y_true and preds.
error_report = compute_error(y_true, preds, None)
corr, mae, rae, rmse, r2, y_true, preds = error_report

report_template = "CorrCoef: %.3f\nMAE: %.3f\nRMSE: %.3f\nR2: %.3f"
print(report_template % (corr, mae, rmse, r2))

In [None]:
# Absolute path of <root_dir>/reports/figures, passed to the plotting calls
figures_dir = os.path.join(root_dir, 'reports', 'figures')
figures_path = os.path.abspath(figures_dir)

In [None]:
# Plot true vs. predicted test values; figure name is 'svi_100_test'
true_vs_preds_plot(
    y_true,
    preds,
    window=None,
    fig_path=figures_path,
    name='svi_100_test',
)

In [None]:
# Score the same predictions again with the third argument set to 60.
# The returned arrays are kept under separate names (yy_true, ppreds).
error_report_60 = compute_error(y_true, preds, 60)
corr, mae, rae, rmse, r2, yy_true, ppreds = error_report_60

report_template = "CorrCoef: %.3f\nMAE: %.3f\nRMSE: %.3f\nR2: %.3f"
print(report_template % (corr, mae, rmse, r2))

In [None]:
# Plot the arrays returned by the 60-setting error computation; figure name is 'svi_60_test'
true_vs_preds_plot(
    yy_true,
    ppreds,
    window=None,
    fig_path=figures_path,
    name='svi_60_test',
)