In [3]:
from pathlib import Path
import pandas as pd

In [45]:
%load_ext autoreload
%autoreload 2
from src.models.models_svi import SVI_model_format, SVI_regression_model, svi_main
from src.models.models_mcmc import MCMC_regression_model, MCMC_model_format
from src.models.utils import compute_error
from src.visualization.visualize import true_vs_preds_plot 

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [5]:
# Read the processed feature table; read_csv accepts the Path object directly.
data_path = Path('../data/processed/data_processed.csv')
data = pd.read_csv(data_path)

In [6]:
# Remove the leftover CSV index column and every row containing a missing value.
# The frame is rebound instead of mutated in place, and errors='ignore' tolerates
# the column already being gone, so re-running this cell no longer raises KeyError.
data = (
    data
    .drop(columns=['Unnamed: 0'], errors='ignore')
    .dropna(axis=0)
)

In [7]:
# Parse the 'datetime' column into pandas datetimes (column position is preserved).
data = data.assign(datetime=pd.to_datetime(data['datetime']))

# Stochastic Variational Inference (SVI)

In [8]:
# Build the SVI regression helper on the cleaned frame.
# NOTE(review): 'comp2_life' is presumably the regression target (it is the last
# column of the dataset preview below) — confirm in src.models.models_svi.
svi_regression = SVI_regression_model(data, 'comp2_life')

In [9]:
# Modelling table for the selected component; the preview in the next cell shows
# the feature columns followed by 'comp2_life'.
svi_dataset = svi_regression.get_data_for_component()

In [10]:
# Preview the first rows to sanity-check the columns handed to the models.
svi_dataset.head()

Unnamed: 0,voltmean_3h,rotatemean_3h,pressuremean_3h,vibrationmean_3h,voltsd_3h,rotatesd_3h,pressuresd_3h,vibrationsd_3h,voltmean_24h,rotatemean_24h,...,error2count,error3count,error4count,error5count,age,model_model1,model_model2,model_model3,model_model4,comp2_life
17,166.281848,453.787824,106.187582,51.99008,24.276228,23.621315,11.176731,3.394073,172.042428,450.418764,...,0.0,1.0,0.0,1.0,18,0,0,1,0,105.875
18,175.412103,445.450581,100.887363,54.251534,34.918687,11.001625,10.580336,2.921501,171.219623,443.802134,...,0.0,1.0,0.0,1.0,18,0,0,1,0,105.75
19,157.347716,451.882075,101.28938,48.602686,24.617739,28.950883,9.966729,2.356486,172.013443,444.882018,...,0.0,1.0,0.0,1.0,18,0,0,1,0,105.625
20,176.45055,446.033068,84.521555,47.638836,8.0714,76.511343,2.636879,4.108621,170.176321,445.069594,...,0.0,1.0,0.0,1.0,18,0,0,1,0,105.5
21,190.325814,422.692565,107.393234,49.552856,8.390777,7.176553,4.262645,7.598552,172.932248,444.618018,...,0.0,1.0,0.0,1.0,18,0,0,1,0,105.375


### Poisson

In [11]:
# Poisson model spec; the cells below read its 'name' and 'model' entries.
poisson = SVI_model_format.poisson

In [12]:
# preprocess() returns ten values; they are unpacked into Poisson-specific names
# so the splits of the other models are not overwritten.
(
    y_poisson,
    X_poisson,
    X_train_torch_poisson,
    y_train_torch_poisson,
    X_test_poisson,
    y_test_poisson,
    X_train_poisson,
    y_train_poisson,
    y_std_poisson,
    y_mean_poisson,
) = svi_regression.preprocess(X_init=svi_dataset, model=poisson['name'])

POISSON Regression


In [13]:
# Fit the Poisson model for 1000 SVI steps; the recorded output shows a value
# labelled ELBO every 100 steps, and the call returns the fitted guide.
poisson_guide = svi_regression.pyro_inference(
    X_train_torch=X_train_torch_poisson,
    y_train_torch=y_train_torch_poisson,
    model=poisson['model'],
    steps=1000,
)

[0] ELBO: 318520.5
[100] ELBO: 309064.4
[200] ELBO: 306878.6
[300] ELBO: 276268.2
[400] ELBO: 269723.6
[500] ELBO: 261796.3
[600] ELBO: 291743.2
[700] ELBO: 265881.5
[800] ELBO: 267817.8
[900] ELBO: 262180.1


In [14]:
# Inspection only: show the types of the fitted guide and of the model callable.
type(poisson_guide), type(poisson['model'])

(pyro.infer.autoguide.guides.AutoDiagonalNormal, function)

In [15]:
# Inspection only: display the guide's repr.
poisson_guide

AutoDiagonalNormal()

In [16]:
# Inspection only: display the model function stored in the Poisson spec.
poisson['model']

<function src.models.models.poisson_model(X, obs=None)>

In [20]:
# Turn the fitted guide into predictions and matching true values.
# Note that the whole `poisson` spec is passed as `model`, not poisson['model'].
poisson_preds, poisson_y_true = svi_regression.post_process(
    guide=poisson_guide,
    model=poisson,
    X_train_torch=X_train_torch_poisson,
    y_train_torch=y_train_torch_poisson,
    X_test=X_test_poisson,
    y_test=y_test_poisson,
    y_std=y_std_poisson,
    y_mean=y_mean_poisson,
)

In [21]:
# Score the Poisson predictions. `rae` is returned but not printed.
# NOTE(review): svi_trues / svi_pred are reassigned by every SVI metrics cell, so
# they always hold the most recently scored model.
# NOTE(review): R2 prints as 0.000 for every SVI run in the recorded outputs even
# though the correlation is non-zero — possibly clamped in compute_error; confirm.
corr, mae, rae, rmse, r2, svi_trues, svi_pred = compute_error(
    trues=poisson_y_true,
    predicted=poisson_preds,
    threshold=None,
)
print(
    "CorrCoef: %.3f\nMAE: %.3f\nRMSE: %.3f\nR2: %.3f"
    % (corr, mae, rmse, r2)
)

No threshold
CorrCoef: 0.125
MAE: 24.353
RMSE: 40.124
R2: 0.000


### Linear

In [22]:
# Linear model spec; the cells below read its 'name' and 'model' entries.
linear = SVI_model_format.linear

In [23]:
# preprocess() returns ten values; they are unpacked into linear-specific names
# so the splits of the other models are not overwritten.
(
    y_linear,
    X_linear,
    X_train_torch_linear,
    y_train_torch_linear,
    X_test_linear,
    y_test_linear,
    X_train_linear,
    y_train_linear,
    y_std_linear,
    y_mean_linear,
) = svi_regression.preprocess(X_init=svi_dataset, model=linear['name'])

LINEAR Regression


In [24]:
# Fit the linear model for 1000 SVI steps and keep the returned guide.
linear_guide = svi_regression.pyro_inference(
    X_train_torch=X_train_torch_linear,
    y_train_torch=y_train_torch_linear,
    model=linear['model'],
    steps=1000,
)

[0] ELBO: 13406.6
[100] ELBO: 13270.8
[200] ELBO: 13095.8
[300] ELBO: 13165.9
[400] ELBO: 13701.1
[500] ELBO: 13523.1
[600] ELBO: 12921.6
[700] ELBO: 12867.1
[800] ELBO: 13519.7
[900] ELBO: 13102.6


In [25]:
# Turn the fitted guide into predictions and matching true values.
# Note that the whole `linear` spec is passed as `model`, not linear['model'].
linear_preds, linear_y_true = svi_regression.post_process(
    guide=linear_guide,
    model=linear,
    X_train_torch=X_train_torch_linear,
    y_train_torch=y_train_torch_linear,
    X_test=X_test_linear,
    y_test=y_test_linear,
    y_std=y_std_linear,
    y_mean=y_mean_linear,
)

In [26]:
# Score the linear predictions. `rae` is returned but not printed.
# NOTE(review): this overwrites corr/mae/.../svi_trues/svi_pred from the Poisson cell.
corr, mae, rae, rmse, r2, svi_trues, svi_pred = compute_error(
    trues=linear_y_true,
    predicted=linear_preds,
    threshold=None,
)
print(
    "CorrCoef: %.3f\nMAE: %.3f\nRMSE: %.3f\nR2: %.3f"
    % (corr, mae, rmse, r2)
)

No threshold
CorrCoef: 0.232
MAE: 22.438
RMSE: 37.695
R2: 0.000


### Heteroscedastic

In [27]:
# Heteroscedastic model spec (stored under the attribute name `heterosc`);
# the cells below read its 'name' and 'model' entries.
heteroscedastic = SVI_model_format.heterosc

In [28]:
# preprocess() returns ten values; they are unpacked into heteroscedastic-specific
# names so the splits of the other models are not overwritten.
(
    y_heterosc,
    X_heterosc,
    X_train_torch_heterosc,
    y_train_torch_heterosc,
    X_test_heterosc,
    y_test_heterosc,
    X_train_heterosc,
    y_train_heterosc,
    y_std_heterosc,
    y_mean_heterosc,
) = svi_regression.preprocess(X_init=svi_dataset, model=heteroscedastic['name'])

HETEROSCEDASTIC Regression


In [29]:
# Fit the heteroscedastic model for 1000 SVI steps and keep the returned guide.
heteroscedastic_guide = svi_regression.pyro_inference(
    X_train_torch=X_train_torch_heterosc,
    y_train_torch=y_train_torch_heterosc,
    model=heteroscedastic['model'],
    steps=1000,
)

[0] ELBO: 1202375.2
[100] ELBO: 560317.3
[200] ELBO: 287986.9
[300] ELBO: 295488.2
[400] ELBO: 203894.5
[500] ELBO: 229750.9
[600] ELBO: 93275.4
[700] ELBO: 45756.3
[800] ELBO: 99573.1
[900] ELBO: 31774.8


In [30]:
# Turn the fitted guide into predictions and matching true values.
# Note that the whole `heteroscedastic` spec is passed as `model`.
heterosc_preds, heterosc_y_true = svi_regression.post_process(
    guide=heteroscedastic_guide,
    model=heteroscedastic,
    X_train_torch=X_train_torch_heterosc,
    y_train_torch=y_train_torch_heterosc,
    X_test=X_test_heterosc,
    y_test=y_test_heterosc,
    y_std=y_std_heterosc,
    y_mean=y_mean_heterosc,
)

In [31]:
# Score the heteroscedastic predictions. `rae` is returned but not printed.
# NOTE(review): this overwrites corr/mae/.../svi_trues/svi_pred from the linear cell.
corr, mae, rae, rmse, r2, svi_trues, svi_pred = compute_error(
    trues=heterosc_y_true,
    predicted=heterosc_preds,
    threshold=None,
)
print(
    "CorrCoef: %.3f\nMAE: %.3f\nRMSE: %.3f\nR2: %.3f"
    % (corr, mae, rmse, r2)
)

No threshold
CorrCoef: -0.092
MAE: 24.980
RMSE: 40.523
R2: 0.000


# Markov Chain Monte Carlo (MCMC)

In [33]:
# Build the MCMC regression helper on the same cleaned frame and the same
# 'comp2_life' argument as the SVI helper above.
mcmc_regression = MCMC_regression_model(data, 'comp2_life')

In [34]:
# Modelling table for the MCMC run; its preview in the next cell matches the SVI one.
mcmc_dataset = mcmc_regression.get_data_for_component()

In [35]:
# Preview the first rows to sanity-check the columns handed to the sampler.
mcmc_dataset.head()

Unnamed: 0,voltmean_3h,rotatemean_3h,pressuremean_3h,vibrationmean_3h,voltsd_3h,rotatesd_3h,pressuresd_3h,vibrationsd_3h,voltmean_24h,rotatemean_24h,...,error2count,error3count,error4count,error5count,age,model_model1,model_model2,model_model3,model_model4,comp2_life
17,166.281848,453.787824,106.187582,51.99008,24.276228,23.621315,11.176731,3.394073,172.042428,450.418764,...,0.0,1.0,0.0,1.0,18,0,0,1,0,105.875
18,175.412103,445.450581,100.887363,54.251534,34.918687,11.001625,10.580336,2.921501,171.219623,443.802134,...,0.0,1.0,0.0,1.0,18,0,0,1,0,105.75
19,157.347716,451.882075,101.28938,48.602686,24.617739,28.950883,9.966729,2.356486,172.013443,444.882018,...,0.0,1.0,0.0,1.0,18,0,0,1,0,105.625
20,176.45055,446.033068,84.521555,47.638836,8.0714,76.511343,2.636879,4.108621,170.176321,445.069594,...,0.0,1.0,0.0,1.0,18,0,0,1,0,105.5
21,190.325814,422.692565,107.393234,49.552856,8.390777,7.176553,4.262645,7.598552,172.932248,444.618018,...,0.0,1.0,0.0,1.0,18,0,0,1,0,105.375


### Linear

In [39]:
# Linear model spec for MCMC; the inference cell below reads its 'model' entry.
mcmc_linear = MCMC_model_format.linear

In [36]:
# The MCMC preprocess() takes no `model` argument here (unlike the SVI calls)
# and also returns ten values, unpacked into *_mcmc names.
(
    y_mcmc,
    X_mcmc,
    X_train_torch_mcmc,
    y_train_torch_mcmc,
    X_test_mcmc,
    y_test_mcmc,
    X_train_mcmc,
    y_train_mcmc,
    y_std_mcmc,
    y_mean_mcmc,
) = mcmc_regression.preprocess(X_init=mcmc_dataset)

In [40]:
# Run the sampler on the training tensors with the linear model.
# NOTE(review): the trailing positional 100 is presumably the number of posterior
# samples (the recorded progress bar counts 300 iterations, so warm-up may be
# included) — confirm against MCMC_regression_model.pyro_inference.
mcmc_samples = mcmc_regression.pyro_inference(
    X_train_torch_mcmc,
    y_train_torch_mcmc,
    mcmc_linear['model'],
    100,
)

Sample: 100%|██████████████████████████████████████████| 300/300 [01:05,  4.58it/s, step size=3.30e-02, acc. prob=0.893]                                                                       


                mean       std    median      5.0%     95.0%     n_eff     r_hat
     alpha      0.00      0.01      0.00     -0.02      0.03    171.65      0.99
   beta[0]     -0.03      0.02     -0.03     -0.05      0.01    175.62      0.99
   beta[1]      0.01      0.03      0.02     -0.02      0.07    172.92      1.00
   beta[2]      0.02      0.03      0.02     -0.02      0.06    167.56      1.00
   beta[3]     -0.01      0.02     -0.01     -0.05      0.03    117.63      1.01
   beta[4]      0.01      0.01      0.01     -0.02      0.03    154.91      0.99
   beta[5]      0.02      0.02      0.02     -0.01      0.04    263.70      0.99
   beta[6]     -0.00      0.01     -0.00     -0.02      0.02    140.51      1.00
   beta[7]      0.00      0.01      0.00     -0.02      0.02    139.67      0.99
   beta[8]     -0.03      0.04     -0.03     -0.08      0.04    116.48      0.99
   beta[9]      0.17      0.04      0.17      0.09      0.23     79.53      1.00
  beta[10]      0.02      0




In [43]:
# Turn the posterior samples into predictions and matching true values.
# Arguments are positional; keep this order in sync with post_process's signature.
mcmc_preds, mcmc_y_true = mcmc_regression.post_process(
    X_test_mcmc,
    X_train_mcmc,
    mcmc_samples,
    y_std_mcmc,
    y_mean_mcmc,
    y_test_mcmc,
)

In [44]:
# Score the MCMC predictions. `rae` is returned but not printed.
# NOTE(review): corr/mae/rae/rmse/r2 overwrite the values left by the SVI cells.
corr, mae, rae, rmse, r2, mcmc_trues, mcmc_pred = compute_error(
    trues=mcmc_y_true,
    predicted=mcmc_preds,
    threshold=None,
)
print(
    "CorrCoef: %.3f\nMAE: %.3f\nRMSE: %.3f\nR2: %.3f"
    % (corr, mae, rmse, r2)
)

No threshold
CorrCoef: 0.576
MAE: 16.596
RMSE: 24.504
R2: 0.327


In [68]:
# Names passed one by one to svi_main in the loop below.
components = [
    'comp1_fail',
    'comp2_fail',
    'comp3_fail',
    'comp4_fail',
]

In [70]:
# Run the Poisson SVI pipeline (2000 steps) once per entry of `components`.
# Per the recorded output, svi_main prints the component name, the model name,
# the ELBO trace and the error metrics for each run.
for component in components:
    svi_main(
        data,
        component,
        'poisson',
        steps=2000,
    )

comp1_fail
POISSON Regression
[0] ELBO: 5419.8
[100] ELBO: 5234.2
[200] ELBO: 6871.3
[300] ELBO: 4369.2
[400] ELBO: 3867.2
[500] ELBO: 4572.0
[600] ELBO: 4325.0
[700] ELBO: 3371.8
[800] ELBO: 3732.2
[900] ELBO: 3851.7
[1000] ELBO: 3564.4
[1100] ELBO: 2971.8
[1200] ELBO: 3668.0
[1300] ELBO: 3710.6
[1400] ELBO: 3914.0
[1500] ELBO: 3768.3
[1600] ELBO: 3567.2
[1700] ELBO: 3715.8
[1800] ELBO: 3165.6
[1900] ELBO: 2921.0
No threshold
CorrCoef: 0.513
MAE: 0.607
RMSE: 0.780
R2: 0.000
comp2_fail
POISSON Regression
[0] ELBO: 9068.3
[100] ELBO: 10094.3
[200] ELBO: 12198.7
[300] ELBO: 8109.2
[400] ELBO: 8170.9
[500] ELBO: 4616.6
[600] ELBO: 6972.6
[700] ELBO: 5409.3
[800] ELBO: 4783.9
[900] ELBO: 5258.8
[1000] ELBO: 4814.5
[1100] ELBO: 4289.6
[1200] ELBO: 4401.5
[1300] ELBO: 3999.2
[1400] ELBO: 4235.5
[1500] ELBO: 4375.5
[1600] ELBO: 3628.6
[1700] ELBO: 3432.9
[1800] ELBO: 3574.3
[1900] ELBO: 3373.5
No threshold
CorrCoef: 0.460
MAE: 0.807
RMSE: 1.191
R2: 0.000
Content appended to file
comp3_fail
PO

In [66]:
# Poisson SVI run for 'comp2_life' with 2000 steps (twice the step count of the
# cell-by-cell Poisson run above).
svi_main(
    data,
    'comp2_life',
    'poisson',
    steps=2000,
)

comp2_life
POISSON Regression
[0] ELBO: 230274.9
[100] ELBO: 232949.1
[200] ELBO: 241390.4
[300] ELBO: 227659.7
[400] ELBO: 204318.4
[500] ELBO: 228912.2
[600] ELBO: 209692.0
[700] ELBO: 197950.9
[800] ELBO: 202513.7
[900] ELBO: 195477.9
[1000] ELBO: 198315.2
[1100] ELBO: 192002.5
[1200] ELBO: 182600.3
[1300] ELBO: 186795.3
[1400] ELBO: 174847.1
[1500] ELBO: 173074.5
[1600] ELBO: 182023.6
[1700] ELBO: 159636.4
[1800] ELBO: 162375.9
[1900] ELBO: 155870.5
No threshold
CorrCoef: 0.143
MAE: 22.030
RMSE: 39.085
R2: 0.000


In [67]:
# Linear SVI run for 'comp2_life' with 2000 steps.
# NOTE(review): the recorded output ends with "Content appended to file", so
# svi_main presumably also writes results to disk — confirm where.
svi_main(
    data,
    'comp2_life',
    'linear',
    steps=2000,
)

comp2_life
LINEAR Regression
[0] ELBO: 9676.4
[100] ELBO: 9428.8
[200] ELBO: 10130.4
[300] ELBO: 9800.2
[400] ELBO: 10387.0
[500] ELBO: 9972.7
[600] ELBO: 9386.2
[700] ELBO: 9601.5
[800] ELBO: 9372.4
[900] ELBO: 9783.4
[1000] ELBO: 9764.7
[1100] ELBO: 9534.0
[1200] ELBO: 9182.6
[1300] ELBO: 9223.0
[1400] ELBO: 9337.3
[1500] ELBO: 9239.2
[1600] ELBO: 9376.7
[1700] ELBO: 8866.0
[1800] ELBO: 9310.5
[1900] ELBO: 9546.5
No threshold
CorrCoef: 0.286
MAE: 22.942
RMSE: 39.189
R2: 0.000
Content appended to file


In [51]:
# Heteroscedastic SVI run for 'comp2_life' with 2000 steps.
svi_main(
    data,
    'comp2_life',
    'heteroscedastic',
    steps=2000,
)

comp2_life
HETEROSCEDASTIC Regression
[0] ELBO: 260463.5
[100] ELBO: 601577.9
[200] ELBO: 1445578.6
[300] ELBO: 249741.8
[400] ELBO: 160566.8
[500] ELBO: 329763.0
[600] ELBO: 131040.9
[700] ELBO: 93087.2
[800] ELBO: 52574.0
[900] ELBO: 66611.7
[1000] ELBO: 137398.6
[1100] ELBO: 94235.6
[1200] ELBO: 94559.0
[1300] ELBO: 36983.6
[1400] ELBO: 54939.3
[1500] ELBO: 35147.3
[1600] ELBO: 39411.9
[1700] ELBO: 37046.6
[1800] ELBO: 37299.5
[1900] ELBO: 23618.5
No threshold
CorrCoef: -0.373
MAE: 25.903
RMSE: 42.585
R2: 0.000
