In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from pathlib import Path

from sklearn.model_selection import train_test_split

import torch

import seaborn as sns

import pyro
import pyro.distributions as dist
from pyro.contrib.autoguide import AutoDiagonalNormal, AutoMultivariateNormal
from pyro.infer import MCMC, NUTS, HMC, SVI, Trace_ELBO
from pyro.optim import Adam, ClippedAdam
from pyro.infer import Predictive

import os

In [None]:
%load_ext autoreload
%autoreload 2
from __init__ import root_dir, data_path, src_path
from src.models.models import MCMC_LR, normal_model

In [None]:
# Load the preprocessed dataset stored under <data_path>/processed/.
processed_csv = os.path.join(data_path, 'processed', 'data_processed.csv')
data = pd.read_csv(processed_csv)

In [None]:
# Display the column labels of the loaded frame.
data.columns

In [None]:
# Remove the 'Unnamed: 0' column, then every row that contains a missing value.
# errors='ignore' keeps this cell re-runnable: without it, a second execution
# raises KeyError because the column was already removed in place.
data.drop(columns=['Unnamed: 0'], errors='ignore', inplace=True)
data.dropna(axis=0, inplace=True)

In [None]:
# Replace the 'datetime' column with its pd.to_datetime() conversion.
data['datetime'] = pd.to_datetime(data['datetime'])

Try linear regression with one target variable at a time: the continuous variable `comp<i>_life`, where i ranges from 1 to 4.

In [None]:
# Target column names, one per component: comp1_life ... comp4_life.
components = [f"comp{idx}_life" for idx in range(1, 5)]

In [None]:
# Display the generated target column names.
components

In [None]:
# Build the modelling table for the first component through the imported MCMC_LR
# class. The previous bare call to `get_data_for_component(...)` referenced a
# name that is not defined or imported in the visible cells, so it raises
# NameError on a fresh kernel.
# NOTE(review): assumes MCMC_LR.get_data_for_component() yields the same table the
# old free function did, with the target in the last column — confirm.
component_array = MCMC_LR(data, components[0]).get_data_for_component().to_numpy()

# Keep the last column as target y; all remaining columns are the features X.
y = component_array[:, -1]
X = component_array[:, :-1]

Scale (standardize) the data by subtracting the mean and dividing by the standard deviation, both computed on the training split.

In [None]:
# Hold out 20% of the rows for testing; the fixed random_state makes the split
# repeatable. The "_unsc" arrays are the inputs to the standardization below.
(X_train_unsc, X_test_unsc,
 y_train_unsc, y_test_unsc) = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [None]:
# Standardization statistics, taken from the training split only.
X_mean = np.mean(X_train_unsc, axis=0)  # one value per feature column
X_std = np.std(X_train_unsc, axis=0)

y_mean = np.mean(y_train_unsc)
y_std = np.std(y_train_unsc)

In [None]:
# Standardize features and target with the training-split statistics, so nothing
# from the test split influences the scaling.
# A feature that is constant in the training split has zero standard deviation
# and would turn its whole column into NaN/inf; dividing by 1 instead leaves the
# centred values untouched. Columns with non-zero std are scaled exactly as before.
X_scale = np.where(X_std == 0, 1.0, X_std)

X_train = (X_train_unsc - X_mean) / X_scale
X_test = (X_test_unsc - X_mean) / X_scale

y_train = (y_train_unsc - y_mean) / y_std
y_test = (y_test_unsc - y_mean) / y_std

In [None]:
# Convert the standardized training arrays to float32 tensors for the Pyro model.
X_train_torch = torch.tensor(X_train, dtype=torch.float32)
y_train_torch = torch.tensor(y_train, dtype=torch.float32)

In [None]:
def student_model(X, obs=None):
    """Linear regression with Student-t priors on the intercept and coefficients.

    Sample sites, in order: "alpha" (intercept), "beta" (one coefficient per
    column of ``X``), "sigma" (scale of the Normal likelihood) and "y".

    Args:
        X: feature tensor; ``X.shape[1]`` sets the number of coefficients.
        obs: observed targets to condition "y" on, or ``None`` to sample "y".

    Returns:
        The value of the "y" sample site.
    """
    dof = 1.0  # degrees of freedom shared by both Student-t priors
    n_features = X.shape[1]

    # Prior for the bias/intercept.
    alpha = pyro.sample("alpha", dist.StudentT(dof, 0.0, 1.0))

    # Student-t prior for the regression coefficients (zero location, unit scale);
    # to_event() declares the feature axis as an event dimension.
    coef_prior = dist.StudentT(dof, torch.zeros(n_features), torch.ones(n_features))
    beta = pyro.sample("beta", coef_prior.to_event())

    # Prior for the likelihood scale: sigma is passed as Normal's scale argument,
    # i.e. it is a standard deviation rather than a variance.
    sigma = pyro.sample("sigma", dist.HalfCauchy(5.))

    with pyro.plate("data"):
        mean = alpha + X.matmul(beta)
        y = pyro.sample("y", dist.Normal(mean, sigma), obs=obs)

    return y

In [None]:
# Seed the random number generators before sampling so the chain, and every
# number and figure derived from it, is reproducible across kernel restarts.
pyro.set_rng_seed(0)

# Run inference in Pyro: NUTS on the Student-t regression model, with 200
# warm-up steps followed by 1000 posterior draws from a single chain.
nuts_kernel = NUTS(student_model)
mcmc = MCMC(nuts_kernel, num_samples=1000, warmup_steps=200, num_chains=1)
mcmc.run(X_train_torch, y_train_torch)

# Show summary of inference results
mcmc.summary()

In [None]:
# Posterior densities of the intercept ("alpha") and the likelihood scale ("sigma").
fig, ax = plt.subplots(figsize=(12, 8))

# Extract samples from posterior
posterior_samples = mcmc.get_samples()

for site in ("alpha", "sigma"):
    sns.kdeplot(posterior_samples[site], ax=ax)

# Legend entries are matched to the curves by drawing order.
ax.legend(["alpha", "sigma"])
plt.show()

In [None]:
# One density curve per regression coefficient, all drawn on the same axes.
beta_posterior = posterior_samples["beta"]
n_features = X.shape[1]
for col in range(n_features):
    sns.kdeplot(beta_posterior[:, col])

plt.show()

In [None]:
# Extract samples from posterior
# NOTE(review): `posterior_samples` is already assigned from mcmc.get_samples()
# in the alpha/sigma plotting cell; this cell repeats that call.
posterior_samples = mcmc.get_samples()

In [None]:
# Compute predictions: evaluate the linear predictor for every posterior draw,
# then average over the draws (axis 1). The inputs are the standardized arrays,
# so the results are on the standardized target scale.
alpha_np = posterior_samples["alpha"].numpy()
beta_np = posterior_samples["beta"].numpy()

y_hat = np.mean(alpha_np.T + np.dot(X_test, beta_np.T), axis=1)
y_hat_train = np.mean(alpha_np.T + np.dot(X_train, beta_np.T), axis=1)

In [None]:
# `y_true` and `preds` were passed to compute_error() before any visible cell
# assigned them, which raises NameError on a fresh kernel. Build them explicitly
# from the held-out split: the unscaled test targets, and the posterior-mean
# predictions mapped back through the training mean/std.
# NOTE(review): assumes the metrics are meant to be computed in original units — confirm.
y_true = y_test_unsc
preds = y_hat * y_std + y_mean

# NOTE(review): `compute_error` is not defined or imported in the visible cells
# and must be brought into scope for this cell to run. The meaning of its third
# argument (None here) cannot be determined from this notebook.
corr, mae, rae, rmse, r2, y_true, preds = compute_error(y_true, preds, None)
print("CorrCoef: %.3f\nMAE: %.3f\nRMSE: %.3f\nR2: %.3f" % (corr, mae, rmse, r2))

In [None]:
# Overlay true values (red) and predictions (blue), restricted to the positions
# where the prediction is non-negative.
fig = plt.figure(figsize=(20, 10))

# NOTE(review): start/end are assigned but not read by any visible cell.
start, end = 300, 600

keep = np.where(preds >= 0)
plt.plot(y_true[keep], 'r-', label='test')
plt.plot(preds[keep], 'b-', label='pred')

plt.legend()

plt.show()

In [None]:
# Score the same arrays again, this time passing 80 as compute_error's third
# argument, and keep the arrays it returns under separate names.
# NOTE(review): the effect of that argument is defined outside this notebook.
scores = compute_error(y_true, preds, 80)
corr, mae, rae, rmse, r2, yy_true, ppreds = scores
print("CorrCoef: %.3f\nMAE: %.3f\nRMSE: %.3f\nR2: %.3f" % (corr, mae, rmse, r2))

In [None]:
# Overlay the arrays returned by the second compute_error() call: true values in
# red, predictions in blue, restricted to non-negative predictions.
fig = plt.figure(figsize=(20, 10))

# NOTE(review): start/end are assigned but not read by any visible cell.
start, end = 400, 600

keep = np.where(ppreds >= 0)
plt.plot(yy_true[keep], 'r-', label='test')
plt.plot(ppreds[keep], 'b-', label='pred')

plt.legend()

plt.show()

In [None]:
# Instantiate the imported MCMC_LR class for the first component's target column.
mcmc_lr = MCMC_LR(data, components[0])

In [None]:
# Fetch the component table from the helper object.
# NOTE(review): this rebinds `X`, which earlier cells use for the feature array.
X = mcmc_lr.get_data_for_component()

In [None]:
# Preview the first rows of the component table.
X.head()

In [None]:
# Unpack everything preprocess() returns. This rebinds X, y, the train/test
# arrays, the training tensors and y_std/y_mean assigned earlier in the notebook.
(
    y, X,
    X_train_torch, y_train_torch,
    X_test, y_test,
    X_train, y_train,
    y_std, y_mean,
) = mcmc_lr.preprocess(X)

In [None]:
# Run inference through the helper object, passing the training tensors and the
# imported `normal_model`.
samples = mcmc_lr.pyro_inference(X_train_torch, y_train_torch, normal_model)

In [None]:
# Print whatever pyro_inference() returned.
# NOTE(review): presumably a collection of posterior draws; if so, this dumps
# every draw — consider showing shapes or a summary instead.
print(samples)