In [None]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import pyro
import torch
import pyro.distributions as dist
import torch.nn as nn

from pyro.nn import PyroModule, PyroSample
from pyro.infer import SVI, Trace_ELBO, MCMC, NUTS
from pyro.optim import ClippedAdam, Adam
from pyro.contrib.autoguide import AutoDiagonalNormal
from pyro.infer import Predictive
from typing import Dict, Tuple, List
from sklearn.model_selection import train_test_split
from pandas import DataFrame
from tqdm.autonotebook import tqdm
from pyro.nn import PyroModule, PyroSample

In [None]:
# Directory that holds the raw PdM_*.csv files (relative to the notebook's working directory).
data_path = '../data/raw/raw2'

# Predictive maintenance

In [None]:
# Render the illustration image inline; "marm.jpg" is looked up relative to the working directory.
from IPython.display import Image
Image("marm.jpg")

## Data Preparation

Read the raw data

In [None]:
# Build the path of every raw table; each file is named PdM_<table>.csv inside data_path.
failures_path, errors_path, machines_path, maint_path, telemetry_path = (
    os.path.join(data_path, f'PdM_{table}.csv')
    for table in ('failures', 'errors', 'machines', 'maint', 'telemetry')
)

In [None]:
# Load the five raw tables, in the same order as the individual reads.
failures_df, errors_df, machines_df, maint_df, telemetry_df = (
    pd.read_csv(csv_path)
    for csv_path in (failures_path, errors_path, machines_path, maint_path, telemetry_path)
)

Transform `datetime` column

In [None]:
# Parse the textual timestamps of every time-stamped table into datetime64 values.
for frame in (maint_df, failures_df, errors_df, telemetry_df):
    frame['datetime'] = pd.to_datetime(frame['datetime'], format="%Y-%m-%d %H:%M:%S")

#### Dataset transformation

Maintenance data

In [None]:
# Rolling mean and standard deviation of one series over a fixed number of observations.
def rolling_mean_std(col: pd.Series, window: int) -> DataFrame:

    '''
    Compute the rolling mean and standard deviation of a series.

    col: the series to aggregate
    window: number of consecutive observations in each window

    Returns a DataFrame indexed like ``col`` with the columns ``mean`` and ``std``.
    Rows for which a full window is not yet available are NaN (pandas default).
    '''
    return col.rolling(window).agg(['mean', 'std'])

# Add per-machine rolling mean / standard deviation columns for one telemetry signal.
def telem_(telemetry: DataFrame, column: str, window: int) -> DataFrame:

    '''
    Add the rolling mean and standard deviation of ``column`` to ``telemetry``.

    The statistics are computed separately for every ``machineID`` so that a
    window never mixes readings of two machines. Two columns are added,
    named ``<column>mean_<window>h`` and ``<column>sd_<window>h``.

    telemetry: table with a ``machineID`` column and the signal ``column``
    column: name of the signal to aggregate
    window: number of consecutive rows per window

    Returns the same ``telemetry`` object; it is modified in place.
    '''

    rolling = telemetry.groupby('machineID')[column].rolling(window)

    # groupby().rolling() prefixes the result index with the group key; dropping that
    # level restores the original row labels so the assignment aligns row by row
    # (this also behaves the same across pandas versions, unlike groupby().apply()).
    telemetry[column + 'mean_' + str(window) + 'h'] = rolling.mean().reset_index(level=0, drop=True)
    telemetry[column + 'sd_' + str(window) + 'h'] = rolling.std().reset_index(level=0, drop=True)

    return telemetry

def lifespan(replacement_event_df: DataFrame) -> DataFrame:

    '''
    Compute, per machine, the time since each component was last replaced.

    replacement_event_df: table with ``datetime``, ``machineID`` and the indicator
        columns ``comp_comp1`` .. ``comp_comp4`` (1 on rows where the component is replaced).

    Returns a table with ``datetime``, ``machineID`` and ``comp1_maint`` .. ``comp4_maint``.
    Each ``*_maint`` value is the number of rows since the last replacement row of that
    component (0 on the replacement row itself) multiplied by 0.041667, i.e. rows are
    converted to days assuming one row per hour. The input frame is not modified.
    '''

    # Rounded 1/24; kept at the original precision so the results are unchanged.
    days_per_row = 0.041667
    comp_cols = ['comp_comp1', 'comp_comp2', 'comp_comp3', 'comp_comp4']
    out_cols = ['datetime', 'machineID', 'comp1_maint', 'comp2_maint', 'comp3_maint', 'comp4_maint']

    per_machine = []
    for machine_id in tqdm(replacement_event_df['machineID'].unique(), desc='Machine'):
        is_machine = replacement_event_df['machineID'] == machine_id
        df = replacement_event_df.loc[is_machine, ['datetime', 'machineID'] + comp_cols].copy()

        for comp in comp_cols:
            # True on every row where the component is NOT replaced.
            running = df[comp] != 1
            rows_seen = running.cumsum()
            # Value of the running count at the most recent replacement row (0 before the first one).
            count_at_reset = rows_seen.where(~running).ffill().fillna(0).astype(int)
            # comp[5:] strips the "comp_" prefix: "comp_comp1" -> "comp1".
            df[comp[5:] + '_maint'] = (rows_seen - count_at_reset) * days_per_row

        per_machine.append(df[out_cols])

    # Collect first and concatenate once instead of growing a frame inside the loop.
    if not per_machine:
        return pd.DataFrame(columns=out_cols)
    return pd.concat(per_machine, axis=0)


In [None]:
# Add rolling mean / standard deviation features for every signal, first with a
# 3-row window and then with a 24-row window.
for window in (3, 24):
    for signal in ('volt', 'rotate', 'pressure', 'vibration'):
        telemetry_df = telem_(telemetry_df, signal, window)
#telemetry_df=telemetry_df.drop(['volt','rotate','pressure','vibration'],axis=1)
# Drop every row that contains a NaN (this includes the rows whose rolling window is not yet full).
telemetry_df = telemetry_df.dropna()

In [None]:
# One indicator column per replaced component, aligned on the telemetry rows.
# NOTE(review): if maint_df holds several records for the same (datetime, machineID),
# the left merge repeats the telemetry row once per record - confirm against the data.
maint_dummies = pd.get_dummies(maint_df, columns=['comp'])
maint_transf_df = (
    telemetry_df
    .merge(maint_dummies, on=['datetime', 'machineID'], how='left')
    [['datetime', 'machineID', 'comp_comp1', 'comp_comp2', 'comp_comp3', 'comp_comp4']]
    .fillna(0)  # rows without a maintenance record mean "no replacement"
)

In [None]:
# Per machine: time since the last replacement of each component (comp1_maint .. comp4_maint).
maintenance_df = lifespan(maint_transf_df)

In [None]:
# Quick look at the first rows of the maintenance features.
maintenance_df.head()

#### Merging the rest of the datasets

In [None]:
# One-hot encode the error type; the indicator columns are named error<k>count.
error_cols = ['error1count', 'error2count', 'error3count', 'error4count', 'error5count']
error_count = pd.get_dummies(errors_df, columns=['errorID']).rename(
    columns={f'errorID_error{k}': f'error{k}count' for k in range(1, 6)}
)

# NOTE(review): several errors logged for the same (datetime, machineID) would repeat
# the telemetry row in this left merge - confirm against the data.
features = telemetry_df.merge(error_count, on=['datetime', 'machineID'], how='left')

# Propagate the error information per error type. The forward fill is done per
# machine so that the first rows of one machine never inherit the last error
# recorded for the machine that precedes it in the table.
features[error_cols] = features.groupby('machineID')[error_cols].ffill()
# Fill the initial error count (rows before a machine's first error) with 0
features = features.fillna(0)

# turn "model" variable into dummy variables
machines_df['model'] = machines_df['model'].astype('category')
machines_dummy = pd.get_dummies(machines_df, drop_first=False)

# Add the machine metadata information
features = features.merge(machines_df[['machineID','model']], on=['machineID'], how='left')
features = features.merge(machines_dummy, on=['machineID'], how='left')
features = features.merge(maintenance_df, on=['datetime', 'machineID'], how='left')

Merge the failures dataset

In [None]:
# One indicator column per failed component. The columns keep their "failure_"
# prefix (failure_comp1 .. failure_comp4) because lifespan_fails selects them by that name.
fails = pd.get_dummies(failures_df,columns=['failure'])

In [None]:
def lifespan_fails(comp_rep0: DataFrame) -> DataFrame:

    '''
    Add a countdown column ``comp<k>_life`` (in days) for every component, per machine.

    For each row, ``comp<k>_life`` is the time from the row's ``datetime`` to the
    ``datetime`` of the next row (same machine, this row included) whose
    ``failure_comp<k>`` value equals 0; it is NaN when no such row follows.

    comp_rep0: table with a datetime64 ``datetime`` column, ``machineID`` and
        ``failure_comp1`` .. ``failure_comp4``.

    Returns a new table with those columns plus ``comp1_life`` .. ``comp4_life``;
    the input frame is not modified.
    '''
    failure_cols = ['failure_comp1', 'failure_comp2', 'failure_comp3', 'failure_comp4']
    per_machine = []

    for machine_id in comp_rep0['machineID'].unique():
        is_machine = comp_rep0['machineID'] == machine_id
        df = comp_rep0.loc[is_machine, ['datetime', 'machineID'] + failure_cols].copy()

        for comp in failure_cols:
            life = comp.split('_')[1] + '_life'
            # NOTE(review): the countdown targets rows where the indicator is 0, not 1.
            # Given the docstring ("until the component's fail") this may be inverted;
            # the original condition is kept here - confirm the intended target.
            target_time = pd.to_datetime(df['datetime'].where(df[comp] == 0).bfill())
            # 86400 seconds per day.
            df[life] = (target_time - df['datetime']).dt.total_seconds() / 86400

        per_machine.append(df)

    # Concatenate once instead of growing a frame inside the loop.
    if not per_machine:
        return pd.DataFrame()
    return pd.concat(per_machine, axis=0)

In [None]:
# Left-join the failure indicators onto the telemetry rows; rows without a recorded failure get NaN.
fails_ = telemetry_df.merge(fails, on=['datetime', 'machineID'], how='left')

In [None]:
# Copy each failure record backward onto (at most) the 24 rows that precede it.
# DataFrame.bfill replaces the deprecated fillna(method='bfill') spelling.
# NOTE(review): the fill is not grouped by machineID, so it can cross from one
# machine's first rows into the previous machine's last rows - confirm this is acceptable.
fails_ = fails_.bfill(limit=24) # fill backward up to 24h if all data, otherwise it must be  7

In [None]:
# Add the per-component countdown columns (comp1_life .. comp4_life).
fails_transf = lifespan_fails(fails_)

In [None]:
# Inspect the columns produced by lifespan_fails.
fails_transf.columns

In [None]:
# Inspect the feature columns before the failure labels are merged in.
features.columns

In [None]:
# After the merge and the limited backfill, rows without a nearby failure still hold NaN
# in the per-component failure columns; treat those as 0.
# Note that this also replaces NaN in the *_life countdown columns with 0.
fails_transf = fails_transf.fillna(0)

In [None]:
# Attach the failure indicators and countdown targets to the feature table.
labeled_features = features.merge(fails_transf, on=['datetime', 'machineID'], how='left')
# labeled_features = labeled_features.fillna(method='bfill', limit=24) # fill backward up to 24h if all data, otherwise it must be  7

In [None]:
# Quick look at the first rows of the labeled feature table.
labeled_features.head()

In [None]:
# Persist the labeled table. The row index is written as an unnamed first column,
# which is what the 'Unnamed: 0' drop after re-loading the file removes.
labeled_features.to_csv('../data/processed/labeled_1h_data.csv')

## Exploratory data analysis

Machine age - machine age by model

In [None]:
def plot_boxh_groupby(df: DataFrame, feature_name: str, by: str):
    """
    Draw and show a horizontal box plot of one feature, one box per group.

    df: DataFrame holding both columns
    feature_name: name of the column whose distribution is plotted
    by: name of the column that defines the groups
    """
    box_options = dict(column=feature_name, by=by, vert=False, figsize=(10, 6))
    df.boxplot(**box_options)
    plt.title(f'Distribution of {feature_name} by {by}')
    plt.show()
    
    
def plot_histogram(df: DataFrame, attribute:str, title_name:str, bins: int, figsize=(9,3), alpha=1, label=None):

    '''
    Plot a histogram of one column on the current matplotlib figure.

    df: DataFrame holding the column
    attribute: name of the column to plot
    title_name: text used (title-cased) in the plot title
    bins: number of histogram bins
    figsize, alpha, label: forwarded to the pandas plotting call
    '''

    values = df[attribute]
    plot_title = f'{title_name.title()} distribution'
    values.plot(kind='hist', bins=bins, figsize=figsize, alpha=alpha, label=label, title=plot_title)
    
def plot_bar_sortvals(df: DataFrame, attribute: str, title: str, figsize=(5,5)):
    '''
    Bar plot of how often each value of a column occurs, sorted by ascending count.

    df: DataFrame holding the column
    attribute: name of the column to count (missing values are counted too)
    title: plot title
    figsize: figure size forwarded to the pandas plotting call
    '''
    counts = df[attribute].value_counts(normalize=False, dropna=False)
    ascending_counts = counts.sort_values()
    ascending_counts.plot(kind='bar', figsize=figsize, grid=True, title=title)

    
def plot_scatter(df: DataFrame, x_axis_attr: str , y_axis_attr: str, figsize=(5,5), title=None, legend=None):
    '''
    Scatter plot of one column against another.

    df: DataFrame holding both columns
    x_axis_attr: column shown on the x axis
    y_axis_attr: column shown on the y axis
    figsize, title, legend: forwarded to the pandas plotting call
    '''
    scatter_options = dict(figsize=figsize, title=title, legend=legend)
    df.plot.scatter(x=x_axis_attr, y=y_axis_attr, **scatter_options)

In [None]:
# Box plot of the 'age' column over all rows of the labeled table.
labeled_features.boxplot(column='age', vert=False, figsize=(5,3))
plt.title('Age distribution')
plt.show()

In [None]:
# Same distribution, one box per machine model.
labeled_features.boxplot(column='age', by='model', vert=False, figsize=(8,5))
plt.show()

```
We can see that `model 4` has the lowest median age
```

Failure per component

In [None]:
# Number of recorded failures for each component, sorted by ascending count.
plot_bar_sortvals(failures_df, 'failure', 'Number of failures per component')
plt.ylabel('Failures')
plt.show()

Telemetry data

In [None]:
# Histogram of the 'volt' readings over all machines.
plot_histogram(labeled_features, 'volt', 'Voltage', bins=300)
plt.show()

In [None]:
# Histogram of the 'rotate' readings over all machines.
plot_histogram(labeled_features, 'rotate', 'RPM', bins=300)
plt.show()

In [None]:
# Histogram of the 'pressure' readings over all machines.
plot_histogram(labeled_features, 'pressure', 'Pressure', bins=300)
plt.show()

In [None]:
# Histogram of the 'vibration' readings over all machines.
plot_histogram(labeled_features, 'vibration', 'Vibration', bins=300)
plt.show()

```
All the telemetry data, taking into consideration all machines, look to be normally distributed. The odd one out is rotation, which looks slightly skewed to the left.
```

In [None]:
# Overlay one voltage histogram per machine model.
# Grouping by the plain column name (not a one-element list) keeps each group key a
# scalar, so the legend shows the model name rather than a 1-tuple on recent pandas.
for model_name, model_rows in labeled_features.groupby('model'):
    plot_histogram(model_rows, 'volt', 'Voltage',bins=300, alpha=0.3, label=model_name)

plt.legend()
plt.show()

In [None]:
# Overlay one rotation histogram per machine model.
# Grouping by the plain column name (not a one-element list) keeps each group key a
# scalar, so the legend shows the model name rather than a 1-tuple on recent pandas.
for model_name, model_rows in labeled_features.groupby('model'):
    plot_histogram(model_rows, 'rotate', 'RPM',bins=300, alpha=0.3, label=model_name)

plt.legend()
plt.show()

#### Create a temporary dataframe, which is the `labeled_features` dataframe used in training, but augmented with time features:

Errors

In [None]:
# Number of logged errors for each error type, sorted by ascending count.
plot_bar_sortvals(errors_df, 'errorID', title='Number of error per type')

plt.show()

```
The most common error type is 1, while the least common is error 5.
```

Plot age vs errors

In [None]:
# Count the error, maintenance and failure records of every machine and attach the
# three counts to the machine metadata. Machines without records get NaN here.
errors_per_machine = errors_df.groupby("machineID").size().reset_index(name="num_errors")
maint_per_machine = maint_df.groupby("machineID").size().reset_index(name="num_maint")
failure_per_machine = failures_df.groupby("machineID").size().reset_index(name="num_failure")

machines_fail_data = pd.merge(machines_df, errors_per_machine, how='left', on="machineID")
machines_fail_data = pd.merge(machines_fail_data, maint_per_machine, how='left', on="machineID")
machines_fail_data = pd.merge(machines_fail_data, failure_per_machine, how='left', on="machineID")

In [None]:
# A machine without any record in one of the tables has NaN for that count after the left merges; use 0.
# NOTE(review): fillna runs over every column, including the categorical 'model' column - confirm
# this does not raise on the pandas version in use.
machines_fail_data.fillna(0, inplace=True)

In [None]:
# Machine age against the number of errors, failures and maintenance records per machine.
plot_scatter(machines_fail_data, "age", "num_errors", 
             title="Age versus number of errors")

plot_scatter(machines_fail_data, "age", "num_failure", 
             title="Age versus number of failures")

plot_scatter(machines_fail_data, "age", "num_maint", 
             title="Age versus total number of\ncomponent maintenance")
plt.show()

In [None]:
# Correlate the numeric columns only: the categorical 'model' column cannot be
# correlated and makes DataFrame.corr() raise on recent pandas versions.
corr = machines_fail_data.select_dtypes(include='number').corr()
# Styler.set_precision was removed from pandas; a format string gives the same two decimals.
corr.style.background_gradient(cmap='coolwarm').format("{:.2f}")

In [None]:
# Copy of the labeled table augmented with calendar features derived from 'datetime'.
timestamps = labeled_features.datetime.dt
augmented_features = labeled_features.assign(
    month=timestamps.month,
    week_of_year=timestamps.isocalendar().week,
    hour=timestamps.hour,
)

Failures per machine per component

In [None]:
# Count the failures per (machine, component) pair ...
temp_df = failures_df.groupby(["machineID", "failure"]).size().reset_index()
temp_df.columns = ["machineID", "comp", "num_fail"]
# ... and reshape to one row per machine and one column per component for the stacked bars.
temp_df_pivot = pd.pivot(temp_df, index="machineID", columns="comp", values="num_fail").rename_axis(None, axis=1)

temp_df_pivot.plot.bar(stacked=True, figsize=(20, 6), title="Count of failures per component for different Machines")
plt.xlabel("Machine ID")
plt.ylabel("Number of components that failed")

plt.show()

In [None]:
# Number of rows of the augmented table that fall in each calendar month.
# (The title previously read 'Number of error per type', copied from the error plot.)
augmented_features['month'].value_counts(
                normalize=False, dropna=False).sort_values().plot(
                kind='bar', figsize=(5,5), grid=True,
                title='Number of rows per month')

plt.show()

### data import

In [None]:
# Re-load the processed table from disk, using the 'datetime' column as the row index.
labeled_features = pd.read_csv('../data/processed/labeled_1h_data.csv',  index_col='datetime')

In [None]:
# Remove the index column that to_csv wrote without a name. Re-assigning with
# errors='ignore' keeps the cell re-runnable: a second run no longer raises KeyError.
labeled_features = labeled_features.drop(columns='Unnamed: 0', errors='ignore')

In [None]:
# (rows, columns) of the re-loaded table.
labeled_features.shape

In [None]:
# Columns available for modelling.
labeled_features.columns

## Regression
The following part includes the regression models (linear, Poisson, heteroscedastic) and the inference techniques SVI and MCMC.

In [None]:
def split(data: DataFrame, test_size: float) ->  tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:

    '''
    Split ``data`` by machine so that every machine model is represented in both
    the train and the test part.

    The machines listed in the notebook-level ``machines_df`` are split with a
    split stratified on ``model`` (fixed random_state=42); every row of ``data``
    then follows its machine. ``machineID`` is dropped, the last remaining column
    is used as the target and all other columns as features.

    data: table with a ``machineID`` column; its last column is the target
    test_size: share of the machines assigned to the test part

    Returns ``x_train, x_test, y_train, y_test`` as float arrays.
    '''
    machines = machines_df

    # Split the machine IDs themselves. Splitting the positional index (0..n-1) and
    # comparing it with machineID would shift every machine's assignment by one and
    # silently drop the machine with the highest ID from both parts.
    train_ids, test_ids = train_test_split(
        machines['machineID'], test_size=test_size, stratify=machines['model'], random_state=42)

    def _features_and_target(machine_ids):
        # Rows of the selected machines as (features, target) float arrays.
        rows = data[data['machineID'].isin(machine_ids)]
        # dtype=float avoids an object array when boolean dummy columns are mixed with floats.
        values = rows.drop(columns=['machineID']).to_numpy(dtype=float)
        return values[:, :-1], values[:, -1]

    x_train, y_train = _features_and_target(train_ids)
    x_test, y_test = _features_and_target(test_ids)

    return x_train,x_test,y_train,y_test

def get_data_for_component(data, component):
    '''
    Select the shared feature columns plus the columns of one component.

    For a single-digit ``component`` k the result ends with ``comp<k>_maint`` and
    ``failure_comp<k>``; the ``*_life`` columns are left out.

    data: table holding all feature, maintenance and failure columns
    component: component number whose columns are appended
    '''
    shared_cols = ['machineID', 'voltmean_3h', 'rotatemean_3h',
                   'pressuremean_3h', 'vibrationmean_3h', 'voltsd_3h', 'rotatesd_3h',
                   'pressuresd_3h', 'vibrationsd_3h', 'voltmean_24h', 'rotatemean_24h',
                   'pressuremean_24h', 'vibrationmean_24h', 'voltsd_24h', 'rotatesd_24h',
                   'pressuresd_24h', 'vibrationsd_24h', 'error1count', 'error2count',
                   'error3count', 'error4count', 'error5count','age',
                   'model_model1', 'model_model2', 'model_model3', 'model_model4']

    candidates = ('comp1_maint', 'comp2_maint', 'comp3_maint', 'comp4_maint',
                  'failure_comp1', 'failure_comp2', 'failure_comp3', 'failure_comp4',
                  'comp1_life', 'comp2_life', 'comp3_life', 'comp4_life')

    wanted = str(component)
    component_cols = []
    for name in candidates:
        # The component is matched against the individual characters of the column name.
        if wanted not in list(name):
            continue
        if 'life' in name.split('_'):
            continue
        component_cols.append(name)

    return data[shared_cols + component_cols]

def preprocess(X_init, test_size, model:str, classi=False, splitting=True) -> tuple[np.ndarray, np.ndarray, torch.Tensor, torch.Tensor, torch.Tensor, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.float64, np.float64]:

    '''
    Split the data and standardize features and target with the training statistics.

    X_init: table with a ``machineID`` column; its last column is the target
    test_size: share of the data used for testing
    model: name of the regression type; for the Poisson name the torch training
        target is the unscaled target converted to integers
    classi: if True, the targets are returned unscaled (classification use)
    splitting: if True, split by machine with ``split``; otherwise split rows at
        random (random_state=0)

    Returns ``y, X, X_train_torch, y_train_torch, X_test_torch, X_test, y_test,
    X_train, y_train, y_std, y_mean``.
    '''
    # dtype=float avoids an object array when boolean dummy columns are mixed with floats.
    full = X_init.drop(columns=['machineID']).to_numpy(dtype=float)
    y = full[:,-1]
    X = full[:,:-1]

    if splitting:
        X_train_unsc, X_test_unsc, y_train_unsc, y_test_unsc = split(X_init, test_size=test_size)
    else:
        X_train_unsc, X_test_unsc, y_train_unsc, y_test_unsc = train_test_split(X, y, test_size=test_size, random_state=0)

    X_train_unsc = np.asarray(X_train_unsc, dtype=float)
    X_test_unsc = np.asarray(X_test_unsc, dtype=float)
    y_train_unsc = np.asarray(y_train_unsc, dtype=float)
    y_test_unsc = np.asarray(y_test_unsc, dtype=float)

    X_mean = X_train_unsc.mean(axis=0)
    X_std = X_train_unsc.std(axis=0)
    # A column that is constant in the training part has std 0; dividing by 1 instead
    # maps it to 0 rather than NaN.
    X_std = np.where(X_std == 0, 1.0, X_std)

    y_std = y_train_unsc.std()
    y_mean = y_train_unsc.mean()

    X_train = (X_train_unsc - X_mean)/ X_std
    X_test = (X_test_unsc - X_mean)/X_std

    y_train = (y_train_unsc- y_mean)/ y_std
    y_test = (y_test_unsc- y_mean)/y_std

    X_train_torch = torch.tensor(X_train).float()

    if model == SVI_model_format.poisson['name']:
        # Use the unscaled target directly. Rebuilding it from the standardized values
        # can land just below an integer (2.9999...), which .int() would truncate to 2.
        y_train_torch = torch.tensor(y_train_unsc).int()
    else:
        y_train_torch = torch.tensor(y_train).float()

    if classi:
        y_train_torch = torch.tensor(y_train_unsc).float()
        y_train = y_train_unsc
        y_test = y_test_unsc
    X_test_torch = torch.tensor(X_test).float()

    return y, X, X_train_torch, y_train_torch,X_test_torch, X_test, y_test, X_train, y_train, y_std, y_mean

    
def linear_model(X: torch.Tensor, obs=None) -> torch.Tensor:
    
    '''
    Pyro model for Bayesian linear regression.

    X: feature tensor; X.shape[1] gives the number of regression coefficients
    obs: observed targets, or None to sample them from the model

    Sample sites: "alpha" (intercept), "beta" (coefficients), "sigma" (noise scale)
    and "y" (targets, Normal(alpha + X @ beta, sigma)). Returns the value at "y".
    '''
    alpha = pyro.sample("alpha", dist.Normal(0., 1.))                   # Prior for the bias/intercept
    beta  = pyro.sample("beta", dist.Normal(torch.zeros(X.shape[1]), 
                                            torch.ones(X.shape[1])).to_event())    # Priors for the regression coefficients
    sigma = pyro.sample("sigma", dist.HalfCauchy(5.))                   # Prior for the noise scale (second argument of Normal)
    
    with pyro.plate("data"):
        y = pyro.sample("y", dist.Normal(alpha + X.matmul(beta), sigma), obs=obs)
      
    return y


def poisson_model(X: torch.Tensor, obs=None) -> torch.Tensor:
    
    '''
    Pyro model for Bayesian Poisson regression with a log link.

    X: feature tensor; X.shape[1] gives the number of regression coefficients
    obs: observed targets, or None to sample them from the model

    Sample sites: "alpha" (intercept), "beta" (coefficients) and "y"
    (targets, Poisson(exp(alpha + X @ beta))). Returns the value at "y".
    ''' 
    alpha = pyro.sample("alpha", dist.Normal(0., 1.))                   # Prior for the bias/intercept
    beta  = pyro.sample("beta", dist.Normal(torch.zeros(X.shape[1]), 
                                            torch.ones(X.shape[1])).to_event())    # Priors for the regression coefficients
    
    with pyro.plate("data"):
        # exp() maps the linear predictor to a positive Poisson rate.
        y = pyro.sample("y", dist.Poisson(torch.exp(alpha + X.matmul(beta))), obs=obs)
        
    return y


def heteroscedastic_model(X: torch.Tensor, obs=None) -> torch.Tensor:
    
    '''
    Pyro model for heteroscedastic regression: both the mean and the noise scale
    of the Normal likelihood are linear functions of the features.

    X: feature tensor; X.shape[1] gives the number of regression coefficients
    obs: observed targets, or None to sample them from the model

    Sample sites: "alpha_mu", "beta_mu" (mean), "alpha_v", "beta_v" (log of the
    noise scale) and "y". Returns the value at "y".
    '''
    alpha_mu = pyro.sample("alpha_mu", dist.Normal(0., 1.))                 # Prior for the bias/intercept of the mean
    beta_mu  = pyro.sample("beta_mu", dist.Normal(torch.zeros(X.shape[1]), 
                                               torch.ones(X.shape[1])).to_event())     # Priors for the regression coefficients of the mean
    alpha_v = pyro.sample("alpha_v", dist.Normal(0., 1.))                   # Prior for the bias/intercept of the log noise scale
    beta_v  = pyro.sample("beta_v", dist.Normal(torch.zeros(X.shape[1]), 
                                               torch.ones(X.shape[1])).to_event())     # Priors for the regression coefficients of the log noise scale
    
    with pyro.plate("data"):
        # exp() keeps the feature-dependent noise scale positive.
        y = pyro.sample("y", dist.Normal(alpha_mu + X.matmul(beta_mu), torch.exp(alpha_v + X.matmul(beta_v))), obs=obs)
        
    return y

def compute_error(trues: np.array, predicted: np.array, threshold: int):

    '''
    Compute regression error metrics between true and predicted values.

    trues: 1-D array of true values
    predicted: 1-D array of predictions, same length as ``trues``
    threshold: if truthy, the two returned arrays are restricted to the positions
        where ``trues < threshold``; if falsy (None or 0) 'No threshold' is printed
        and the full arrays are returned

    Returns ``corr, mae, rae, rmse, r2, predicted_out, trues_out``. The five metrics
    are always computed on the full arrays; ``r2`` is clipped at 0 from below.
    '''
    if threshold:
        below = trues < threshold
        predicted_out = predicted[below]
        trues_out = trues[below]
    else:
        print('No threshold')
        predicted_out = predicted
        trues_out = trues

    residuals = predicted - trues
    spread = trues - np.mean(trues)

    corr = np.corrcoef(predicted, trues)[0,1]
    mae = np.mean(np.abs(residuals))
    rae = np.sum(np.abs(residuals)) / np.sum(np.abs(spread))
    rmse = np.sqrt(np.mean(residuals**2))
    r2 = max(0, 1 - np.sum(residuals**2) / np.sum(spread**2))

    # Explicit branches above replace the former bare `except` that relied on a
    # NameError to detect the "no threshold" case.
    return corr, mae, rae, rmse, r2, predicted_out, trues_out
        
def results(dataset: pd.DataFrame, features_importance: np.array, results_dict: Dict, model:str, comp_number: int, y_trues:np.array, y_preds:np.array) -> Dict:

    '''
    Report the errors and feature importances of one trained model.

    Prints the correlation, MAE, RMSE and R2, shows a horizontal bar chart of the
    feature importances and stores two entries in
    ``results_dict[model]["comp_<comp_number>"]``: 'MAE' and 'FI' (name of the
    feature with the highest importance).

    dataset: table used for training; it must contain a ``machineID`` column followed
        by the feature columns in model order. It is not modified.
    features_importance: one importance per feature, as a 1-D array or a (1, n) array
    results_dict: nested dictionary to update; it is also returned
    model: key of the model inside ``results_dict``
    comp_number: component number
    y_trues, y_preds: true and predicted targets
    '''
    # Work on a copy without machineID so the caller's frame stays intact and the
    # function can be called repeatedly with the same frame.
    feature_frame = dataset.drop(columns='machineID')

    corr, mae, _, rmse, r2, _, _ = compute_error(trues=y_trues, predicted=y_preds, threshold=None)

    results_dict[model][f"comp_{comp_number}"]['MAE'] = mae

    print("CorrCoef: %.3f\nMAE: %.3f\nRMSE: %.3f\nR2: %.3f" % (corr, mae, rmse, r2))

    # Flatten to 1-D: indexing a pandas Index with a 2-D array is not supported on
    # recent pandas versions.
    importance = np.asarray(features_importance, dtype=float).ravel()
    order = importance.argsort()
    names = feature_frame.columns[order].tolist()
    values = importance[order].tolist()

    results_dict[model][f"comp_{comp_number}"]['FI'] = names[values.index(max(values))]
    plt.barh(names, values)
    plt.xlabel("Feature Importance")
    plt.show()

    return results_dict

### SVI
In this part we implement the SVI (Stochastic Variational Inference) Regression

In [None]:
class SVI_model_format():
    '''
    Available regression types for SVI.
    '''
    # Each entry pairs the display name of a regression type with its Pyro model function.
    poisson = dict(name="POISSON", model=poisson_model)
    heterosc = dict(name="HETEROSCEDASTIC", model=heteroscedastic_model)
    linear = dict(name="LINEAR", model=linear_model)

class SVI_regression_model():
    '''
    SVI regression for one component: select the component's columns, preprocess
    them, fit a Pyro model with stochastic variational inference and turn the
    posterior samples into predictions and feature importances.
    '''
    def __init__(self, data: pd.DataFrame, component: int) -> None:
        '''Store the full feature table and the number of the component to model.'''
        self.data = data
        self.component = component
        print(self.component)

    def get_data_for_component(self) -> pd.DataFrame:

        '''
        Return the shared feature columns plus the columns of this component.

        For a single-digit component k the result ends with ``comp<k>_maint`` and
        ``comp<k>_life``; the ``failure_*`` columns are left out.
        '''

        components_cols = ['comp1_maint', 'comp2_maint', 'comp3_maint', 'comp4_maint',
                           'failure_comp1', 'failure_comp2', 'failure_comp3', 'failure_comp4',
                           'comp1_life', 'comp2_life', 'comp3_life', 'comp4_life']

        cols = ['machineID', 'voltmean_3h', 'rotatemean_3h', 'pressuremean_3h', 'vibrationmean_3h',
                'voltsd_3h', 'rotatesd_3h', 'pressuresd_3h', 'vibrationsd_3h', 'voltmean_24h',
                'rotatemean_24h', 'pressuremean_24h', 'vibrationmean_24h', 'voltsd_24h', 'rotatesd_24h',
                'pressuresd_24h', 'vibrationsd_24h', 'error1count', 'error2count','error3count',
                'error4count', 'error5count','age', 'model_model1', 'model_model2',
                'model_model3', 'model_model4'] + [word for word in components_cols if str(self.component) in ([*word]) and 'failure' not in word.split('_')]

        return self.data[cols]


    def Preprocess(self, X_init: pd.DataFrame, model: str):

        '''
        Print a short header and run ``preprocess`` (machine-wise split, 10% test share).

        X_init: table returned by ``get_data_for_component``
        model: name of the regression type (see ``SVI_model_format``)

        Returns the 11-tuple produced by ``preprocess``.
        '''
        print(f"{model} Regression {self.component}")
        print(X_init.columns)
        print('\n-----------------------------------------------------------------------')

        return preprocess(X_init, 0.1, model, classi=False, splitting=True)

    def pyro_inference(self, X_train_torch: torch.Tensor, y_train_torch: torch.Tensor, model: object, steps: int):
        '''
        Fit ``model`` with SVI and return the trained guide.

        X_train_torch, y_train_torch: training features and targets
        model: Pyro model function
        steps: number of gradient steps

        The Pyro parameter store is cleared first. The loss is printed every 100 steps.
        '''

        # Reset parameter values
        pyro.clear_param_store()

        # Define guide function
        guide = AutoDiagonalNormal(model)

        # Setup the optimizer
        adam_params = {"lr": 0.0001} # learning rate (lr) of optimizer
        optimizer = ClippedAdam(adam_params)

        # Setup the inference algorithm
        elbo = Trace_ELBO(num_particles=1)
        svi = SVI(model, guide, optimizer, loss=elbo)

        # Do gradient steps
        for step in range(steps):
            loss = svi.step(X_train_torch, y_train_torch)
            if step % 100 == 0:
                print("[%d] ELBO: %.1f" % (step, loss))

        return guide

    @staticmethod
    def _posterior_mean_prediction(alpha_samples: np.ndarray, beta_samples: np.ndarray, X_test: np.ndarray, log_link: bool) -> np.ndarray:
        '''
        Average the prediction for every test row over the posterior samples.

        alpha_samples: intercept samples; ``alpha_samples.T`` must broadcast against
            an (n_test, n_samples) array
        beta_samples: coefficient samples; ``beta_samples[:, 0]`` is (n_samples, n_features)
        X_test: (n_test, n_features) feature array
        log_link: if True, exponentiate the linear predictor of every sample before averaging
        '''
        linear_predictor = alpha_samples.T + np.dot(X_test, beta_samples[:,0].T)
        if log_link:
            linear_predictor = np.exp(linear_predictor)
        return np.mean(linear_predictor, axis=1)

    def post_process(self, guide: object, model: object, X_train_torch: torch.Tensor, y_train_torch: torch.Tensor, X_test: np.ndarray, y_test: np.ndarray, y_std: np.float64, y_mean: np.float64):

        '''
        Predict on the test set and compute normalized feature importances.

        guide: trained guide returned by ``pyro_inference``
        model: entry of ``SVI_model_format`` (dict with 'name' and 'model')
        X_train_torch, y_train_torch: training data passed to ``Predictive``
        X_test, y_test: standardized test features and targets
        y_std, y_mean: training statistics used to undo the target standardization

        Returns ``preds, y_true, feature_importance``; predictions and true values
        are on the original target scale.
        '''
        is_poisson = model['name'] == SVI_model_format.poisson['name']
        is_heterosc = model['name'] == SVI_model_format.heterosc['name']

        if is_heterosc:
            sites = ("alpha_mu", "beta_mu", "alpha_v", "beta_v")
            alpha_site, beta_site = "alpha_mu", "beta_mu"
        else:
            sites = ("alpha", "beta", "sigma")
            alpha_site, beta_site = "alpha", "beta"

        predictive = Predictive(model=model['model'], guide=guide, num_samples=1000, return_sites=sites)
        samples = predictive(X_train_torch, y_train_torch)

        alpha_samples = samples[alpha_site].detach().numpy()
        beta_samples = samples[beta_site].detach().numpy()

        # Only the Poisson model has a log link. The linear and heteroscedastic models
        # predict with the plain linear predictor; exponentiating it for the linear
        # model (as was done before) distorts every prediction.
        y_hat = self._posterior_mean_prediction(alpha_samples, beta_samples, X_test, log_link=is_poisson)

        # Calculate feature importance
        feature_importance = np.mean(np.abs(beta_samples), axis=0)
        feature_importance /= np.sum(feature_importance)

        # convert back to the original scale
        if is_poisson:
            preds = y_hat # no need to do any conversion here because the Poisson model received untransformed y's
        else:
            preds = y_hat * y_std + y_mean

        y_true = y_test * y_std + y_mean

        return preds, y_true, feature_importance

#### Poisson Regression

In [None]:
# Result container: for every component the MAE and the most important feature.
poisson_results = {"poisson": {'comp_1':  {'MAE':0, 'FI':''}, 'comp_2':  {'MAE':0, 'FI':''}, 'comp_3':  {'MAE':0, 'FI':''}, 'comp_4':  {'MAE':0, 'FI':''}}}

# Fit and evaluate one Poisson regression per component (1-4).
for comp_number in range(1,5):
    print(f"\n--------------------------------  component {comp_number}  ---------------------------------------")
    svi_regression = SVI_regression_model(labeled_features, comp_number)
    svi_dataset = svi_regression.get_data_for_component()
    poisson = SVI_model_format.poisson 

    # Select columns -> split/standardize -> fit with SVI (5000 steps) -> predict on the test part.
    y_poisson, X_poisson, X_train_torch_poisson, y_train_torch_poisson, X_test_torch_poisson, X_test_poisson, y_test_poisson, X_train_poisson, y_train_poisson, y_std_poisson, y_mean_poisson = svi_regression.Preprocess(X_init=svi_dataset, model=poisson['name'])
    poisson_guide = svi_regression.pyro_inference(X_train_torch=X_train_torch_poisson, y_train_torch=y_train_torch_poisson, model=poisson['model'], steps=5000)
    poisson_preds, poisson_y_true, poisson_features_importance = svi_regression.post_process(guide=poisson_guide, model=poisson, X_train_torch=X_train_torch_poisson, y_train_torch=y_train_torch_poisson, X_test=X_test_poisson, y_test=y_test_poisson, y_std=y_std_poisson, y_mean=y_mean_poisson)
    
    # Print the metrics, plot the feature importances and record MAE / top feature.
    poisson_results = results(svi_dataset, poisson_features_importance, poisson_results, 'poisson', comp_number, poisson_y_true, poisson_preds)

In [None]:
# MAE and most important feature per component for the Poisson regression.
poisson_results

#### Heteroscedastic Regression

In [None]:
# Result container: for every component the MAE and the most important feature.
hetero_results = {'hetero': {'comp_1':  {'MAE':0, 'FI':''}, 'comp_2':  {'MAE':0, 'FI':''}, 'comp_3':  {'MAE':0, 'FI':''}, 'comp_4':  {'MAE':0, 'FI':''}}}

# Fit and evaluate one heteroscedastic regression per component (1-4).
for comp_number in range(1,5):
    
    print(f"\n--------------------------------  component {comp_number}  ---------------------------------------")
    svi_regression = SVI_regression_model(labeled_features, comp_number)
    svi_dataset = svi_regression.get_data_for_component()
    hetero = SVI_model_format.heterosc
    
    # Select columns -> split/standardize -> fit with SVI (5000 steps) -> predict on the test part.
    y_hetero, X_hetero, X_train_torch_hetero, y_train_torch_hetero, X_test_torch_hetero, X_test_hetero, y_test_hetero, X_train_hetero, y_train_hetero, y_std_hetero, y_mean_hetero = svi_regression.Preprocess(X_init=svi_dataset, model=hetero['name'])
    hetero_guide = svi_regression.pyro_inference(X_train_torch=X_train_torch_hetero, y_train_torch=y_train_torch_hetero, model=hetero['model'], steps=5000)
    hetero_preds, hetero_y_true, hetero_features_importance = svi_regression.post_process(guide=hetero_guide, model=hetero, X_train_torch=X_train_torch_hetero, y_train_torch=y_train_torch_hetero, X_test=X_test_hetero, y_test=y_test_hetero, y_std=y_std_hetero, y_mean=y_mean_hetero)

    # Print the metrics, plot the feature importances and record MAE / top feature.
    hetero_results = results(svi_dataset, hetero_features_importance, hetero_results, 'hetero', comp_number, hetero_y_true, hetero_preds)    

In [None]:
# MAE and most important feature per component for the heteroscedastic regression.
hetero_results

#### Linear Regression

In [None]:
# Result container: for every component the MAE and the most important feature.
linear_svi_results = {'linear_svi': {'comp_1':  {'MAE':0, 'FI':''}, 'comp_2':  {'MAE':0, 'FI':''}, 'comp_3':  {'MAE':0, 'FI':''}, 'comp_4':  {'MAE':0, 'FI':''}}}

# Fit and evaluate one linear regression per component (1-4).
for comp_number in range(1,5):
    
    print(f"\n--------------------------------  component {comp_number}  ---------------------------------------")
    svi_regression = SVI_regression_model(labeled_features, comp_number)
    svi_dataset = svi_regression.get_data_for_component()
    linear_svi = SVI_model_format.linear
 
    # Select columns -> split/standardize -> fit with SVI (5000 steps) -> predict on the test part.
    y_linear_svi, X_linear_svi, X_train_torch_linear_svi, y_train_torch_linear_svi,  X_test_torch_linear_svi, X_test_linear_svi, y_test_linear_svi, X_train_linear_svi, y_train_linear_svi, y_std_linear_svi, y_mean_linear_svi = svi_regression.Preprocess(X_init=svi_dataset, model=linear_svi['name'])
    linear_svi_guide = svi_regression.pyro_inference(X_train_torch=X_train_torch_linear_svi, y_train_torch=y_train_torch_linear_svi, model=linear_svi['model'], steps=5000)
    linear_svi_preds, linear_svi_y_true, linear_features_importance = svi_regression.post_process(guide=linear_svi_guide, model=linear_svi, X_train_torch=X_train_torch_linear_svi, y_train_torch=y_train_torch_linear_svi, X_test=X_test_linear_svi, y_test=y_test_linear_svi, y_std=y_std_linear_svi, y_mean=y_mean_linear_svi)
    
    # Print the metrics, plot the feature importances and record MAE / top feature.
    linear_svi_results = results(svi_dataset, linear_features_importance, linear_svi_results, 'linear_svi', comp_number, linear_svi_y_true, linear_svi_preds)    

In [None]:
# MAE and most important feature per component for the linear regression.
linear_svi_results

### MCMC
In this part we implement the MCMC (Markov Chain Monte Carlo) Regression

In [None]:
class MCMC_model_format():
    '''
    Available regression types for MCMC.
    '''
    # Each entry pairs the display name of a regression type with its Pyro model function.
    poisson = dict(name="POISSON", model=poisson_model)
    heterosc = dict(name="HETEROSCEDASTIC", model=heteroscedastic_model)
    linear = dict(name="LINEAR", model=linear_model)

class MCMC_regression_model():
    
    '''
    MCMC (NUTS) regression for one machine component.

    The instance holds the full labelled feature frame and the component
    number; the methods select the relevant columns, preprocess them, run
    NUTS and turn posterior samples into predictions on the original scale.
    '''

    def __init__(self, data: pd.DataFrame, component: int) -> None:
        '''
        data: labelled feature frame containing the columns listed in
              ``get_data_for_component``.
        component: component number (1-4 in this notebook).
        '''
        self.data = data
        self.component = component

    def get_data_for_component(self) -> pd.DataFrame:
        
        '''
        returns the features of the dataset and the columns of the component
        we want to predict for (its ``*_maint`` and ``*_life`` columns; the
        ``failure_*`` columns are left out)
        '''

        components_cols = ['comp1_maint', 'comp2_maint', 'comp3_maint', 'comp4_maint',
                           'failure_comp1', 'failure_comp2', 'failure_comp3', 'failure_comp4',
                           'comp1_life', 'comp2_life', 'comp3_life', 'comp4_life']
        
        cols = ['machineID', 'voltmean_3h', 'rotatemean_3h',
                'pressuremean_3h', 'vibrationmean_3h', 'voltsd_3h', 'rotatesd_3h',
                'pressuresd_3h', 'vibrationsd_3h', 'voltmean_24h', 'rotatemean_24h',
                'pressuremean_24h', 'vibrationmean_24h', 'voltsd_24h', 'rotatesd_24h',
                'pressuresd_24h', 'vibrationsd_24h', 'error1count', 'error2count',
                'error3count', 'error4count', 'error5count','age',
                'model_model1', 'model_model2', 'model_model3', 'model_model4']

        # Matching on the "comp<N>_" prefix keeps comp<N>_maint / comp<N>_life
        # and naturally skips the failure_comp<N> columns.
        prefix = f"comp{self.component}_"
        cols = cols + [word for word in components_cols if word.startswith(prefix)]

        return self.data[cols]
    
    def Preprocess(self, X_init: pd.DataFrame, model: str) -> \
        Tuple[np.ndarray, np.ndarray, torch.Tensor, torch.Tensor, torch.Tensor, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.float64, np.float64]:

        '''
        the necessary data preprocess before proceeding to pyro inference

        Delegates to the notebook-level ``preprocess`` helper (second
        argument 0.1 is presumably the test fraction -- TODO confirm) and
        returns its 11 outputs unchanged:
        y, X, X_train_torch, y_train_torch, X_test_torch, X_test, y_test,
        X_train, y_train, y_std, y_mean.
        The element types in the annotation follow the variable names and are
        not verified here.
        '''
        print(f"{model} Regression {self.component}")
        print(X_init.columns)
        print('\n-----------------------------------------------------------------------')

        return preprocess(X_init,0.1,model,classi=False,splitting=True)
    

    def pyro_inference(self, X_train_torch, y_train_torch, model, num_samples, warmup_steps=200, num_chains=1):
        '''
        Run NUTS on ``model`` with the training tensors and return the
        posterior samples (dict: site name -> tensor of draws).

        num_samples: number of posterior draws kept per chain.
        warmup_steps / num_chains: forwarded to ``MCMC``; the defaults are the
        values that used to be hard-coded.
        '''

        # Reset parameter values
        pyro.clear_param_store()

        # Run inference in Pyro
        nuts_kernel = NUTS(model)
        
        mcmc = MCMC(nuts_kernel, num_samples=num_samples, warmup_steps=warmup_steps, num_chains=num_chains)
        mcmc.run(X_train_torch, y_train_torch)
        
        # summary() prints the table itself and returns nothing, so it must
        # not be wrapped in print() (that only adds a stray "None").
        mcmc.summary()

        return mcmc.get_samples()

    def post_process(self, X_test, X_train, samples, y_std, y_mean, y_test):
        '''
        Turn posterior samples into test-set predictions on the original scale.

        samples: dict with "alpha" and "beta" draws (tensors).
        X_train: accepted for interface compatibility; not used.
        Returns (preds, y_true), both rescaled with ``y_std`` and ``y_mean``.
        '''

        alpha_draws = samples["alpha"].numpy().T
        beta_draws = samples["beta"].numpy().T

        # One prediction per posterior draw, then average over the draws.
        y_hat = np.mean(alpha_draws + np.dot(X_test, beta_draws), axis=1)

        # Convert back to the original scale
        preds = y_hat * y_std + y_mean
        y_true = y_test * y_std + y_mean

        return preds, y_true

In [None]:
# Placeholder for the linear MCMC results, one slot per component; the loop in
# the next cell replaces each 0 with a one-element list holding the MAE.
linear_mcmc_results = {'linear_mcmc': {'comp_1': 0, 'comp_2':0, 'comp_3': 0, 'comp_4':0}}

In [None]:
# Fit the linear model with NUTS once per component (1-4) and report the errors.
for comp_number in range(1,5):
    print(f"\n--------------------------------  component {comp_number}  ---------------------------------------")
    mcmc_regression = MCMC_regression_model(labeled_features, comp_number)
    mcmc_dataset = mcmc_regression.get_data_for_component()
    linear_mcmc = MCMC_model_format.linear
 

    y_linear_mcmc, X_linear_mcmc, X_train_torch_linear_mcmc, y_train_torch_linear_mcmc,  X_test_torch_linear_mcmc, X_test_linear_mcmc, y_test_linear_mcmc, X_train_linear_mcmc, y_train_linear_mcmc, y_std_linear_mcmc, y_mean_linear_mcmc = mcmc_regression.Preprocess(X_init=mcmc_dataset, model=linear_mcmc['name'])
  
    # Only 100 posterior draws are kept per run.
    linear_mcmc_samples = mcmc_regression.pyro_inference(X_train_torch=X_train_torch_linear_mcmc, y_train_torch=y_train_torch_linear_mcmc, model=linear_mcmc['model'], num_samples=100)
    linear_mcmc_preds, linear_mcmc_y_true = mcmc_regression.post_process(samples=linear_mcmc_samples, X_train = X_train_linear_mcmc, X_test = X_test_linear_mcmc, y_test=y_test_linear_mcmc, y_std=y_std_linear_mcmc, y_mean=y_mean_linear_mcmc)
    
    # compute_error is defined elsewhere in the notebook; it returns seven values of which
    # the correlation, MAE, RMSE and R2 are printed below.
    corr_linear_mcmc, mae_linear_mcmc, rae_linear_mcmc, rmse_linear_mcmc, r2_linear_mcmc, mcmc_trues_linear_mcmc, mcmc_pred_linear_mcmc = compute_error(trues=linear_mcmc_y_true, predicted=linear_mcmc_preds, threshold=None)
    # NOTE(review): the MAE is stored wrapped in a list, unlike the scalar placeholder it replaces.
    linear_mcmc_results['linear_mcmc'][f"comp_{comp_number}"] = [mae_linear_mcmc]
    print("CorrCoef: %.3f\nMAE: %.3f\nRMSE: %.3f\nR2: %.3f" % (corr_linear_mcmc, mae_linear_mcmc, rmse_linear_mcmc, r2_linear_mcmc))
    print('\n')

### NEURAL NETWORKS

In [None]:
class ModelFormat():
    '''
    defining the type of the regression / classification

    Each attribute is a dict with a display ``name`` and, for the
    probabilistic regression variants, the ``model`` callable.
    '''
    neural_network = {'name':'NN'}
    # Original (misspelled) attribute name, kept as an alias so that existing
    # references to it keep working.
    neural_netwrok = neural_network
    poisson = {'name':"POISSON", 'model':poisson_model}
    heterosc = {'name':"HETEROSCEDASTIC", 'model':heteroscedastic_model}
    linear = {'name':"LINEAR", 'model':linear_model}
    classification_model = {'name':"CLASSIFICATION"}

class FFNN(PyroModule):
    
    '''
    Bayesian feed-forward network (input -> hidden -> hidden -> output, tanh
    activations) used for regression.

    Only the weight matrices get standard-normal priors here; the observation
    noise is fixed at 0.1.

    type_forward selects the forward pass:
      * 'simple'        - all columns of X go through the network.
      * 'interpretable' - column 0 of X gets its own linear coefficient
                          ("beta" sample site); the remaining columns go
                          through the network and both outputs are added.
    feature is stored on the instance but is not used by ``forward``.
    '''
    
    def __init__(self, n_in, n_hidden, n_out, type_forward, feature):
        
        # Initialise the module machinery before assigning any attribute.
        super(FFNN, self).__init__()

        self.type_forward = type_forward
        self.feature = feature
        
        # Architecture
        self.in_layer = PyroModule[nn.Linear](n_in, n_hidden)
        self.in_layer.weight = PyroSample(dist.Normal(0., 1.).expand([n_hidden, n_in]).to_event(2))

        # The hidden layer used to be constructed twice in a row; the second
        # construction simply replaced the first, so one is enough.
        self.h_layer = PyroModule[nn.Linear](n_hidden, n_hidden)
        self.h_layer.weight = PyroSample(dist.Normal(0., 1.).expand([n_hidden, n_hidden]).to_event(2))

        self.out_layer = PyroModule[nn.Linear](n_hidden, n_out)
        self.out_layer.weight = PyroSample(dist.Normal(0., 1.).expand([n_out, n_hidden]).to_event(2))

        # Activation functions
        self.tanh = nn.Tanh()
        
    def forward(self, X, y=None):
        '''
        Sample (or condition on) the "obs" site for the rows of X.

        Raises ValueError when ``type_forward`` is not one of the two
        supported modes (previously the model was silently skipped).
        '''
        
        if self.type_forward == 'simple':
            X = self.tanh(self.in_layer(X))
            X = self.tanh(self.h_layer(X))
            X = self.out_layer(X)
            prediction_mean = X.squeeze(-1)
            with pyro.plate("observations"):
                y = pyro.sample("obs", dist.Normal(prediction_mean, 0.1), obs=y)
        
        elif self.type_forward == 'interpretable':
            # Everything except the first column is modelled by the network.
            X_nn = X[:,1:]
            X_nn = self.tanh(self.in_layer(X_nn))
            X_nn = self.tanh(self.h_layer(X_nn))
            X_nn = self.out_layer(X_nn)
            nn_out = X_nn.squeeze(-1)

            # The first column gets a single, directly readable coefficient.
            beta_lin = pyro.sample("beta", dist.Normal(0, 1))
            X_linear = X[:,0]
            with pyro.plate("observations"):
                linear_out = X_linear*beta_lin
                y = pyro.sample("obs", dist.Normal(nn_out+linear_out, 0.1), obs=y)

        else:
            raise ValueError(
                "type_forward must be 'simple' or 'interpretable', got %r" % (self.type_forward,)
            )

        return y

def train_nn(model0, X_train_torch, y_train_torch, feature, n_steps=1000, lr=0.01):
    '''
    Train the regression FFNN with SVI and an AutoDiagonalNormal guide.

    model0: "simple" or "interpretable" (forwarded to FFNN as type_forward).
    X_train_torch, y_train_torch: training tensors.
    feature: forwarded to FFNN, which only stores it.
    n_steps, lr: number of SVI steps and Adam learning rate (the defaults are
                 the values that used to be hard-coded).

    Returns (model, guide). Raises ValueError for an unknown ``model0``
    (previously this surfaced as an UnboundLocalError).

    NOTE: a later cell of this notebook defines another ``train_nn`` (for the
    classifier, three positional arguments) which replaces this one in the
    kernel; re-run this cell before the regression loops if that cell has
    already been executed.
    '''
    
    if model0 == "simple":
        model = FFNN(n_in=X_train_torch.shape[1],feature=feature, n_hidden=32, n_out=1, type_forward=model0)
        
    elif model0 == "interpretable":
        # One input fewer: the first column bypasses the network.
        model = FFNN(n_in=X_train_torch.shape[1]-1,feature=feature, n_hidden=32, n_out=1, type_forward=model0)

    else:
        raise ValueError("model0 must be 'simple' or 'interpretable', got %r" % (model0,))
        
    guide = AutoDiagonalNormal(model)
    pyro.clear_param_store()

    # Setup the optimizer
    optimizer = Adam({"lr": lr})

    # Setup the inference algorithm
    svi = SVI(model, guide, optimizer, loss=Trace_ELBO(num_particles=1))

    # Do gradient steps
    for step in range(n_steps):
        loss = svi.step(X_train_torch,y_train_torch)
        if step % 100 == 0:
            print("[%d] ELBO: %.1f" % (step, loss))
    
    return model, guide

def get_data_for_component(data, component) -> pd.DataFrame:
        
    '''
    Select the shared feature columns plus the columns that belong to the
    requested component (the ``failure_*`` columns are excluded).
    '''

    shared_cols = ['machineID', 'voltmean_3h', 'rotatemean_3h', 'pressuremean_3h', 'vibrationmean_3h',
                   'voltsd_3h', 'rotatesd_3h', 'pressuresd_3h', 'vibrationsd_3h', 'voltmean_24h',
                   'rotatemean_24h', 'pressuremean_24h', 'vibrationmean_24h', 'voltsd_24h', 'rotatesd_24h',
                   'pressuresd_24h', 'vibrationsd_24h', 'error1count', 'error2count', 'error3count',
                   'error4count', 'error5count','age', 'model_model1', 'model_model2',
                   'model_model3', 'model_model4']

    candidate_cols = ['comp1_maint', 'comp2_maint', 'comp3_maint', 'comp4_maint',
                      'failure_comp1', 'failure_comp2', 'failure_comp3', 'failure_comp4',
                      'comp1_life', 'comp2_life', 'comp3_life', 'comp4_life']

    component_cols = []
    for name in candidate_cols:
        # A column belongs to the component when the component number appears
        # among its characters; failure flags are never part of this selection.
        has_component = str(component) in list(name)
        is_failure = 'failure' in name.split('_')
        if has_component and not is_failure:
            component_cols.append(name)

    return data[shared_cols + component_cols]

def test_nn(model,guide,X_test_torch):
    '''
    Predict with a trained network and return (predictions, true values) on
    the original target scale.

    The prediction is the mean of 1000 posterior-predictive draws of the
    "obs" site. NOTE: ``y_test``, ``y_std`` and ``y_mean`` are not parameters;
    they are read from the notebook globals set by the latest ``preprocess``
    call.
    '''
    sampler = pyro.infer.Predictive(model, guide=guide, num_samples=1000,return_sites=("obs", "_RETURN"))
    draws = sampler(X_test_torch)

    # Average over the draw dimension, then undo the target standardisation.
    obs_mean = draws["obs"].mean(axis=0).detach().numpy()
    return obs_mean * y_std + y_mean, y_test * y_std + y_mean

def test_nn_beta(model,guide,X_test_torch):
    '''
    Predict with a trained "interpretable" network, print the estimated linear
    coefficient and return (predictions, true values) on the original scale.

    Both the "obs" and the "beta" site are drawn from the posterior
    predictive (1000 samples). The predictions are the mean of the "obs"
    draws; previously they were built from the scalar "beta" mean, so the
    reported error did not measure the model's predictions at all.

    NOTE: ``y_test``, ``y_std`` and ``y_mean`` are not parameters; they are
    read from the notebook globals set by the latest ``preprocess`` call.
    '''
    # Predict
    predictive = pyro.infer.Predictive(model, guide=guide, num_samples=1000,return_sites=("obs", "beta"))
    samples = predictive(X_test_torch)
    print("Estimated beta:", samples["beta"].mean(axis=0).detach().numpy())
    
    # Point prediction per test row = average of the posterior-predictive draws.
    y_pred = samples["obs"].mean(axis=0).detach().numpy()
    
    # Convert back to the original scale
    y_preds = y_pred * y_std + y_mean
    y_true = y_test * y_std + y_mean
    
    return y_preds, y_true


def mae_test(y_pred, y_test):
    '''
    Print and return the mean absolute error between predictions and targets.
    '''
    absolute_errors = np.abs(y_pred - y_test)
    mae = np.mean(absolute_errors)
    print("MAE:", mae)
    return mae

def plot_pred(y_pred,y_test,y_std,y_mean,threshold,start=None,end=None):
    '''
    Plot true against predicted values (original scale) for the points whose
    prediction exceeds ``threshold`` and return (y_true, y_pre) for all points.

    ``start`` and ``end`` are accepted but currently have no effect.
    '''
    plt.figure(figsize=(20, 10))

    # Undo the target standardisation for both series.
    y_true = y_mean + y_std * y_test
    y_pre = y_mean + y_std * y_pred

    # The same selection is applied to both series so they stay aligned.
    above_threshold = y_pre > threshold
    plt.plot(y_true[above_threshold], 'r.-', label='test')
    plt.plot(y_pre[above_threshold], 'b-', label='pred')
    plt.legend()
    plt.show()

    return y_true,y_pre

#### Feature importance

In [None]:
# Column index (within `svi_dataset`) of the feature recorded under 'FI' in the
# Poisson results, one entry per component.
# NOTE: `svi_dataset` is the frame left over from the last iteration of the
# preceding SVI loop, so the lookup relies on the feature being a column of it.
feature_for_comp = {'comp_1':  {'Feature':0}, 'comp_2':  {'Feature':0}, 'comp_3':  {'Feature':0}, 'comp_4':  {'Feature':0}}

# All four components are filled: the interpretable-FNN loop below reads the
# entry of every component, and the loop used to stop after component 2.
for i in range(1,5):
    feature = poisson_results['poisson'][f"comp_{i}"]["FI"]
    feature_for_comp[f"comp_{i}"]["Feature"] = svi_dataset.columns.get_loc(feature)

#### FNN Simple

In [None]:
# Train and evaluate the "simple" FFNN once per component (1-4).
for i in range(1,5):
    print(f"\n--------------------------------  component {i}  ---------------------------------------")
    nn_dataset = get_data_for_component(labeled_features, i)
    # y_test, y_std and y_mean assigned here are read as globals by test_nn below.
    y, X, X_train_torch, y_train_torch,X_test_torch, X_test, y_test, X_train, y_train, y_std, y_mean = preprocess(nn_dataset, 0.1, 'neural_network')
    
    # The last argument (feature) is only stored by FFNN, so -1 is a dummy value here.
    model_FFNN, guide = train_nn("simple", X_train_torch, y_train_torch, -1)
    y_preds, y_true = test_nn(model_FFNN, guide, X_test_torch)
    mae = mae_test(y_preds, y_true)
    print('\n')

#### FNN Interpretable

In [None]:
# Train and evaluate the "interpretable" FFNN once per component (1-4).
for i in range(1,5):
    print(f"\n--------------------------------  component {i}  ---------------------------------------")
    nn_dataset = get_data_for_component(labeled_features, i)
    # y_test, y_std and y_mean assigned here are read as globals by test_nn_beta below.
    y, X, X_train_torch, y_train_torch,X_test_torch, X_test, y_test, X_train, y_train, y_std, y_mean = preprocess(nn_dataset, 0.1, 'neural_network')
    
    # The feature index is passed through to FFNN, which stores it; the linear
    # term of the interpretable model always uses the first column of X.
    model_FFNN, guide = train_nn("interpretable", X_train_torch, y_train_torch, feature_for_comp[f"comp_{i}"]['Feature']-1)
    y_preds, y_true = test_nn_beta(model_FFNN, guide, X_test_torch)
    mae = mae_test(y_preds, y_true)
    print('\n')

### Data Threshold

```
Predicting the failure of a component over a horizon of 150 or 300 days is very difficult and not realistic, so it makes little practical sense. We therefore introduce a clearer and more realistic threshold of 30 days: for each component, the best of our models is retrained on the subset of the data in which the component is at most 30 days away from failure.
```

In [None]:
# Box plot of the four comp*_life columns, one box per component.
life_columns = ["comp1_life","comp2_life","comp3_life","comp4_life"]
plt.boxplot(labeled_features[life_columns])
# Boxes are drawn at positions 1..N; name them so the figure can be read on its own.
plt.xticks(range(1, len(life_columns) + 1), life_columns)
plt.ylabel("life")
plt.title("Distribution of the comp*_life columns")
plt.show()

In [None]:
# Column index (within `svi_dataset`) of the feature recorded under 'FI' in the
# Poisson results, for each of the four components.
# NOTE: `svi_dataset` is whatever frame the last executed SVI loop left in the
# kernel, so this cell depends on those cells having been run before it.
feature_for_comp = {'comp_1':  {'Feature':0}, 'comp_2':  {'Feature':0}, 'comp_3':  {'Feature':0}, 'comp_4':  {'Feature':0}}
for i in range(1,5):
    feature = poisson_results['poisson'][f"comp_{i}"]['FI']
    feature_for_comp[f"comp_{i}"]['Feature'] = svi_dataset.columns.get_loc(feature)

In [None]:
# Display the computed component -> feature-column-index mapping.
feature_for_comp
# feature_for_comp = {'comp_1': {'Feature': 24},'comp_2': {'Feature': 9},'comp_3': {'Feature': 25},'comp_4': {'Feature': 26}}

For components 1 and 4, the simple FNN was selected as the best model, so it is the one retrained on the thresholded data.

In [None]:

# Keep only the rows whose comp<i>_life value is at most `threshold`.
threshold = 30
print(labeled_features.shape)
# Retrain the "simple" FFNN for components 1 and 4 on the restricted data.
for i in [1,4]:
    print(f"\n--------------------------------  component {i}  ---------------------------------------")
    data = labeled_features[labeled_features[f"comp{i}_life"]<=threshold]
    nn_dataset = get_data_for_component(data, i)
    # Sanity output: remaining rows/columns and the largest life value kept.
    print(nn_dataset.shape)
    print(nn_dataset[f"comp{i}_life"].max())
    # y_test, y_std and y_mean assigned here are read as globals by test_nn below.
    y, X, X_train_torch, y_train_torch,X_test_torch, X_test, y_test, X_train, y_train, y_std, y_mean = preprocess(nn_dataset, 0.1, 'neural_network')
    
    model_FFNN, guide = train_nn("simple", X_train_torch, y_train_torch, -1)
    y_preds, y_true = test_nn(model_FFNN, guide, X_test_torch)
    mae = mae_test(y_preds, y_true)
    print('\n')

For components 2 and 3, the interpretable FNN was selected as the best model, so it is the one retrained on the thresholded data.

In [None]:
# Hard-coded feature indices; this assignment replaces the mapping computed from
# the Poisson results earlier in the notebook.
feature_for_comp = {'comp_1': {'Feature': 24},'comp_2': {'Feature': 9},'comp_3': {'Feature': 25},'comp_4': {'Feature': 26}}
# Keep only the rows whose comp<i>_life value is at most `threshold`.
threshold = 30
print(labeled_features.shape)
# Retrain the "interpretable" FFNN for components 2 and 3 on the restricted data.
for i in [2,3]:
    print(f"\n--------------------------------  component {i}  ---------------------------------------")
    data = labeled_features[labeled_features[f"comp{i}_life"]<=threshold]
    nn_dataset = get_data_for_component(data, i)
    # Sanity output: remaining rows/columns and the largest life value kept.
    print(nn_dataset.shape)
    print(nn_dataset[f"comp{i}_life"].max())
    # y_test, y_std and y_mean assigned here are read as globals by test_nn_beta below.
    y, X, X_train_torch, y_train_torch,X_test_torch, X_test, y_test, X_train, y_train, y_std, y_mean = preprocess(nn_dataset, 0.1, 'neural_network')
    
    model_FFNN, guide = train_nn("interpretable", X_train_torch, y_train_torch, feature_for_comp[f"comp_{i}"]['Feature']-1)
    y_preds, y_true = test_nn_beta(model_FFNN, guide, X_test_torch)
    mae = mae_test(y_preds, y_true)
    print('\n')

## Binary classification
This part covers the classification models (categorical, binary logistic regression, probit regression and a neural network) and the two inference techniques, SVI and MCMC. Due to computational limitations we proceeded mainly with SVI.

In [None]:
def model_cat(X, obs=None):
    '''
    Two-class categorical (softmax) regression with standard-normal priors
    on the intercepts ("alpha") and the coefficients ("beta").
    '''
    num_classes = 2
    num_features = X.shape[1]

    # Prior for the bias/intercept: one value per class.
    intercept_prior = dist.Normal(torch.zeros(1, num_classes),
                                  torch.ones(1, num_classes)).to_event()
    alpha = pyro.sample("alpha", intercept_prior)

    # Priors for the regression coefficients: one column per class.
    coef_prior = dist.Normal(torch.zeros(num_features, num_classes),
                             torch.ones(num_features, num_classes)).to_event()
    beta = pyro.sample("beta", coef_prior)

    with pyro.plate("data"):
        class_logits = alpha + X.matmul(beta)
        y = pyro.sample("y", dist.Categorical(logits=class_logits), obs=obs)
    return y


def model_bin_lr(X, obs=None):
    '''
    Binary logistic regression: Bernoulli likelihood on the logit scale with
    standard-normal priors on the intercept ("alpha") and coefficients ("beta").
    '''
    num_features = X.shape[1]

    # Prior for the bias/intercept
    alpha = pyro.sample("alpha", dist.Normal(0, 1))

    # Priors for the regression coefficients
    coef_prior = dist.Normal(torch.zeros(num_features),
                             torch.ones(num_features)).to_event()
    beta = pyro.sample("beta", coef_prior)

    with pyro.plate("data"):
        linear_score = alpha + X.matmul(beta)
        y = pyro.sample("y", dist.Bernoulli(logits=linear_score), obs=obs)
    return y



def model_cdf(X, obs=None):
    '''
    Probit regression: the success probability is the standard-normal CDF of
    the linear predictor; standard-normal priors on "alpha" and "beta".
    '''
    unit_normal = torch.distributions.Normal(0,1)
    num_features = X.shape[1]

    # Prior for the bias/intercept
    alpha = pyro.sample("alpha", dist.Normal(0, 1))

    # Priors for the regression coefficients
    coef_prior = dist.Normal(torch.zeros(num_features),
                             torch.ones(num_features)).to_event()
    beta = pyro.sample("beta", coef_prior)

    with pyro.plate("data"):
        linear_score = alpha + X.matmul(beta)
        success_prob = unit_normal.cdf(linear_score)
        y = pyro.sample("y", dist.Bernoulli(probs=success_prob), obs=obs)
    return y

   
class FFNN_c(PyroModule):
    '''
    Bayesian feed-forward network for binary classification.

    Layers: n_in -> 2*n_hidden -> n_hidden -> n_hidden -> n_out with ReLU
    activations. Only the weight matrices get standard-normal priors here.
    The output is used as the logit of a Bernoulli likelihood ("obs" site).

    NOTE(review): a later cell imports ``FFNN_c`` from ``src.models.models``,
    which rebinds this name in the kernel -- confirm which definition is
    meant to be used.
    '''
    def __init__(self, n_in, n_hidden, n_out):
        super(FFNN_c, self).__init__()
        
        # Architecture
        self.in_layer = PyroModule[nn.Linear](n_in, n_hidden*2)
        self.in_layer.weight = PyroSample(dist.Normal(0., 1.).expand([n_hidden*2, n_in]).to_event(2))

        self.h_layer = PyroModule[nn.Linear](n_hidden*2, n_hidden)
        self.h_layer.weight = PyroSample(dist.Normal(0., 1.).expand([n_hidden, n_hidden*2]).to_event(2))

        self.h_layer1 = PyroModule[nn.Linear](n_hidden, n_hidden)
        self.h_layer1.weight = PyroSample(dist.Normal(0., 1.).expand([n_hidden, n_hidden]).to_event(2))

        self.out_layer = PyroModule[nn.Linear](n_hidden, n_out)
        self.out_layer.weight = PyroSample(dist.Normal(0., 1.).expand([n_out, n_hidden]).to_event(2))

        # Activation functions
        self.relu = nn.ReLU()
        
    def forward(self, X, y=None):
        '''
        Sample (or condition on) the binary "obs" site for the rows of X.
        '''
        X = self.relu(self.in_layer(X))
        X = self.relu(self.h_layer(X))
        X = self.relu(self.h_layer1(X))
        logits = self.out_layer(X).squeeze(-1)
        # Passing logits instead of sigmoid(logits) describes the same
        # distribution but avoids saturating the probabilities at 0 or 1.
        with pyro.plate("observations"):
            y = pyro.sample("obs", dist.Bernoulli(logits=logits), obs=y)
        return y
        

In [None]:
def get_data_for_component_class(data, component):
    '''
    Select the shared feature columns plus the columns that belong to the
    requested component for classification (the ``*_life`` columns are
    excluded, the ``failure_*`` flag is kept).
    '''
    shared_cols = ['machineID', 'voltmean_3h', 'rotatemean_3h',
                   'pressuremean_3h', 'vibrationmean_3h', 'voltsd_3h', 'rotatesd_3h',
                   'pressuresd_3h', 'vibrationsd_3h', 'voltmean_24h', 'rotatemean_24h',
                   'pressuremean_24h', 'vibrationmean_24h', 'voltsd_24h', 'rotatesd_24h',
                   'pressuresd_24h', 'vibrationsd_24h', 'error1count', 'error2count',
                   'error3count', 'error4count', 'error5count','age',
                   'model_model1', 'model_model2', 'model_model3', 'model_model4']

    candidate_cols = ['comp1_maint', 'comp2_maint', 'comp3_maint', 'comp4_maint',
                      'failure_comp1', 'failure_comp2', 'failure_comp3', 'failure_comp4',
                      'comp1_life', 'comp2_life', 'comp3_life', 'comp4_life']

    component_cols = []
    for name in candidate_cols:
        # A column belongs to the component when the component number appears
        # among its characters; the regression target (life) is left out.
        has_component = str(component) in list(name)
        is_life = 'life' in name.split('_')
        if has_component and not is_life:
            component_cols.append(name)

    return data[shared_cols + component_cols]

The training routines below run SVI or MCMC inference to estimate the parameters and posterior distributions of the models defined above.

In [None]:
# Define guide function
from pyro.contrib.autoguide import AutoMultivariateNormal
from src.models.models import FFNN_c

def train_nn(model0, X_train_torch, y_train_torch, n_steps=2000, lr=0.01):
    '''
    Train the classification network with SVI and an AutoDiagonalNormal guide.

    model0: model selector; only "modelFFNN_c" is supported.
    X_train_torch, y_train_torch: training tensors.
    n_steps, lr: number of SVI steps and Adam learning rate (the defaults are
                 the values that used to be hard-coded).

    Returns (model, guide). Raises ValueError for an unknown ``model0``
    (previously this surfaced as an UnboundLocalError).

    NOTE: this definition replaces the regression ``train_nn`` defined earlier
    in the notebook (which takes an extra ``feature`` argument); re-run that
    earlier cell before re-running the regression loops.
    '''
    if model0 == "modelFFNN_c":
        model = FFNN_c(n_in=X_train_torch.shape[1], n_hidden=32, n_out=1)
    else:
        raise ValueError("model0 must be 'modelFFNN_c', got %r" % (model0,))

    guide = AutoDiagonalNormal(model)
    pyro.clear_param_store()

    # Setup the optimizer
    optimizer = Adam({"lr": lr})

    # Setup the inference algorithm
    svi = SVI(model, guide, optimizer, loss=Trace_ELBO(num_particles=1))

    # Do gradient steps
    for step in range(n_steps):
        loss = svi.step(X_train_torch,y_train_torch)
        if step % 500 == 0:
            print("[%d] ELBO: %.1f" % (step, loss))
    return model,guide



# SVI training for the classification models
def train_c_svi(model, X_train, y_train, steps, lrate):
    '''
    Fit ``model`` with SVI (AutoMultivariateNormal guide, ClippedAdam) and
    return (model, guide, alpha_hat, beta_hat), where the two estimates are
    the means of 2000 posterior draws of the "alpha" and "beta" sites.
    '''
    guide = AutoMultivariateNormal(model)

    # Start from an empty parameter store.
    pyro.clear_param_store()

    svi = SVI(model, guide, ClippedAdam({"lr": lrate}), loss=Trace_ELBO(num_particles=1))

    # Gradient steps, reporting the loss every 1000 iterations.
    for step in range(steps):
        loss = svi.step(X_train, y_train)
        if step % 1000 == 0:
            print("[%d] ELBO: %.1f" % (step, loss))

    # Draw from the fitted posterior.
    posterior = Predictive(model, guide=guide, num_samples=2000,return_sites=("alpha", "beta"))(X_train, y_train)

    def _posterior_mean(site):
        # Average over the draw dimension after dropping singleton dimensions.
        return posterior[site].detach().squeeze().mean(axis=0).numpy()

    alpha_hat = _posterior_mean("alpha")
    beta_hat = _posterior_mean("beta")
    return model,guide, alpha_hat, beta_hat


# MCMC training for the classification models
def train_c_mcmc(model, X_train, y_train, num_samples,w_steps,chains):
    '''
    Fit ``model`` with NUTS and return (mcmc, alpha_hat, beta_hat), where the
    two estimates are the posterior-sample means of "alpha" and "beta".

    num_samples, w_steps, chains: number of kept draws, warm-up steps and
    chains passed to ``MCMC``.
    '''
    mcmc = MCMC(NUTS(model), num_samples=num_samples, warmup_steps=w_steps, num_chains=chains)
    mcmc.run(X_train, y_train)

    posterior = mcmc.get_samples()

    def _posterior_mean(site):
        # Average over the draw dimension after dropping singleton dimensions.
        return posterior[site].detach().squeeze().mean(axis=0).numpy()

    alpha_hat = _posterior_mean("alpha")
    beta_hat = _posterior_mean("beta")
    return mcmc, alpha_hat, beta_hat




The functions below produce test-set predictions from the parameters estimated by SVI or MCMC and evaluate them.

In [None]:
from sklearn.metrics import confusion_matrix, recall_score, accuracy_score, precision_score

def test_nn_c(model,guide,X_test_torch,thres):
    '''
    Predict binary labels with the trained classification network.

    The mean of 2000 posterior-predictive draws of the "obs" site is computed
    per row and then binarised in place: values <= thres become 0, values
    above it become 1.
    '''
    sampler = pyro.infer.Predictive(model, guide=guide, num_samples=2000,return_sites=("obs", "_RETURN"))
    draws = sampler(X_test_torch)
    positive_rate = draws["obs"].mean(axis=0).detach().numpy()

    # Threshold the averaged draws (low side first, then high side).
    positive_rate[positive_rate<=thres]=0
    positive_rate[positive_rate>thres]=1
    return positive_rate

def test_model_c(alpha_hat,beta_hat,X_test_torch,thres=None):
    # make predictions for test set either using threshold or argmax
    y_hat = alpha_hat + np.dot(X_test_torch, beta_hat)
    if thres:
        thres=0.01
        y_hat[y_hat<=thres]=0
        y_hat[y_hat>thres]=1
    else: 
        y_hat = np.argmax(y_hat, axis=1)
    return y_hat

# function to evaluate the accuracy of the model
def test_c(y_hat,y_test):
    # evaluate prediction accuracy
    print("Accuracy:", 1.0*np.sum(y_hat == y_test) / len(y_test))

# function to evaluate predictions
def evaluate(y_test, y_hat):
    '''
    Print the confusion matrix, accuracy, precision, recall and F1 score.

    y_test: true labels.
    y_hat:  predicted labels.
    The argument order matters: precision and recall swap when the two are
    exchanged.

    Precision and recall are reported for the second label in sorted order
    (the "failure" class for 0/1 labels). When only one label occurs in both
    inputs, those scores are undefined and only the accuracy is printed.
    '''
    # Use the labels seen in either input so that a class missing from the
    # test split (or never predicted) still gets a row/column.
    labels = np.union1d(np.unique(y_test), np.unique(y_hat))
    cm = confusion_matrix(y_test, y_hat, labels=labels)
    # confusion_matrix puts the true labels on the rows and the predictions on the columns.
    print('Confusion matrix\n- rows are true labels (no failure, failure)\n- columns are predicted labels')
    print(cm)

    accuracy = float(np.trace(cm)) / np.sum(cm)
    print("accuracy:", accuracy)

    if len(labels) < 2:
        print("precision, recall and f1 score are undefined: only one class is present")
        return

    # calculate precision, recall, and F1 score for the second label
    precision = precision_score(y_test, y_hat, average=None, labels=labels)[1]
    recall = recall_score(y_test, y_hat, average=None, labels=labels)[1]
    # Avoid 0/0 when the positive class is never predicted correctly.
    if precision + recall > 0:
        f1 = 2 * precision * recall / (precision + recall)
    else:
        f1 = 0.0
    print("precision:", precision)
    print("recall:", recall)
    print("f1 score:", f1)


Categorical Classification

In [None]:
# Categorical (two-class) model fitted with SVI, once per component (1-4).
for i in range(1,5):
    print(f"\n--------------------------------  component {i}  ---------------------------------------")
    cat_dataset = get_data_for_component_class(labeled_features, i)
    y, X, X_train_torch, y_train_torch,X_test_torch, X_test, y_test, X_train, y_train, y_std, y_mean = preprocess(cat_dataset, 0.1, 'classification_model',classi=True,splitting=True)
    model__cat,guide_cat, alpha_hat_cat, beta_hat_cat=train_c_svi(model_cat, X_train_torch, y_train_torch,steps=10000,lrate=0.001)
    # No threshold: the predicted class is the argmax over the class scores.
    y_hat=test_model_c(alpha_hat_cat, beta_hat_cat,X_test_torch)
    # evaluate expects (true labels, predictions); the swapped order exchanged precision and recall.
    evaluate(y_test, y_hat)
    print('\n')

Binary Logistic Regression

In [None]:
# Binary logistic regression fitted with SVI, once per component (1-4).
for i in range(1,5):
    print(f"\n--------------------------------  component {i}  ---------------------------------------")
    cat_dataset = get_data_for_component_class(labeled_features, i)
    y, X, X_train_torch, y_train_torch,X_test_torch, X_test, y_test, X_train, y_train, y_std, y_mean = preprocess(cat_dataset, 0.1, 'classification_model',classi=True,splitting=True)
    model__bin,guide_bin, alpha_hat_bin, beta_hat_bin=train_c_svi(model_bin_lr, X_train_torch, y_train_torch,steps=30000,lrate=0.001)
    y_hat=test_model_c(alpha_hat_bin, beta_hat_bin,X_test_torch,thres=0.5)
    # evaluate expects (true labels, predictions); the swapped order exchanged precision and recall.
    evaluate(y_test, y_hat)
    print('\n')

Probit Regression

In [None]:
# Probit regression fitted with SVI, once per component (1-4).
for i in range(1,5):
    print(f"\n--------------------------------  component {i}  ---------------------------------------")
    cat_dataset = get_data_for_component_class(labeled_features, i)
    y, X, X_train_torch, y_train_torch,X_test_torch, X_test, y_test, X_train, y_train, y_std, y_mean = preprocess(cat_dataset, 0.1, 'classification_model',classi=True,splitting=True)
    model__cdf,guide_cdf, alpha_hat_cdf, beta_hat_cdf=train_c_svi(model_cdf, X_train_torch, y_train_torch.squeeze(),steps=20000,lrate=0.0005)
    y_hat=test_model_c(alpha_hat_cdf, beta_hat_cdf,X_test_torch,thres=0.5)
    # evaluate expects (true labels, predictions); the swapped order exchanged precision and recall.
    evaluate(y_test, y_hat)
    print('\n')

Neural Network Classification

In [None]:
# Neural-network classifier trained with SVI, once per component (1-4).
for i in range(1,5):
    print(f"\n--------------------------------  component {i}  ---------------------------------------")
    cat_dataset = get_data_for_component_class(labeled_features, i)
    y, X, X_train_torch, y_train_torch,X_test_torch, X_test, y_test, X_train, y_train, y_std, y_mean = preprocess(cat_dataset, 0.1, 'classification_model',classi=True,splitting=True)
    modelFFNN_c,guide=train_nn("modelFFNN_c", X_train_torch, y_train_torch)
    y_hat=test_nn_c(modelFFNN_c,guide,X_test_torch,thres=0.5)
    # evaluate expects (true labels, predictions); the swapped order exchanged precision and recall.
    evaluate(y_test, y_hat)
    print('\n')
