# Example: Turbofan Engine Degradation Simulation Data Set

This example uses the same dataset that Dayne Batten explored in his Keras implementation of WTTE-RNN.

Reference:
```
A. Saxena and K. Goebel (2008). 
"Turbofan Engine Degradation Simulation Data Set", 
https://ti.arc.nasa.gov/c/13/, NASA Ames, Moffett Field, CA.
```

In [None]:
from wtte.transformer import WtteAttentionNetwork
from wtte.loss import loss_continuous_weibull_loglik, loss_discrete_weibull_loglik
from wtte.datasets import TurbofanDegradationDataset
from wtte.train import train, Historian
from wtte.predict import predict
from wtte.visualize import plot_weibull_predictions, plot_predictions_over_time

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Subset
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import logging

# Configure the root logger: INFO level, each record prefixed with a timestamp.
logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO)

In [None]:
# Configuration
# Deterministic cuDNN kernels (fix for CUDA unspecified launch failure).
torch.backends.cudnn.deterministic = True
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

## Train the model

In [None]:
# Training split of the turbofan data, restricted to unit IDs 1-4, with
# sequence-length bounds of 20 and 100. The dataset is handed a CPU device.
dataset_train = TurbofanDegradationDataset(
    directory='D:/Users/Aaron/Documents/AI and ML Projects/Data/CMAPSS',
    train=True,
    unit_ids=[1, 2, 3, 4],
    min_seq_len=20,
    max_seq_len=100,
    device=torch.device('cpu'),
)

# Log the length of the training dataset.
logging.info(len(dataset_train))

In [None]:
# Test split (train=False) for unit ID 1 only, with both sequence-length
# bounds set to 100. The dataset is handed a CPU device.
dataset_test = TurbofanDegradationDataset(
    directory='D:/Users/Aaron/Documents/AI and ML Projects/Data/CMAPSS',
    train=False,
    unit_ids=[1],
    min_seq_len=100,
    max_seq_len=100,
    device=torch.device('cpu'),
)
# Standardize the test set against the training dataset.
# NOTE(review): presumably this applies the training set's scaling statistics
# to the test features -- confirm in TurbofanDegradationDataset.standardize.
dataset_test.standardize(dataset_train)

# Log the length of the test dataset.
logging.info(len(dataset_test))

In [None]:
# Shuffled mini-batch loader over the training set: batch size 512, two worker
# processes, batches assembled by the training dataset's collate_fn.
# Pinned host memory only benefits host-to-GPU copies, and requesting it when no
# accelerator is present makes DataLoader emit a warning, so it is enabled only
# when the configured device is CUDA.
dl_train = DataLoader(
    dataset_train,
    batch_size=512,
    shuffle=True,
    num_workers=2,
    pin_memory=(device.type == 'cuda'),
    collate_fn=dataset_train.collate_fn,
)

In [None]:
# Unshuffled loader over the test set: batch size 512, two worker processes.
# NOTE(review): the *training* dataset's collate_fn is reused here -- confirm
# this is intended rather than dataset_test.collate_fn.
# Pinned host memory only benefits host-to-GPU copies, and requesting it when no
# accelerator is present makes DataLoader emit a warning, so it is enabled only
# when the configured device is CUDA.
dl_test = DataLoader(
    dataset_test,
    batch_size=512,
    shuffle=False,
    num_workers=2,
    pin_memory=(device.type == 'cuda'),
    collate_fn=dataset_train.collate_fn,
)

In [None]:
# Two-layer attention network whose input width equals the number of dataset
# features; the encoder-layer options are passed through as a dict. The model
# is moved onto the configured device.
model = WtteAttentionNetwork(
    input_size=len(dataset_train.features),
    num_layers=2,
    encoder_layer_options={'nhead': 8, 'dim_feedforward': 32, 'dropout': 0.25},
).to(device)

# SGD with momentum over all model parameters.
optimizer = torch.optim.SGD(model.parameters(), momentum=0.9, lr=0.01)

In [None]:
# Toggle: True trains the model and saves its weights to model_filepath;
# False skips training and restores previously saved weights from that path.
train_model = True
model_filepath = 'D:/Users/Aaron/Documents/AI and ML Projects/My Projects/Predictive Maintenance/wtte_att.pt'

if train_model:
    # The historian is handed to train() and converted to a table afterwards;
    # the table's 'train_loss' and 'test_loss' columns are plotted below.
    historian = Historian()
    train(model, dl_train, test_dataloader=dl_test, n_epochs=100, optimizer=optimizer, clip_grad=1.0,
          loss_type='discrete', device=device, n_epochs_pretrain=10, historian=historian)
    # Plot loss by epoch (results are bound to `_` to suppress notebook cell output)
    tbl_historian = historian.to_table()
    _ = plt.plot(tbl_historian.index, tbl_historian['train_loss'], label='Training loss')
    _ = plt.plot(tbl_historian.index, tbl_historian['test_loss'], label='Test loss')
    _ = plt.legend()
    _ = plt.title('Objective loss by training epoch')
    _ = plt.show()
    # Save trained model (weights only, via the state dict)
    torch.save(model.state_dict(), model_filepath)
else:
    # Load trained model. map_location remaps the stored tensors onto the
    # current device, so a checkpoint written during a CUDA run can still be
    # restored on a CPU-only machine instead of raising a deserialization error.
    model.load_state_dict(torch.load(model_filepath, map_location=device))

## Predict with the model

In [None]:
# Just one sequence: predict on the first item of the test dataset only.
test_seq_indices = [0]
# Batch size is the number of selected sequences, capped at 1024.
# The training dataset's collate_fn is reused, as for the other loaders.
# Pinned host memory only benefits host-to-GPU copies, and requesting it when no
# accelerator is present makes DataLoader emit a warning, so it is enabled only
# when the configured device is CUDA.
dl_test_2 = DataLoader(
    Subset(dataset_test, test_seq_indices),
    batch_size=min(len(test_seq_indices), 1024),
    shuffle=False,
    pin_memory=(device.type == 'cuda'),
    collate_fn=dataset_train.collate_fn,
)

# Run the model over the subset; to_dataframe=True requests tabular output,
# which is handed to the plotting helpers in the following cells.
test_output = predict(model, dl_test_2, device=device, to_dataframe=True)

In [None]:
# Use an 8 x 6 default figure size, then plot the predictions over time
# for the selected test sequence.
plt.rcParams["figure.figsize"] = (8, 6)
plot_predictions_over_time(test_output)

In [None]:
# Use a taller 8 x 15 default figure size, then plot the Weibull predictions
# with sample_frac set to 0.10.
plt.rcParams["figure.figsize"] = (8, 15)
plot_weibull_predictions(test_output, sample_frac=0.10)