In [None]:
"""
File Name: prediction.ipynb

Description: Prediction from existing models

Author: junghwan lee
Email: jhrrlee@gmail.com
Date Created: 2023.09.12
Todo:
1. Separate the concerns of dataset reshaping and evaluation
2. A function to save prediction results.
3. Refactor to enhance flexibility
"""

In [None]:
import sys
import numpy as np
import matplotlib.pyplot as plt
import tensorflow.keras as keras
import os
from tensorflow.keras.models import load_model
from sklearn.metrics import mean_absolute_error, mean_squared_error
import pandas as pd
from tensorflow.keras import backend as K
import time
import math

In [None]:
# Mount Google Drive (forcing a remount) and add the project folders to the
# module search path.
from google.colab import drive

drive.mount("/content/drive", force_remount=True)
sys.path.extend([
    '/content/drive/MyDrive/Colab_Notebooks',
    '/content/drive/MyDrive/Colab_Notebooks/toyota_data',
    '/content/drive/MyDrive/Colab_Notebooks/toyota_data/from pc/',
    '/content/drive/MyDrive/Colab_Notebooks/toyota_data/from pc/models/',
])


Mounted at /content/drive


In [None]:
# Directories on the mounted drive. Both end with "/" because file names are
# appended to them by plain string concatenation.
trainedmodel_path = (
    "/content/drive/MyDrive/Colab_Notebooks/toyota_data/"
    "from pc/models/"
)
data_path = (
    "/content/drive/MyDrive/Colab_Notebooks/toyota_data/"
    "data/"
)

In [None]:
# File names of the saved models (relative to trainedmodel_path).
trained_model_cnnlstm = "cnn_lstm_new2_sum_40_40_86.h5"
trained_model_bilstm_attn = "v2_1_2_bilstm_attn_new2_sum_40_40_39.h5"
trained_model_lstm = "v2_1_2_lstm_new2_sum_40_40_94.h5"
trained_model_resnet = "v2_1_2_resnet_new2_sum_40_40_101.h5"
# NOTE(review): unlike the other names, this one has no underscore before the
# trailing number ("40_40290") - confirm it matches the file on disk.
trained_model_1dcnn = "v2_2_1dcnn_new2_sum_40_40290.h5"
trained_model_tcn = "v3_1_2_tcn_new2_sum_40_40_76.h5"
trained_model_transformer = "v4_1_transformer_new2_sum_40_40_133.h5"

In [None]:
def get_cyclesum_data_x(start_cycle, num_cycles, X):
  """Return the part of X that covers a window of cycles.

  The window spans `num_cycles` entries starting at index `start_cycle` along
  the third axis of X; the first two axes are kept whole. The shape of the
  selected data is printed before it is returned.
  """
  cycle_window = slice(start_cycle, start_cycle + num_cycles)
  X_window = X[:, :, cycle_window]
  print(f"Shape of X: {X_window.shape}")
  return X_window

In [None]:
def get_RUL(eol, num_cycles):
  """Return `eol` reduced by `num_cycles` (element-wise when `eol` is an array)."""
  remaining = eol - num_cycles
  return remaining

In [None]:
import tensorflow as tf
def root_mean_squared_error(y_true, y_pred):
    """Return the square root of the mean squared difference between y_pred and y_true.

    The mean is taken over all elements, so the result is a scalar tensor.
    """
    squared_error = tf.square(y_pred - y_true)
    mean_squared = tf.reduce_mean(squared_error)
    return tf.sqrt(mean_squared)

In [None]:
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean of |(y_true - y_pred) / (y_true + epsilon)| scaled by 100.

    A small constant is added to the denominator so that a target of exactly
    zero does not cause a division by zero.
    """
    epsilon = 1e-10  # some small constant
    relative_error = (y_true - y_pred) / (y_true + epsilon)
    mean_abs_relative_error = tf.reduce_mean(tf.abs(relative_error))
    return mean_abs_relative_error * 100

In [None]:
# Alternative input files (disabled), kept for reference:
#   eol = np.load(data_path + 'valid_EOL_data.npy')
#   cycle_data_dis = np.load(data_path + 'valid_dis_data.npy')
#   cycle_data_ch = np.load(data_path + 'valid_ch_data.npy')

# Load the original data and the stored split indices from data_path.
eol = np.load(f"{data_path}eol.npy")
cycle_data_dis = np.load(f"{data_path}cycle_data_dis.npy")
indices_train = np.load(f"{data_path}train_ind.npy")
indices_val = np.load(f"{data_path}test_ind.npy")
indices_test = np.load(f"{data_path}secondary_test_ind.npy")
new_cycle_sum = np.load(f"{data_path}new_cycle_sum.npy")

# Sizes are read off the loaded arrays: first axis of eol, second axis of
# cycle_data_dis.
num_cells, num_cycles = eol.shape[0], cycle_data_dis.shape[1]
# 1-based cycle numbers: 1 .. num_cycles.
cycle_count = np.arange(1, num_cycles + 1)
# RUL relative to the full number of cycles in cycle_data_dis.
RUL = eol - num_cycles

print("RUL shape:", RUL.shape)
print("cycle_data_dis shape:", cycle_data_dis.shape)
print("new_cycle_sum shape:", new_cycle_sum.shape)

RUL shape: (124, 1)
cycle_data_dis shape: (114, 100, 4, 500)
new_cycle_sum shape: (124, 14, 100)


In [None]:
# Fixed sample indices for each subset. These assignments replace the index
# arrays that were loaded from train_ind.npy / test_ind.npy /
# secondary_test_ind.npy.
indices_train = [
    0, 3, 4, 6, 9, 12, 15, 16, 19, 22,
    28, 29, 32, 37, 39, 41, 44, 46, 48, 52,
    55, 58, 61, 63, 66, 69, 73, 76, 79, 82,
    85, 87, 89, 99, 102, 106, 109, 114, 117, 120,
    123,
]
indices_val = [
    1, 5, 7, 8, 11, 13, 17, 20, 23, 25,
    30, 33, 35, 38, 40, 45, 49, 50, 53, 56,
    59, 64, 67, 70, 71, 74, 77, 80, 86, 90,
    92, 94, 95, 98, 100, 104, 107, 110, 111, 113,
    115, 118, 121,
]
indices_test = [
    2, 10, 14, 18, 21, 24, 26, 27, 31, 34,
    36, 42, 43, 47, 51, 54, 57, 60, 62, 65,
    68, 72, 75, 78, 81, 83, 84, 88, 91, 93,
    96, 97, 101, 103, 105, 108, 112, 116, 119, 122,
]

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

def rul_sumdata_preparation_with_fixed_indice(cycle_data, rul, indices_train, indices_val, indices_test):
  """Transpose, min-max normalize and split summary features using fixed sample indices.

  Args:
    cycle_data: 3-D array; axes 1 and 2 are swapped so that the result is
      unpacked as (num_samples, num_cycles, num_features).
    rul: Target array; reshaped to a single column.
    indices_train: Sample indices (axis 0) of the training subset.
    indices_val: Sample indices (axis 0) of the validation subset.
    indices_test: Sample indices (axis 0) of the test subset.

  Returns:
    A 12-tuple
    (X_train, X_val, X_test, X_norm_train, X_norm_val, X_norm_test,
     y_train, y_val, y_test, y_norm_train, y_norm_val, y_norm_test).
    The y_norm_* entries hold the same values as the y_* entries because
    target scaling is disabled.

  Note:
    Each feature's scaler is fitted on every sample before the split, so the
    validation and test samples contribute to the min/max used for scaling.
  """
  X = cycle_data.transpose(0, 2, 1)
  y = rul

  num_samples, num_cycles, num_features = X.shape
  # One time step per cycle.
  sequence_length = num_cycles
  # X already has the (samples, sequence, features) layout; no reshape needed.
  X_reshaped = X
  y_reshaped = y.reshape(-1, 1)

  print("X X_reshaped:", X.shape, X_reshaped.shape)
  print("y y_reshaped:", y.shape, y_reshaped.shape)
  print("num_features", num_features)

  # Normalization: one MinMaxScaler per feature, fitted over all samples and
  # cycles of that feature. The output buffer keeps a floating input dtype but
  # is promoted to float64 otherwise, so scaled values are not truncated when
  # the input holds integers.
  if np.issubdtype(X_reshaped.dtype, np.floating):
    norm_dtype = X_reshaped.dtype
  else:
    norm_dtype = np.float64
  X_reshaped_norm = np.zeros(X_reshaped.shape, dtype=norm_dtype)
  for i in range(num_features):
    feature_scaler = MinMaxScaler()
    scaled_column = feature_scaler.fit_transform(X_reshaped[:, :, i].reshape(-1, 1))
    X_reshaped_norm[:, :, i] = scaled_column.reshape(num_samples, sequence_length)

  print("X_reshaped_norm", X_reshaped_norm.shape)
  # Target scaling is disabled: the "normalized" targets are the raw targets.
  y_reshaped_norm = y_reshaped

  # Split the data based on train, validation, test indices
  print("Indices of training set:", indices_train)
  print("Indices of validation set:", indices_val)
  print("Indices of test set:", indices_test)
  X_train, X_val, X_test = X_reshaped[indices_train], X_reshaped[indices_val], X_reshaped[indices_test]
  X_norm_train, X_norm_val, X_norm_test = X_reshaped_norm[indices_train], X_reshaped_norm[indices_val], X_reshaped_norm[indices_test]
  y_train, y_val, y_test = y_reshaped[indices_train], y_reshaped[indices_val], y_reshaped[indices_test]
  y_norm_train, y_norm_val, y_norm_test = y_reshaped_norm[indices_train], y_reshaped_norm[indices_val], y_reshaped_norm[indices_test]
  print("X_train X_val X_test:", X_train.shape, X_val.shape, X_test.shape)
  print("X_norm_train X_norm_val X_norm_test:", X_norm_train.shape, X_norm_val.shape, X_norm_test.shape)
  print("y_train y_val y_test:", y_train.shape, y_val.shape, y_test.shape)
  print("y_norm_train y_norm_val y_norm_test:", y_norm_train.shape, y_norm_val.shape, y_norm_test.shape)

  return X_train, X_val, X_test, X_norm_train, X_norm_val, X_norm_test, y_train, y_val, y_test, y_norm_train, y_norm_val, y_norm_test

In [None]:
# Take 40 cycles starting at index 40 (indices 40..79) from the summary
# features, and compute the target as eol minus 80.
cycle_data_selected = get_cyclesum_data_x(start_cycle=40, num_cycles=40, X=new_cycle_sum)
RUL_selected = get_RUL(eol, num_cycles=80)

Shape of X: (124, 14, 40)


In [None]:
# Build the raw and normalized train/validation/test splits from the selected
# window using the fixed index lists.
(
    X_train, X_val, X_test,
    X_train_norm, X_val_norm, X_test_norm,
    y_train, y_val, y_test,
    y_train_norm, y_val_norm, y_test_norm,
) = rul_sumdata_preparation_with_fixed_indice(
    cycle_data_selected, RUL_selected, indices_train, indices_val, indices_test
)

X X_reshaped: (124, 40, 14) (124, 40, 14)
y y_reshaped: (124, 1) (124, 1)
num_features 14
X_reshaped_norm (124, 40, 14)
Indices of training set: [0, 3, 4, 6, 9, 12, 15, 16, 19, 22, 28, 29, 32, 37, 39, 41, 44, 46, 48, 52, 55, 58, 61, 63, 66, 69, 73, 76, 79, 82, 85, 87, 89, 99, 102, 106, 109, 114, 117, 120, 123]
Indices of validation set: [1, 5, 7, 8, 11, 13, 17, 20, 23, 25, 30, 33, 35, 38, 40, 45, 49, 50, 53, 56, 59, 64, 67, 70, 71, 74, 77, 80, 86, 90, 92, 94, 95, 98, 100, 104, 107, 110, 111, 113, 115, 118, 121]
Indices of test set: [2, 10, 14, 18, 21, 24, 26, 27, 31, 34, 36, 42, 43, 47, 51, 54, 57, 60, 62, 65, 68, 72, 75, 78, 81, 83, 84, 88, 91, 93, 96, 97, 101, 103, 105, 108, 112, 116, 119, 122]
X_train X_val X_test: (41, 40, 14) (43, 40, 14) (40, 40, 14)
X_norm_train X_norm_val X_norm_test: (41, 40, 14) (43, 40, 14) (40, 40, 14)
y_train y_val y_test: (41, 1) (43, 1) (40, 1)
y_norm_train y_norm_val y_norm_test: (41, 1) (43, 1) (40, 1)


In [None]:
def _score_predictions(y_true, y_pred):
  """Flatten targets and predictions to 1-D and compute RMSE, MAE and MAPE.

  Args:
    y_true: Ground-truth values for one split.
    y_pred: Model outputs for the same split.

  Returns:
    Tuple (y_true_1d, y_pred_1d, rmse, mae, mape). `mape` is the value of the
    notebook's mean_absolute_percentage_error converted to a Python float.
  """
  # reshape(-1) instead of np.squeeze: squeeze would collapse a single-sample
  # split to a 0-d array, which is no longer a sequence of samples.
  y_true = np.asarray(y_true).reshape(-1)
  y_pred = np.asarray(y_pred).reshape(-1)
  rmse = np.sqrt(mean_squared_error(y_true, y_pred))
  mae = mean_absolute_error(y_true, y_pred)
  mape = mean_absolute_percentage_error(y_true, y_pred).numpy().item()
  return y_true, y_pred, rmse, mae, mape


# Pass the custom RMSE function to Keras by name so load_model can resolve it
# (presumably the saved model references it - confirm against the training code).
custom_objects = {"root_mean_squared_error": root_mean_squared_error}
model = load_model(trainedmodel_path + trained_model_resnet, custom_objects=custom_objects)

# Training split: the only one whose prediction call is timed (milliseconds).
start_time = time.time()
y_train_pred = model.predict(X_train_norm)
prediction_time = (time.time() - start_time) * 1000
y_train, y_train_pred, train_rmse_rul, train_mae_rul, train_mape_rul = _score_predictions(y_train_norm, y_train_pred)

print(f"Train RMSE for RUL: {train_rmse_rul.item()} prediction_time: {prediction_time} MAPE for RUL: {train_mape_rul} MAE for RUL: {train_mae_rul}")

# Validation split
y_val_pred = model.predict(X_val_norm)
y_val, y_val_pred, val_rmse_rul, val_mae_rul, val_mape_rul = _score_predictions(y_val_norm, y_val_pred)

print(f"Validation RMSE for RUL: {val_rmse_rul.item()} MAPE for RUL: {val_mape_rul} MAE for RUL: {val_mae_rul}")

# Test split
y_test_pred = model.predict(X_test_norm)
y_test, y_test_pred, test_rmse_rul, test_mae_rul, test_mape_rul = _score_predictions(y_test_norm, y_test_pred)

print(f"Test RMSE for RUL: {test_rmse_rul.item()} MAPE for RUL: {test_mape_rul} MAE for RUL: {test_mae_rul}")

# Create separate DataFrames for each dataset
train_data = pd.DataFrame({'y_train': y_train, 'y_train_pred': y_train_pred})
val_data = pd.DataFrame({'y_val': y_val, 'y_val_pred': y_val_pred})
test_data = pd.DataFrame({'y_test': y_test, 'y_test_pred': y_test_pred})

# Summary scores: unweighted mean of the three per-split values.
mape_score = (train_mape_rul + val_mape_rul + test_mape_rul) / 3
rmse_score = (train_rmse_rul + val_rmse_rul + test_rmse_rul) / 3
print(f"rmse_score : {rmse_score} mape score : {mape_score}")

Train RMSE for RUL: 65.879835108657 prediction_time: 147.58944511413574 MAPE for RUL: 6.2332979908599855 MAE for RUL: 46.3203835836271
Validation RMSE for RUL: 193.92463833786667 MAPE for RUL: 13.409446574298103 MAE for RUL: 111.33531366392623
Test RMSE for RUL: 128.02170419971247 MAPE for RUL: 12.637974439283791 MAE for RUL: 85.20046291351318
rmse_score : 129.27539254874537 mape score : 10.760239668147292
