## This notebook will use Long Short Term Memory (LSTM) to predict RUL for jet engines

In [4]:
# import the libraries
import os
import pandas as pd
import numpy as np
# import scikit-learn use pip install scikit-learn and then use the code right below this
from sklearn.preprocessing import MinMaxScaler

# %matplotlib inline
import matplotlib.pyplot as plt
import glob
import urllib
# Please note that the 'azureml' package requires an active Azure subscription 
# and Azure Machine Learning workspace for certain functionalities. 
# If you're not using Azure Machine Learning, you may need to consider whether 
# you actually need the 'azureml' package or if there's an alternative solution that suits your requirements.
# import azureml
# from azureml.logging import get_azureml_logger
# run_logger = get_azureml_logger()
# run_logger.log('amlrealworld.predictivemaintenanceforpm.dataingestionpreparation','true')

#### Import Train, Test datasets

In [34]:
# read training data 
train_raw_df = pd.read_csv('./data/train_FD001.txt', sep=" ", header=None)

# remove two columns that have NaN
# Drop columns 26 and 27
train_raw_df = train_raw_df.drop(columns=[26, 27])

# name the cols
train_raw_df.columns = ['id', 'cycle', 'setting1', 'setting2', 'setting3', 's1', 's2', 's3',
                     's4', 's5', 's6', 's7', 's8', 's9', 's10', 's11', 's12', 's13', 's14',
                     's15', 's16', 's17', 's18', 's19', 's20', 's21']

train_raw_df.head()

Unnamed: 0,id,cycle,setting1,setting2,setting3,s1,s2,s3,s4,s5,...,s12,s13,s14,s15,s16,s17,s18,s19,s20,s21
0,1,1,-0.0007,-0.0004,100.0,518.67,641.82,1589.7,1400.6,14.62,...,521.66,2388.02,8138.62,8.4195,0.03,392,2388,100.0,39.06,23.419
1,1,2,0.0019,-0.0003,100.0,518.67,642.15,1591.82,1403.14,14.62,...,522.28,2388.07,8131.49,8.4318,0.03,392,2388,100.0,39.0,23.4236
2,1,3,-0.0043,0.0003,100.0,518.67,642.35,1587.99,1404.2,14.62,...,522.42,2388.03,8133.23,8.4178,0.03,390,2388,100.0,38.95,23.3442
3,1,4,0.0007,0.0,100.0,518.67,642.35,1582.79,1401.87,14.62,...,522.86,2388.08,8133.83,8.3682,0.03,392,2388,100.0,38.88,23.3739
4,1,5,-0.0019,-0.0002,100.0,518.67,642.37,1582.85,1406.22,14.62,...,522.19,2388.04,8133.8,8.4294,0.03,393,2388,100.0,38.9,23.4044


In [18]:
# read test data
test_df = pd.read_csv('./data/test_FD001.txt', sep=" ", header=None)

# remove two columns that have NaN
# Drop columns 26 and 27
test_df = test_df.drop(columns=[26, 27])

# name the cols
test_df.columns = train_df.columns


test_df.head()

Unnamed: 0,id,cycle,setting1,setting2,setting3,s1,s2,s3,s4,s5,...,s12,s13,s14,s15,s16,s17,s18,s19,s20,s21
0,1,1,0.0023,0.0003,100.0,518.67,643.02,1585.29,1398.21,14.62,...,521.72,2388.03,8125.55,8.4052,0.03,392,2388,100.0,38.86,23.3735
1,1,2,-0.0027,-0.0003,100.0,518.67,641.71,1588.45,1395.42,14.62,...,522.16,2388.06,8139.62,8.3803,0.03,393,2388,100.0,39.02,23.3916
2,1,3,0.0003,0.0001,100.0,518.67,642.46,1586.94,1401.34,14.62,...,521.97,2388.03,8130.1,8.4441,0.03,393,2388,100.0,39.08,23.4166
3,1,4,0.0042,0.0,100.0,518.67,642.44,1584.12,1406.42,14.62,...,521.38,2388.05,8132.9,8.3917,0.03,391,2388,100.0,39.0,23.3737
4,1,5,0.0014,0.0,100.0,518.67,642.51,1587.19,1401.92,14.62,...,522.15,2388.03,8129.54,8.4031,0.03,390,2388,100.0,38.99,23.413


In [22]:
# read ground truth data
truth_df = pd.read_csv('./data/RUL_FD001.txt', sep=" ", header=None)
# remove two columns that have NaN
# Drop column 1
truth_df = truth_df.drop(columns=[1])

truth_df.head()

Unnamed: 0,0
0,112
1,98
2,69
3,82
4,91


#### Training Data: Calculate the RUL for each record
##### The maximum cycle for each unit is the failure cycle.   Thus, we can use this to create the RUL of all the other cycles.

In [35]:
# create df with the unit id and the max cycle
rul = pd.DataFrame(train_raw_df.groupby('id')['cycle'].max()).reset_index()
rul.columns = ['id', 'max']
rul


Unnamed: 0,id,max
0,1,192
1,2,287
2,3,179
3,4,189
4,5,269
...,...,...
95,96,336
96,97,202
97,98,156
98,99,185


In [37]:
# join the two datasets on the unit id
train_df = train_raw_df.merge(rul, on=['id'], how='left')

# subtract cycle from the max cycles to get RUL
train_df['RUL'] = train_df['max'] - train_df['cycle']
train_df.drop(columns=[('max')])
train_df.head()

Unnamed: 0,id,cycle,setting1,setting2,setting3,s1,s2,s3,s4,s5,...,s14,s15,s16,s17,s18,s19,s20,s21,max,RUL
0,1,1,-0.0007,-0.0004,100.0,518.67,641.82,1589.7,1400.6,14.62,...,8138.62,8.4195,0.03,392,2388,100.0,39.06,23.419,192,191
1,1,2,0.0019,-0.0003,100.0,518.67,642.15,1591.82,1403.14,14.62,...,8131.49,8.4318,0.03,392,2388,100.0,39.0,23.4236,192,190
2,1,3,-0.0043,0.0003,100.0,518.67,642.35,1587.99,1404.2,14.62,...,8133.23,8.4178,0.03,390,2388,100.0,38.95,23.3442,192,189
3,1,4,0.0007,0.0,100.0,518.67,642.35,1582.79,1401.87,14.62,...,8133.83,8.3682,0.03,392,2388,100.0,38.88,23.3739,192,188
4,1,5,-0.0019,-0.0002,100.0,518.67,642.37,1582.85,1406.22,14.62,...,8133.8,8.4294,0.03,393,2388,100.0,38.9,23.4044,192,187
