# Predicting Remaining Useful Life

In this notebook I try to predict the Remaining Useful Life (RUL) for each of the 100 trajectories in the FD001 subset of NASA's Turbofan Engine Degradation Simulation Data Set.

## Importing Modules

In [None]:
import numpy as np
import pandas as pd

## 1) Loading Data

In [28]:
# Read the FD001 training file: single-space delimited, no header row, so
# pandas assigns integer column labels that are replaced in the next cell.
train_df = pd.read_csv(
    "CMAPSSData/train_FD001.txt",
    delimiter=" ",
    header=None,
)

In [29]:
# Label the 28 parsed columns: "unit", "cycle", then os1-os3 and sm1-sm23
# (presumably operational settings and sensor measurements -- confirm against
# the dataset's readme). The numbered names are generated rather than typed out.
train_df.columns = (
    ["unit", "cycle"]
    + ["os{}".format(n) for n in range(1, 4)]
    + ["sm{}".format(n) for n in range(1, 24)]
)

In [30]:
# Preview the first five rows to check that the new column labels line up
# with the values.
train_df.head(n=5)

Unnamed: 0,unit,cycle,os1,os2,os3,sm1,sm2,sm3,sm4,sm5,...,sm14,sm15,sm16,sm17,sm18,sm19,sm20,sm21,sm22,sm23
0,1,1,-0.0007,-0.0004,100.0,518.67,641.82,1589.7,1400.6,14.62,...,8138.62,8.4195,0.03,392,2388,100.0,39.06,23.419,,
1,1,2,0.0019,-0.0003,100.0,518.67,642.15,1591.82,1403.14,14.62,...,8131.49,8.4318,0.03,392,2388,100.0,39.0,23.4236,,
2,1,3,-0.0043,0.0003,100.0,518.67,642.35,1587.99,1404.2,14.62,...,8133.23,8.4178,0.03,390,2388,100.0,38.95,23.3442,,
3,1,4,0.0007,0.0,100.0,518.67,642.35,1582.79,1401.87,14.62,...,8133.83,8.3682,0.03,392,2388,100.0,38.88,23.3739,,
4,1,5,-0.0019,-0.0002,100.0,518.67,642.37,1582.85,1406.22,14.62,...,8133.8,8.4294,0.03,393,2388,100.0,38.9,23.4044,,


In [31]:
# Print the dtype and non-null count of every column to spot missing data.
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20631 entries, 0 to 20630
Data columns (total 28 columns):
unit     20631 non-null int64
cycle    20631 non-null int64
os1      20631 non-null float64
os2      20631 non-null float64
os3      20631 non-null float64
sm1      20631 non-null float64
sm2      20631 non-null float64
sm3      20631 non-null float64
sm4      20631 non-null float64
sm5      20631 non-null float64
sm6      20631 non-null float64
sm7      20631 non-null float64
sm8      20631 non-null float64
sm9      20631 non-null float64
sm10     20631 non-null float64
sm11     20631 non-null float64
sm12     20631 non-null float64
sm13     20631 non-null float64
sm14     20631 non-null float64
sm15     20631 non-null float64
sm16     20631 non-null float64
sm17     20631 non-null int64
sm18     20631 non-null int64
sm19     20631 non-null float64
sm20     20631 non-null float64
sm21     20631 non-null float64
sm22     0 non-null float64
sm23     0 non-null float64
dtypes: flo

First, I drop the columns that consist entirely of missing values (`sm22` and `sm23` in the summary above).

In [32]:
# Remove the columns that hold no data at all. `how="all"` limits the drop to
# fully-empty columns; the default (`how="any"`) would also discard a column
# that merely contains a single missing reading. The result is rebound to
# `train_df` instead of mutating the frame with `inplace=True`.
train_df = train_df.dropna(axis=1, how="all")

In [33]:
# Preview the first five rows again to confirm the empty trailing columns
# are gone.
train_df.head(n=5)

Unnamed: 0,unit,cycle,os1,os2,os3,sm1,sm2,sm3,sm4,sm5,...,sm12,sm13,sm14,sm15,sm16,sm17,sm18,sm19,sm20,sm21
0,1,1,-0.0007,-0.0004,100.0,518.67,641.82,1589.7,1400.6,14.62,...,521.66,2388.02,8138.62,8.4195,0.03,392,2388,100.0,39.06,23.419
1,1,2,0.0019,-0.0003,100.0,518.67,642.15,1591.82,1403.14,14.62,...,522.28,2388.07,8131.49,8.4318,0.03,392,2388,100.0,39.0,23.4236
2,1,3,-0.0043,0.0003,100.0,518.67,642.35,1587.99,1404.2,14.62,...,522.42,2388.03,8133.23,8.4178,0.03,390,2388,100.0,38.95,23.3442
3,1,4,0.0007,0.0,100.0,518.67,642.35,1582.79,1401.87,14.62,...,522.86,2388.08,8133.83,8.3682,0.03,392,2388,100.0,38.88,23.3739
4,1,5,-0.0019,-0.0002,100.0,518.67,642.37,1582.85,1406.22,14.62,...,522.19,2388.04,8133.8,8.4294,0.03,393,2388,100.0,38.9,23.4044


In [45]:
# Average every column within each unit (one output row per unit).
train_df.groupby(by="unit").agg("mean")

Unnamed: 0_level_0,cycle,os1,os2,os3,sm1,sm2,sm3,sm4,sm5,sm6,...,sm12,sm13,sm14,sm15,sm16,sm17,sm18,sm19,sm20,sm21
unit,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,96.5,-0.000189,4.843750e-05,100.0,518.67,642.621042,1589.485521,1407.262135,14.62,21.610000,...,521.459427,2388.110833,8128.913542,8.436555,0.03,392.854167,2388.0,100.0,38.840052,23.306310
2,144.0,0.000051,8.815331e-05,100.0,518.67,642.435226,1588.181986,1404.265854,14.62,21.609338,...,521.811603,2388.053554,8143.246167,8.423983,0.03,392.386760,2388.0,100.0,38.901150,23.338997
3,90.0,0.000137,-1.396648e-05,100.0,518.67,642.543743,1588.715084,1405.628994,14.62,21.609777,...,521.773966,2388.047430,8159.557654,8.430236,0.03,392.754190,2388.0,100.0,38.882793,23.321931
4,95.0,0.000150,6.719577e-05,100.0,518.67,642.662381,1590.563280,1408.253915,14.62,21.609947,...,521.501005,2388.082275,8154.574444,8.439303,0.03,393.291005,2388.0,100.0,38.830265,23.294502
5,135.0,0.000136,-6.059480e-05,100.0,518.67,642.451970,1588.153271,1404.878439,14.62,21.609331,...,521.866431,2388.029071,8161.807844,8.425968,0.03,392.646840,2388.0,100.0,38.891078,23.336284
6,94.5,0.000006,-8.650549e-22,100.0,518.67,642.942340,1592.706277,1413.876117,14.62,21.610000,...,520.925798,2388.179468,8119.317128,8.458306,0.03,393.792553,2388.0,100.0,38.727447,23.235116
7,130.0,0.000102,-4.980695e-05,100.0,518.67,642.563320,1589.327838,1406.577336,14.62,21.609730,...,521.653475,2388.061737,8149.527413,8.433265,0.03,392.992278,2388.0,100.0,38.862124,23.310668
8,75.5,0.000008,2.600000e-05,100.0,518.67,642.934200,1593.001400,1413.969933,14.62,21.610000,...,520.898867,2388.170667,8123.167533,8.463234,0.03,393.786667,2388.0,100.0,38.725733,23.228078
9,101.0,-0.000323,4.975124e-06,100.0,518.67,642.396716,1588.308060,1403.030647,14.62,21.609204,...,522.031592,2388.014826,8170.726517,8.419438,0.03,392.512438,2388.0,100.0,38.908756,23.353426
10,111.5,-0.000060,4.684685e-05,100.0,518.67,642.457432,1587.943964,1404.491577,14.62,21.609414,...,521.745135,2388.059730,8146.605541,8.423277,0.03,392.536036,2388.0,100.0,38.901712,23.343227


In [39]:
# Highest cycle number recorded for each unit, as a two-column frame
# (`unit`, `total_cycles`).
grp = (
    train_df.groupby("unit")
    .agg({"cycle": "max"})
    .reset_index()
    .rename(columns={"cycle": "total_cycles"})
)

# Attach each unit's total cycle count to every one of its rows; a left join
# keeps all rows of `train_df` in their original order.
data = train_df.merge(grp, how="left", on="unit")

# RUL = cycles remaining until the unit's last recorded cycle. The
# column-wise subtraction stays in integer dtype and avoids calling a Python
# function once per row.
data["RUL"] = data["total_cycles"] - data["cycle"]
    

In [40]:
# Preview the first five rows to inspect the new `total_cycles` and `RUL`
# columns.
data.head(n=5)

Unnamed: 0,unit,cycle,os1,os2,os3,sm1,sm2,sm3,sm4,sm5,...,sm14,sm15,sm16,sm17,sm18,sm19,sm20,sm21,total_cycles,RUL
0,1,1,-0.0007,-0.0004,100.0,518.67,641.82,1589.7,1400.6,14.62,...,8138.62,8.4195,0.03,392,2388,100.0,39.06,23.419,192,191
1,1,2,0.0019,-0.0003,100.0,518.67,642.15,1591.82,1403.14,14.62,...,8131.49,8.4318,0.03,392,2388,100.0,39.0,23.4236,192,190
2,1,3,-0.0043,0.0003,100.0,518.67,642.35,1587.99,1404.2,14.62,...,8133.23,8.4178,0.03,390,2388,100.0,38.95,23.3442,192,189
3,1,4,0.0007,0.0,100.0,518.67,642.35,1582.79,1401.87,14.62,...,8133.83,8.3682,0.03,392,2388,100.0,38.88,23.3739,192,188
4,1,5,-0.0019,-0.0002,100.0,518.67,642.37,1582.85,1406.22,14.62,...,8133.8,8.4294,0.03,393,2388,100.0,38.9,23.4044,192,187
