## Using time series for predictive mainteneance of turbofan engines

In [1]:
# Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

In [2]:
# Data import
data_dir = "../CMAPSSData/"

In [None]:
!ls ../CMAPSSData/

In [3]:
sensor_colnames = [f"sensor{i}" for i in range(1,22, 1)]

In [4]:
engine_colnames = ["unit_number", "cycles", "operational_setting_1", "operational_setting_2", "operational_setting_3"] + sensor_colnames

In [5]:
train_fd001_raw = pd.read_csv(f"{data_dir}train_FD001.txt", delim_whitespace=True, names=engine_colnames)
train_fd002_raw = pd.read_csv(f"{data_dir}train_FD002.txt", delim_whitespace=True, names=engine_colnames)
train_fd003_raw = pd.read_csv(f"{data_dir}train_FD003.txt", delim_whitespace=True, names=engine_colnames)
train_fd004_raw = pd.read_csv(f"{data_dir}train_FD004.txt", delim_whitespace=True, names=engine_colnames)

In [6]:
test_fd001_raw = pd.read_csv(f"{data_dir}test_FD001.txt", delim_whitespace=True, names=engine_colnames)
test_fd002_raw = pd.read_csv(f"{data_dir}test_FD002.txt", delim_whitespace=True, names=engine_colnames)
test_fd003_raw = pd.read_csv(f"{data_dir}test_FD003.txt", delim_whitespace=True, names=engine_colnames)
test_fd004_raw = pd.read_csv(f"{data_dir}test_FD004.txt", delim_whitespace=True, names=engine_colnames)

In [7]:
rul_fd001_raw = pd.read_csv(f"{data_dir}RUL_FD001.txt", names=["rul_fd001"], squeeze=True)
rul_fd002_raw = pd.read_csv(f"{data_dir}RUL_FD002.txt", names=["rul_fd002"], squeeze=True)
rul_fd003_raw = pd.read_csv(f"{data_dir}RUL_FD003.txt", names=["rul_fd003"], squeeze=True)
rul_fd004_raw = pd.read_csv(f"{data_dir}RUL_FD004.txt", names=["rul_fd004"], squeeze=True)

In [8]:
#rul_df = pd.DataFrame({"rul_fd001": rul_fd001_raw, "rul_fd002": rul_fd002_raw, "rul_fd003": rul_fd003_raw, "rul_fd004": rul_fd004_raw}, dtype=np.int)

In [62]:
#rul_fd001_raw

0     112
1      98
2      69
3      82
4      91
     ... 
95    137
96     82
97     59
98    117
99     20
Name: rul_fd001, Length: 100, dtype: int64

### Assumptions

- data in rul_ are useful life of a particular unit
- unit number and trejectories are synonyms
- data in rul_ are ordered by thier unit number
- rul_df represent test set remaining useful life

In [9]:
rul_df["unit_number"] = list(range(1, rul_df.shape[0] + 1))

In [10]:
cols = rul_df.columns.tolist()
cols = cols[-1:] + cols[:-1]
rul_df = rul_df[cols]

In [11]:
rul_df = rul_df.set_index("unit_number")

In [12]:
rul_df.head(2)

Unnamed: 0_level_0,rul_fd001,rul_fd002,rul_fd003,rul_fd004
unit_number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,112.0,18,44.0,22.0
2,98.0,79,51.0,39.0


### Model data transformation

In [13]:
train_fd001 = train_fd001_raw.copy()
train_fd002 = train_fd002_raw.copy()
train_fd003 = train_fd003_raw.copy()
train_fd004 = train_fd004_raw.copy()

In [54]:
test_fd001 = test_fd001_raw.copy()
test_fd002 = test_fd002_raw.copy()
test_fd003 = test_fd003_raw.copy()
test_fd004 = test_fd004_raw.copy()

In [14]:
# assuming linear reduction on remaining useful life
train_fd001["rul"] = train_fd001.groupby(["unit_number"], group_keys=False).apply(lambda g: max(g.cycles) - g.cycles)
train_fd002["rul"] = train_fd002.groupby(["unit_number"], group_keys=False).apply(lambda g: max(g.cycles) - g.cycles)
train_fd003["rul"] = train_fd003.groupby(["unit_number"], group_keys=False).apply(lambda g: max(g.cycles) - g.cycles)
train_fd004["rul"] = train_fd004.groupby(["unit_number"], group_keys=False).apply(lambda g: max(g.cycles) - g.cycles)

In [15]:
train_fd001["train_data"] = "fd001"
train_fd002["train_data"] = "fd002"
train_fd003["train_data"] = "fd003"
train_fd004["train_data"] = "fd004"

In [55]:
test_fd001["test_data"] = "fd001"
test_fd002["test_data"] = "fd002"
test_fd003["test_data"] = "fd003"
test_fd004["test_data"] = "fd004"

In [16]:
train = pd.concat([train_fd001, train_fd002, train_fd003, train_fd004], axis=0)

In [17]:
train = train.set_index([ "train_data", "unit_number", "cycles"])

In [18]:
train.head(2)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,operational_setting_1,operational_setting_2,operational_setting_3,sensor1,sensor2,sensor3,sensor4,sensor5,sensor6,sensor7,...,sensor13,sensor14,sensor15,sensor16,sensor17,sensor18,sensor19,sensor20,sensor21,rul
train_data,unit_number,cycles,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
fd001,1,1,-0.0007,-0.0004,100.0,518.67,641.82,1589.7,1400.6,14.62,21.61,554.36,...,2388.02,8138.62,8.4195,0.03,392,2388,100.0,39.06,23.419,191
fd001,1,2,0.0019,-0.0003,100.0,518.67,642.15,1591.82,1403.14,14.62,21.61,553.75,...,2388.07,8131.49,8.4318,0.03,392,2388,100.0,39.0,23.4236,190


In [19]:
train.tail(2)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,operational_setting_1,operational_setting_2,operational_setting_3,sensor1,sensor2,sensor3,sensor4,sensor5,sensor6,sensor7,...,sensor13,sensor14,sensor15,sensor16,sensor17,sensor18,sensor19,sensor20,sensor21,rul
train_data,unit_number,cycles,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
fd004,249,254,35.0046,0.84,100.0,449.44,555.77,1381.29,1148.18,5.48,7.96,199.93,...,2388.83,8125.64,9.0515,0.02,337,2223,100.0,15.26,9.0774,1
fd004,249,255,42.003,0.84,100.0,445.0,549.85,1369.75,1147.45,3.91,5.69,142.47,...,2388.66,8144.33,9.1207,0.02,333,2212,100.0,10.66,6.4341,0


In [None]:
train.index

In [56]:
test = pd.concat([test_fd001, test_fd002, test_fd003, test_fd004], axis=0)

In [57]:
test = test.set_index([ "test_data", "unit_number", "cycles"])

In [58]:
test.head(2)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,operational_setting_1,operational_setting_2,operational_setting_3,sensor1,sensor2,sensor3,sensor4,sensor5,sensor6,sensor7,...,sensor12,sensor13,sensor14,sensor15,sensor16,sensor17,sensor18,sensor19,sensor20,sensor21
test_data,unit_number,cycles,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
fd001,1,1,0.0023,0.0003,100.0,518.67,643.02,1585.29,1398.21,14.62,21.61,553.9,...,521.72,2388.03,8125.55,8.4052,0.03,392,2388,100.0,38.86,23.3735
fd001,1,2,-0.0027,-0.0003,100.0,518.67,641.71,1588.45,1395.42,14.62,21.61,554.85,...,522.16,2388.06,8139.62,8.3803,0.03,393,2388,100.0,39.02,23.3916


In [59]:
test.tail(2)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,operational_setting_1,operational_setting_2,operational_setting_3,sensor1,sensor2,sensor3,sensor4,sensor5,sensor6,sensor7,...,sensor12,sensor13,sensor14,sensor15,sensor16,sensor17,sensor18,sensor19,sensor20,sensor21
test_data,unit_number,cycles,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
fd004,248,280,20.0027,0.7,100.0,491.19,608.19,1489.11,1256.25,9.35,13.66,333.15,...,314.17,2388.37,8085.24,9.2727,0.03,366,2324,100.0,24.44,14.6887
fd004,248,281,35.0075,0.8402,100.0,449.44,556.4,1378.58,1140.7,5.48,8.0,194.26,...,181.88,2388.59,8098.17,9.3964,0.02,335,2223,100.0,14.72,8.8502


In [20]:
# How to index into the training set
# Select the first unit of fd001 dataset # Can unstack with tarain.unstack()
train.loc[("fd001", 1)]

Unnamed: 0_level_0,operational_setting_1,operational_setting_2,operational_setting_3,sensor1,sensor2,sensor3,sensor4,sensor5,sensor6,sensor7,...,sensor13,sensor14,sensor15,sensor16,sensor17,sensor18,sensor19,sensor20,sensor21,rul
cycles,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,-0.0007,-0.0004,100.0,518.67,641.82,1589.70,1400.60,14.62,21.61,554.36,...,2388.02,8138.62,8.4195,0.03,392,2388,100.0,39.06,23.4190,191
2,0.0019,-0.0003,100.0,518.67,642.15,1591.82,1403.14,14.62,21.61,553.75,...,2388.07,8131.49,8.4318,0.03,392,2388,100.0,39.00,23.4236,190
3,-0.0043,0.0003,100.0,518.67,642.35,1587.99,1404.20,14.62,21.61,554.26,...,2388.03,8133.23,8.4178,0.03,390,2388,100.0,38.95,23.3442,189
4,0.0007,0.0000,100.0,518.67,642.35,1582.79,1401.87,14.62,21.61,554.45,...,2388.08,8133.83,8.3682,0.03,392,2388,100.0,38.88,23.3739,188
5,-0.0019,-0.0002,100.0,518.67,642.37,1582.85,1406.22,14.62,21.61,554.00,...,2388.04,8133.80,8.4294,0.03,393,2388,100.0,38.90,23.4044,187
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
188,-0.0067,0.0003,100.0,518.67,643.75,1602.38,1422.78,14.62,21.61,551.94,...,2388.23,8117.69,8.5207,0.03,396,2388,100.0,38.51,22.9588,4
189,-0.0006,0.0002,100.0,518.67,644.18,1596.17,1428.01,14.62,21.61,550.70,...,2388.33,8117.51,8.5183,0.03,395,2388,100.0,38.48,23.1127,3
190,-0.0027,0.0001,100.0,518.67,643.64,1599.22,1425.95,14.62,21.61,551.29,...,2388.35,8112.58,8.5223,0.03,398,2388,100.0,38.49,23.0675,2
191,-0.0000,-0.0004,100.0,518.67,643.34,1602.36,1425.77,14.62,21.61,550.92,...,2388.30,8114.61,8.5174,0.03,394,2388,100.0,38.45,23.1295,1


In [None]:
train.head(2)

### Using clipped RUL
Assuming cliiped RUL over one that decreases linearly overtime may better reflect real operating condtions. See 

https://towardsdatascience.com/the-importance-of-problem-framing-for-supervised-predictive-maintenance-solutions-cc8646826093

In [21]:
train.loc[("fd001", 1)].rul.clip(upper=125)

cycles
1      125
2      125
3      125
4      125
5      125
      ... 
188      4
189      3
190      2
191      1
192      0
Name: rul, Length: 192, dtype: int64

In [22]:
train["rul_clipped"] = train.rul.clip(upper=125)

In [23]:
train["rul_clipped"]

train_data  unit_number  cycles
fd001       1            1         125
                         2         125
                         3         125
                         4         125
                         5         125
                                  ... 
fd004       249          251         4
                         252         3
                         253         2
                         254         1
                         255         0
Name: rul_clipped, Length: 160359, dtype: int64

### Model using time series

In [None]:
train.index

In [None]:
train.loc["fd001"]#["unit_number"]

In [None]:
train.loc[("fd001", 1), :]

In [None]:
train.loc[("fd001", slice(None)), :]

In [None]:
###train.loc[("fd001", slice(None)), :] ###train.loc[("fd001"), :]  ### train.loc[("fd001")]

In [None]:
train.loc[("fd001")].index.get_level_values("unit_number").unique() #train.index.get_level_values("unit_number").unique()

In [None]:
train.head(2)

In [None]:
def plot_feature(train_data, unit, feature):
    #plt.figure(figsize=(12, 6))
    #plt.plot("rul", "sensor2", data=train.loc[("fd001", 1)])
    #plt.plot("rul", feature, data=train.loc[(train_data, unit)])
    plt.plot(feature, data=train.loc[(train_data, unit)])
    #plt.xlim(0, 250)
    plt.xlabel("cycles")
    plt.ylabel(feature)

In [None]:
#plt.plot("sensor2", data=train.loc[("fd001", 1)])

In [24]:
#def plot_sensor(sensor):
#    plt.figure(figsize = (12, 6))
#    
#    for unit in train.loc[("fd001", )]
#    

In [None]:
#for unit in train.loc[("fd001")].index.get_level_values("unit_number").unique():
#    print(unit)

In [None]:
plot_feature("fd001", 1, "sensor2")

In [None]:
print(train.loc[("fd001")].index.get_level_values("unit_number").unique())

In [66]:
drop_sensors = ['sensor1','sensor5','sensor6','sensor10','sensor16','sensor18','sensor19']
drop_settings =  ["operational_setting_1", "operational_setting_2", "operational_setting_3"]
drop_targets = ["rul"] # ["rul", rul_clipped"]
drop_labels = drop_sensors + drop_settings + drop_targets
drop_test_labels = drop_sensors + drop_settings
print(drop_labels)

['sensor1', 'sensor5', 'sensor6', 'sensor10', 'sensor16', 'sensor18', 'sensor19', 'operational_setting_1', 'operational_setting_2', 'operational_setting_3', 'rul']


In [37]:
Xtrain = train.loc[("fd001")].drop(drop_labels, axis=1)

In [38]:
Xtrain.head(2)

Unnamed: 0_level_0,Unnamed: 1_level_0,sensor2,sensor3,sensor4,sensor7,sensor8,sensor9,sensor11,sensor12,sensor13,sensor14,sensor15,sensor17,sensor20,sensor21,rul_clipped
unit_number,cycles,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
1,1,641.82,1589.7,1400.6,554.36,2388.06,9046.19,47.47,521.66,2388.02,8138.62,8.4195,392,39.06,23.419,125
1,2,642.15,1591.82,1403.14,553.75,2388.04,9044.07,47.49,522.28,2388.07,8131.49,8.4318,392,39.0,23.4236,125


In [39]:
remaining_sensors = list(Xtrain.columns.difference(["rul", "rul_clipped"]))
print(remaining_sensors)

['sensor11', 'sensor12', 'sensor13', 'sensor14', 'sensor15', 'sensor17', 'sensor2', 'sensor20', 'sensor21', 'sensor3', 'sensor4', 'sensor7', 'sensor8', 'sensor9']


In [40]:
lag1 = [col + '_lag_1' for col in remaining_sensors]
print(lag1)

['sensor11_lag_1', 'sensor12_lag_1', 'sensor13_lag_1', 'sensor14_lag_1', 'sensor15_lag_1', 'sensor17_lag_1', 'sensor2_lag_1', 'sensor20_lag_1', 'sensor21_lag_1', 'sensor3_lag_1', 'sensor4_lag_1', 'sensor7_lag_1', 'sensor8_lag_1', 'sensor9_lag_1']


In [41]:
Xtrain[lag1] = Xtrain[remaining_sensors].shift(1)

In [44]:
Xtrain.head(4)

Unnamed: 0_level_0,Unnamed: 1_level_0,sensor2,sensor3,sensor4,sensor7,sensor8,sensor9,sensor11,sensor12,sensor13,sensor14,...,sensor15_lag_1,sensor17_lag_1,sensor2_lag_1,sensor20_lag_1,sensor21_lag_1,sensor3_lag_1,sensor4_lag_1,sensor7_lag_1,sensor8_lag_1,sensor9_lag_1
unit_number,cycles,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1,2,642.15,1591.82,1403.14,553.75,2388.04,9044.07,47.49,522.28,2388.07,8131.49,...,8.4195,392.0,641.82,39.06,23.419,1589.7,1400.6,554.36,2388.06,9046.19
1,3,642.35,1587.99,1404.2,554.26,2388.08,9052.94,47.27,522.42,2388.03,8133.23,...,8.4318,392.0,642.15,39.0,23.4236,1591.82,1403.14,553.75,2388.04,9044.07
1,4,642.35,1582.79,1401.87,554.45,2388.11,9049.48,47.13,522.86,2388.08,8133.83,...,8.4178,390.0,642.35,38.95,23.3442,1587.99,1404.2,554.26,2388.08,9052.94
1,5,642.37,1582.85,1406.22,554.0,2388.06,9055.15,47.28,522.19,2388.04,8133.8,...,8.3682,392.0,642.35,38.88,23.3739,1582.79,1401.87,554.45,2388.11,9049.48


In [43]:
Xtrain.dropna(inplace=True)
ytrain = Xtrain.pop('rul_clipped')

In [47]:
Xtrain.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 20630 entries, (1, 2) to (100, 200)
Data columns (total 28 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   sensor2         20630 non-null  float64
 1   sensor3         20630 non-null  float64
 2   sensor4         20630 non-null  float64
 3   sensor7         20630 non-null  float64
 4   sensor8         20630 non-null  float64
 5   sensor9         20630 non-null  float64
 6   sensor11        20630 non-null  float64
 7   sensor12        20630 non-null  float64
 8   sensor13        20630 non-null  float64
 9   sensor14        20630 non-null  float64
 10  sensor15        20630 non-null  float64
 11  sensor17        20630 non-null  int64  
 12  sensor20        20630 non-null  float64
 13  sensor21        20630 non-null  float64
 14  sensor11_lag_1  20630 non-null  float64
 15  sensor12_lag_1  20630 non-null  float64
 16  sensor13_lag_1  20630 non-null  float64
 17  sensor14_lag_1  20630

In [49]:
ytrain

unit_number  cycles
1            2         125
             3         125
             4         125
             5         125
             6         125
                      ... 
100          196         4
             197         3
             198         2
             199         1
             200         0
Name: rul_clipped, Length: 20630, dtype: int64

In [50]:
lm = LinearRegression()

In [51]:
lm.fit(Xtrain, ytrain)

LinearRegression()

In [52]:
lm

LinearRegression()

In [63]:
test

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,operational_setting_1,operational_setting_2,operational_setting_3,sensor1,sensor2,sensor3,sensor4,sensor5,sensor6,sensor7,...,sensor12,sensor13,sensor14,sensor15,sensor16,sensor17,sensor18,sensor19,sensor20,sensor21
test_data,unit_number,cycles,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
fd001,1,1,0.0023,0.0003,100.0,518.67,643.02,1585.29,1398.21,14.62,21.61,553.90,...,521.72,2388.03,8125.55,8.4052,0.03,392,2388,100.0,38.86,23.3735
fd001,1,2,-0.0027,-0.0003,100.0,518.67,641.71,1588.45,1395.42,14.62,21.61,554.85,...,522.16,2388.06,8139.62,8.3803,0.03,393,2388,100.0,39.02,23.3916
fd001,1,3,0.0003,0.0001,100.0,518.67,642.46,1586.94,1401.34,14.62,21.61,554.11,...,521.97,2388.03,8130.10,8.4441,0.03,393,2388,100.0,39.08,23.4166
fd001,1,4,0.0042,0.0000,100.0,518.67,642.44,1584.12,1406.42,14.62,21.61,554.07,...,521.38,2388.05,8132.90,8.3917,0.03,391,2388,100.0,39.00,23.3737
fd001,1,5,0.0014,0.0000,100.0,518.67,642.51,1587.19,1401.92,14.62,21.61,554.16,...,522.15,2388.03,8129.54,8.4031,0.03,390,2388,100.0,38.99,23.4130
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
fd004,248,277,41.9991,0.8401,100.0,445.00,550.30,1364.40,1129.17,3.91,5.72,138.34,...,130.87,2388.50,8112.61,9.4427,0.02,331,2212,100.0,10.53,6.2620
fd004,248,278,20.0026,0.7005,100.0,491.19,608.00,1494.75,1260.88,9.35,13.66,334.75,...,314.51,2388.33,8086.83,9.2772,0.02,366,2324,100.0,24.33,14.6486
fd004,248,279,34.9988,0.8413,100.0,449.44,555.92,1370.65,1130.97,5.48,8.00,194.92,...,182.76,2388.64,8100.84,9.3982,0.02,336,2223,100.0,14.69,8.8389
fd004,248,280,20.0027,0.7000,100.0,491.19,608.19,1489.11,1256.25,9.35,13.66,333.15,...,314.17,2388.37,8085.24,9.2727,0.03,366,2324,100.0,24.44,14.6887


In [78]:
rul_fd001_raw

0     112
1      98
2      69
3      82
4      91
     ... 
95    137
96     82
97     59
98    117
99     20
Name: rul_fd001, Length: 100, dtype: int64

In [64]:
def evaluate(y_true, y_hat, label='test'):
    mse = mean_squared_error(y_true, y_hat)
    rmse = np.sqrt(mse)
    variance = r2_score(y_true, y_hat)
    print('{} set RMSE:{}, R2:{}'.format(label, rmse, variance))

In [67]:
Xtest = test.loc[("fd001")].drop(drop_test_labels, axis=1)

In [68]:
Xtest[lag1] = Xtest[remaining_sensors].shift(1)

In [69]:
Xtest.dropna(inplace=True)

In [70]:
Xtest

Unnamed: 0_level_0,Unnamed: 1_level_0,sensor2,sensor3,sensor4,sensor7,sensor8,sensor9,sensor11,sensor12,sensor13,sensor14,...,sensor15_lag_1,sensor17_lag_1,sensor2_lag_1,sensor20_lag_1,sensor21_lag_1,sensor3_lag_1,sensor4_lag_1,sensor7_lag_1,sensor8_lag_1,sensor9_lag_1
unit_number,cycles,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1,2,641.71,1588.45,1395.42,554.85,2388.01,9054.42,47.50,522.16,2388.06,8139.62,...,8.4052,392.0,643.02,38.86,23.3735,1585.29,1398.21,553.90,2388.04,9050.17
1,3,642.46,1586.94,1401.34,554.11,2388.05,9056.96,47.50,521.97,2388.03,8130.10,...,8.3803,393.0,641.71,39.02,23.3916,1588.45,1395.42,554.85,2388.01,9054.42
1,4,642.44,1584.12,1406.42,554.07,2388.03,9045.29,47.28,521.38,2388.05,8132.90,...,8.4441,393.0,642.46,39.08,23.4166,1586.94,1401.34,554.11,2388.05,9056.96
1,5,642.51,1587.19,1401.92,554.16,2388.01,9044.55,47.31,522.15,2388.03,8129.54,...,8.3917,391.0,642.44,39.00,23.3737,1584.12,1406.42,554.07,2388.03,9045.29
1,6,642.11,1579.12,1395.13,554.22,2388.00,9050.96,47.26,521.92,2388.08,8127.46,...,8.4031,390.0,642.51,38.99,23.4130,1587.19,1401.92,554.16,2388.01,9044.55
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
100,194,643.24,1599.45,1415.79,553.41,2388.02,9142.37,47.69,520.69,2388.00,8213.28,...,8.4429,395.0,643.10,38.63,23.2952,1595.60,1414.62,553.18,2388.08,9139.88
100,195,643.22,1595.69,1422.05,553.22,2388.05,9140.68,47.60,521.05,2388.09,8210.85,...,8.4715,394.0,643.24,38.65,23.1974,1599.45,1415.79,553.41,2388.02,9142.37
100,196,643.44,1593.15,1406.82,553.04,2388.11,9146.81,47.57,521.18,2388.04,8217.24,...,8.4512,395.0,643.22,38.57,23.2771,1595.69,1422.05,553.22,2388.05,9140.68
100,197,643.26,1594.99,1419.36,553.37,2388.07,9148.85,47.61,521.33,2388.08,8220.48,...,8.4569,395.0,643.44,38.62,23.2051,1593.15,1406.82,553.04,2388.11,9146.81


In [77]:
#Xtrain

In [72]:
y_hat_test = lm.predict(Xtest)

In [74]:
print(y_hat_test)

[124.81486129 118.68471283 119.2706235  ...  32.79880558  31.07329941
  15.82612867]


In [76]:
len(y_hat_test)

13095