In [38]:
import pandas as pd
import numpy as np

# Machine Learning
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import ShuffleSplit
from sklearn.ensemble import RandomForestRegressor

In [39]:
# Notebook-wide pandas display settings: no limit on displayed columns,
# at most 500 displayed rows, and no wrapping of wide frames across lines.
for option_name, option_value in (
    ('display.max_columns', None),
    ('display.max_rows', 500),
    ('display.expand_frame_repr', False),
):
    pd.set_option(option_name, option_value)

In [40]:
# Identifier columns: which engine a row belongs to and its cycle counter.
index_names = ['engine', 'cycle']

# Operating-setting columns.
setting_names = ['setting_1', 'setting_2', 'setting_3']

# Descriptive names for the 21 sensor columns, in file order.
# The strings are used verbatim as DataFrame column labels by later cells,
# so they are kept exactly as originally written (including spacing).
sensor_names = list((
    "(Fan inlet temperature) (◦R)",
    "(LPC outlet temperature) (◦R)",
    "(HPC outlet temperature) (◦R)",
    "(LPT outlet temperature) (◦R)",
    "(Fan inlet Pressure) (psia)",
    "(bypass-duct pressure) (psia)",
    "(HPC outlet pressure) (psia)",
    "(Physical fan speed) (rpm)",
    "(Physical core speed) (rpm)",
    "(Engine pressure ratio(P50/P2)",
    "(HPC outlet Static pressure) (psia)",
    "(Ratio of fuel flow to Ps30) (pps/psia)",
    "(Corrected fan speed) (rpm)",
    "(Corrected core speed) (rpm)",
    "(Bypass Ratio) ",
    "(Burner fuel-air ratio)",
    "(Bleed Enthalpy)",
    "(Required fan speed)",
    "(Required fan conversion speed)",
    "(High-pressure turbines Cool air flow)",
    "(Low-pressure turbines Cool air flow)",
))

# Full header: identifiers, then settings, then sensors.
col_names = [*index_names, *setting_names, *sensor_names]

In [41]:
# Load the FD001 training set, the test set, and the per-engine RUL labels.
# The files have no header row and use runs of whitespace as the delimiter.
# The separator is written as a raw string so the regex backslash is passed
# through literally instead of being parsed as a (invalid) string escape.
dftrain = pd.read_csv('CMAPSSData/train_FD001.txt', sep=r'\s+', header=None, names=col_names)
dfvalid = pd.read_csv('CMAPSSData/test_FD001.txt', sep=r'\s+', header=None, names=col_names)
y_valid = pd.read_csv('CMAPSSData/RUL_FD001.txt', sep=r'\s+', header=None, names=['RUL'])

In [42]:
# Preview the first rows of the RUL labels loaded from RUL_FD001.txt.
y_valid.head()

Unnamed: 0,RUL
0,112
1,98
2,69
3,82
4,91


In [43]:
# Preview the first rows of the training frame to check the assigned column names.
dftrain.head()

Unnamed: 0,engine,cycle,setting_1,setting_2,setting_3,(Fan inlet temperature) (◦R),(LPC outlet temperature) (◦R),(HPC outlet temperature) (◦R),(LPT outlet temperature) (◦R),(Fan inlet Pressure) (psia),(bypass-duct pressure) (psia),(HPC outlet pressure) (psia),(Physical fan speed) (rpm),(Physical core speed) (rpm),(Engine pressure ratio(P50/P2),(HPC outlet Static pressure) (psia),(Ratio of fuel flow to Ps30) (pps/psia),(Corrected fan speed) (rpm),(Corrected core speed) (rpm),(Bypass Ratio),(Burner fuel-air ratio),(Bleed Enthalpy),(Required fan speed),(Required fan conversion speed),(High-pressure turbines Cool air flow),(Low-pressure turbines Cool air flow)
0,1,1,-0.0007,-0.0004,100.0,518.67,641.82,1589.7,1400.6,14.62,21.61,554.36,2388.06,9046.19,1.3,47.47,521.66,2388.02,8138.62,8.4195,0.03,392,2388,100.0,39.06,23.419
1,1,2,0.0019,-0.0003,100.0,518.67,642.15,1591.82,1403.14,14.62,21.61,553.75,2388.04,9044.07,1.3,47.49,522.28,2388.07,8131.49,8.4318,0.03,392,2388,100.0,39.0,23.4236
2,1,3,-0.0043,0.0003,100.0,518.67,642.35,1587.99,1404.2,14.62,21.61,554.26,2388.08,9052.94,1.3,47.27,522.42,2388.03,8133.23,8.4178,0.03,390,2388,100.0,38.95,23.3442
3,1,4,0.0007,0.0,100.0,518.67,642.35,1582.79,1401.87,14.62,21.61,554.45,2388.11,9049.48,1.3,47.13,522.86,2388.08,8133.83,8.3682,0.03,392,2388,100.0,38.88,23.3739
4,1,5,-0.0019,-0.0002,100.0,518.67,642.37,1582.85,1406.22,14.62,21.61,554.0,2388.06,9055.15,1.3,47.28,522.19,2388.04,8133.8,8.4294,0.03,393,2388,100.0,38.9,23.4044


In [44]:
# (rows, columns) of the training frame.
dftrain.shape

(20631, 26)

In [48]:
# Work on independent copies so the frames read from disk stay untouched.
train, valid = dftrain.copy(), dfvalid.copy()

In [45]:
def add_RUL_column(df):
    """Return a copy of ``df`` with a remaining-useful-life column ``RUL``.

    For every row, ``RUL`` is the largest ``cycle`` value observed for that
    row's ``engine`` minus the row's own ``cycle``, so the last recorded cycle
    of each engine gets ``RUL == 0``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``'engine'`` and ``'cycle'``.

    Returns
    -------
    pandas.DataFrame
        A new frame; ``df`` itself is not modified.
    """
    # Last observed cycle per engine, indexed by engine id.
    last_cycle = df.groupby(by='engine')['cycle'].max().rename('max_cycle')

    # Broadcast each engine's last cycle onto its rows via the engine key.
    with_last = df.merge(last_cycle.to_frame(), left_on='engine', right_index=True)
    with_last["RUL"] = with_last["max_cycle"] - with_last['cycle']

    # The helper column is only needed to compute RUL.
    return with_last.drop(columns="max_cycle")

In [49]:
# Build the training frame with the RUL target attached (computed from the raw dftrain).
train = add_RUL_column(dftrain)

In [51]:
#Rul analysis
maxrul_u = train.groupby('engine').max().reset_index()
maxrul_u.head()

Unnamed: 0,engine,cycle,setting_1,setting_2,setting_3,(Fan inlet temperature) (◦R),(LPC outlet temperature) (◦R),(HPC outlet temperature) (◦R),(LPT outlet temperature) (◦R),(Fan inlet Pressure) (psia),(bypass-duct pressure) (psia),(HPC outlet pressure) (psia),(Physical fan speed) (rpm),(Physical core speed) (rpm),(Engine pressure ratio(P50/P2),(HPC outlet Static pressure) (psia),(Ratio of fuel flow to Ps30) (pps/psia),(Corrected fan speed) (rpm),(Corrected core speed) (rpm),(Bypass Ratio),(Burner fuel-air ratio),(Bleed Enthalpy),(Required fan speed),(Required fan conversion speed),(High-pressure turbines Cool air flow),(Low-pressure turbines Cool air flow),RUL
0,1,192,0.0047,0.0005,100.0,518.67,644.21,1605.44,1432.52,14.62,21.61,554.96,2388.32,9061.21,1.3,48.33,522.86,2388.35,8140.58,8.5227,0.03,398,2388,100.0,39.18,23.4999,191
1,2,287,0.0076,0.0006,100.0,518.67,643.94,1610.1,1431.17,14.62,21.61,555.45,2388.24,9109.36,1.3,48.27,523.26,2388.26,8175.57,8.5377,0.03,398,2388,100.0,39.24,23.6005,286
2,3,179,0.0058,0.0005,100.0,518.67,643.93,1606.5,1438.51,14.62,21.61,555.28,2388.19,9197.52,1.3,48.38,523.18,2388.2,8255.34,8.5363,0.03,399,2388,100.0,39.23,23.5181,178
3,4,189,0.0059,0.0006,100.0,518.67,644.53,1612.11,1434.12,14.62,21.61,554.75,2388.18,9203.22,1.3,48.21,522.48,2388.17,8259.42,8.5462,0.03,399,2388,100.0,39.21,23.5074,188
4,5,269,0.0055,0.0005,100.0,518.67,644.02,1609.41,1434.59,14.62,21.61,555.57,2388.21,9157.05,1.3,48.27,523.04,2388.23,8215.19,8.541,0.03,398,2388,100.0,39.29,23.5503,268


In [None]:
def plot_signal(df, Sensor_dic, signal_name, unit_col=None):
    """Plot a smoothed sensor signal against RUL for every 10th unit.

    Each selected unit's rows are smoothed with a 10-row rolling mean and drawn
    as one line. The x-axis is reversed so RUL counts down towards zero.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the unit-identifier column, an ``'RUL'`` column and
        ``signal_name``.
    Sensor_dic : mapping or None
        Maps column names to y-axis labels. When it is None or has no entry
        for ``signal_name``, the column name itself is used as the label.
    signal_name : str
        Column plotted on the y-axis.
    unit_col : str, optional
        Name of the unit-identifier column. When omitted, ``'unit_number'`` is
        used if ``df`` has it, otherwise ``'engine'`` (the identifier column
        name used by the frames in this notebook).
    """
    # Imported locally so the function does not depend on another cell
    # having imported pyplot first.
    import matplotlib.pyplot as plt

    if unit_col is None:
        unit_col = 'unit_number' if 'unit_number' in df.columns else 'engine'

    if Sensor_dic is not None and signal_name in Sensor_dic:
        y_label = Sensor_dic[signal_name]
    else:
        y_label = signal_name

    plt.figure(figsize=(13,5))
    for i in df[unit_col].unique():
        if (i % 10 == 0):   # plot only every 10th unit to keep the figure readable
            plt.plot('RUL', signal_name, data=df[df[unit_col]==i].rolling(10).mean())

    plt.xlim(250, 0)  # reverse the x-axis so RUL counts down to zero
    plt.xticks(np.arange(0, 300, 25))
    plt.ylabel(y_label)
    plt.xlabel('Remaining Useful Life')
    plt.show()

In [52]:
from sklearn.model_selection import train_test_split

# Identifier and operating-setting columns are excluded from the model inputs.
drop_labels = index_names + setting_names

# 70/30 random row split with a fixed seed. The feature frames still contain
# the 'RUL' column at this point; it is also passed separately as the target.
X_train, X_test, y_train, y_test = train_test_split(
    train.drop(columns=drop_labels),
    train['RUL'],
    test_size=0.3,
    random_state=42,
)

In [36]:
X_train

Unnamed: 0,(Fan inlet temperature) (◦R),(LPC outlet temperature) (◦R),(HPC outlet temperature) (◦R),(LPT outlet temperature) (◦R),(Fan inlet Pressure) (psia),(bypass-duct pressure) (psia),(HPC outlet pressure) (psia),(Physical fan speed) (rpm),(Physical core speed) (rpm),(Engine pressure ratio(P50/P2),(HPC outlet Static pressure) (psia),(Ratio of fuel flow to Ps30) (pps/psia),(Corrected fan speed) (rpm),(Corrected core speed) (rpm),(Bypass Ratio),(Burner fuel-air ratio),(Bleed Enthalpy),(Required fan speed),(Required fan conversion speed),(High-pressure turbines Cool air flow),(Low-pressure turbines Cool air flow),RUL
12862,518.67,643.08,1592.07,1402.73,14.62,21.61,554.10,2388.09,9062.50,1.3,47.67,521.78,2388.04,8133.34,8.4301,0.03,393,2388,100.0,38.67,23.3724,54
9936,518.67,642.21,1580.72,1394.09,14.62,21.61,553.34,2387.99,9066.77,1.3,47.21,522.67,2388.03,8144.06,8.3837,0.03,392,2388,100.0,39.08,23.3686,185
12025,518.67,642.09,1586.25,1404.03,14.62,21.61,554.27,2388.06,9061.07,1.3,47.22,521.70,2388.01,8139.32,8.4244,0.03,391,2388,100.0,38.96,23.3025,101
14526,518.67,643.52,1597.95,1423.87,14.62,21.61,552.22,2388.21,9031.18,1.3,47.94,520.08,2388.16,8109.80,8.4930,0.03,394,2388,100.0,38.54,23.1882,24
16747,518.67,642.34,1586.62,1401.82,14.62,21.61,554.13,2388.06,9054.55,1.3,47.22,522.24,2388.03,8128.29,8.4081,0.03,392,2388,100.0,39.08,23.4485,137
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11284,518.67,642.19,1592.99,1390.35,14.62,21.61,553.93,2388.06,9039.80,1.3,47.28,521.74,2388.07,8133.16,8.4340,0.03,393,2388,100.0,38.95,23.2258,107
11964,518.67,641.81,1580.59,1404.09,14.62,21.61,554.78,2388.01,9065.90,1.3,47.27,523.13,2387.98,8145.57,8.3866,0.03,390,2388,100.0,38.93,23.4079,162
5390,518.67,642.50,1600.91,1415.54,14.62,21.61,553.50,2388.09,9061.03,1.3,47.48,521.68,2388.08,8150.90,8.3974,0.03,392,2388,100.0,38.75,23.3561,74
860,518.67,641.93,1578.03,1396.28,14.62,21.61,554.52,2387.97,9062.88,1.3,47.26,522.19,2388.03,8153.40,8.4214,0.03,393,2388,100.0,39.07,23.3733,255


In [53]:
from sklearn.preprocessing import MinMaxScaler

# Sensor columns excluded from the model inputs of every split.
# NOTE(review): these columns show a single repeated value in the head()
# previews; confirm that being constant is the reason they are excluded.
dropped_sensor_cols = [
    '(Fan inlet temperature) (◦R)',
    '(Fan inlet Pressure) (psia)',
    '(bypass-duct pressure) (psia)',
    '(Engine pressure ratio(P50/P2)',
    '(Burner fuel-air ratio)',
    '(Required fan speed)',
    '(Required fan conversion speed)',
]

scaler = MinMaxScaler()

# Keep copies of the split frames as they were before any column is removed.
Tester = X_test.copy()
Trainer = X_train.copy()

# Drop the target variable and the excluded sensors from the feature frames.
X_train = X_train.drop(columns=['RUL'] + dropped_sensor_cols)
X_test = X_test.drop(columns=['RUL'] + dropped_sensor_cols)

# Fit the scaler on the training features only, then reuse the same fitted
# min/max for the other splits. Refitting on test/validation data would scale
# each split differently and leak held-out statistics into preprocessing.
X_train_s = scaler.fit_transform(X_train)
X_test_s = scaler.transform(X_test)

# Keep only the last row of each engine so X_valid lines up with y_valid
# (one RUL label per engine).
X_valid = valid.groupby('engine').last().reset_index().drop(columns=drop_labels)
Validator = X_valid.copy()
X_valid = X_valid.drop(columns=dropped_sensor_cols)

# Scale the validation features with the scaler fitted on the training data.
X_valid_s = scaler.transform(X_valid)

## Random Forest Regressor

In [55]:
# Seed the forest so the cross-validated scores and the selected
# hyper-parameters are the same on every run.
rf_reg = RandomForestRegressor(random_state=42)

# 3 x 3 grid over forest size and tree depth.
rf_param_grid = {
    'n_estimators': [50, 90, 120],
    'max_depth': [8, 9, 10]
}

# 3-fold cross-validation using the estimator's default score.
rf_grid = GridSearchCV(rf_reg, rf_param_grid, cv=3)

rf_grid.fit(X_train_s, y_train)

# Best mean cross-validated score and the parameter combination that achieved it.
print(rf_grid.best_score_, rf_grid.best_params_)

0.641663203028041 {'max_depth': 10, 'n_estimators': 90}
