<a href="https://colab.research.google.com/github/chaoshune/cmapass/blob/main/LSTM_SHAP_CMAPSS_FD001.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# LSTM + SHAP for C-MAPSS FD001 Dataset

In [None]:
!pip install shap tensorflow pandas scikit-learn matplotlib

In [None]:

import os
import urllib.request

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import shap
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import LSTM, Dense, Input
from tensorflow.keras.models import Sequential


## Step 1: Load Data and Preprocess

In [None]:
# Make sure the training file is available in the working directory before it is read.
# NOTE(review): the URL assumes train_FD001.txt sits at the root of the `main` branch of the
# repository this notebook's Colab badge points to — confirm, or upload the file manually.
DATA_FILE = 'train_FD001.txt'
DATA_URL = 'https://raw.githubusercontent.com/chaoshune/cmapass/main/train_FD001.txt'

# Skip the download when the file is already present (e.g. a local checkout of the repo).
if not os.path.exists(DATA_FILE):
    urllib.request.urlretrieve(DATA_URL, DATA_FILE)


In [None]:

# Column layout used for the raw file: engine id, cycle counter,
# three operating settings and 21 sensor channels.
col_names = (
    ['engine_id', 'cycle']
    + [f'op_set_{i}' for i in range(1, 4)]
    + [f'sensor_{i}' for i in range(1, 22)]
)

# Raw string for the separator regex: '\s' inside a plain string literal is an
# invalid escape sequence and triggers a warning on recent Python versions.
df = pd.read_csv('train_FD001.txt', sep=r'\s+', header=None, names=col_names)

# Remaining useful life = last recorded cycle of the engine minus the current cycle.
df['RUL'] = df.groupby('engine_id')['cycle'].transform('max') - df['cycle']

# Sensor channels left out of the model inputs.
# NOTE(review): presumably these are (near-)constant in FD001 — confirm.
drop_cols = ['sensor_1','sensor_5','sensor_6','sensor_10','sensor_16','sensor_18','sensor_19']
feature_cols = ['op_set_1','op_set_2','op_set_3'] + [f'sensor_{i}' for i in range(1,22) if f'sensor_{i}' not in drop_cols]

# Rescale every feature column to the [0, 1] range.
# NOTE(review): the scaler is fit on all rows before any train/test split, so
# statistics of held-out rows influence the scaling — consider fitting on
# training engines only.
scaler = MinMaxScaler()
df[feature_cols] = scaler.fit_transform(df[feature_cols])


## Step 2: Generate Sequences

In [None]:

def gen_sequence(id_df, seq_len, feature_cols):
    """Build all overlapping windows of ``seq_len`` consecutive rows.

    Parameters
    ----------
    id_df : pandas.DataFrame
        Rows to window over, in the order they should appear in each window.
    seq_len : int
        Number of consecutive rows per window; must be at least 1.
    feature_cols : list of str
        Columns of ``id_df`` to include in each window.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(id_df) - seq_len + 1, seq_len, len(feature_cols))``.
        When ``id_df`` has fewer than ``seq_len`` rows the first dimension is 0,
        but the trailing dimensions are kept so results can still be stacked.

    Raises
    ------
    ValueError
        If ``seq_len`` is smaller than 1.
    """
    if seq_len < 1:
        raise ValueError(f"seq_len must be >= 1, got {seq_len}")

    data_array = id_df[feature_cols].values
    n_windows = len(data_array) - seq_len + 1

    # Too few rows for even one window: return an empty array that still has
    # the right rank instead of a shape-(0,) array.
    if n_windows <= 0:
        return np.empty((0, seq_len) + data_array.shape[1:], dtype=data_array.dtype)

    return np.array([data_array[start:start + seq_len] for start in range(n_windows)])

SEQ_LEN = 50  # number of consecutive cycles in each input window

seq_data, labels = [], []

# One pass over the engines; sort=False keeps them in order of first appearance,
# i.e. the same order as df['engine_id'].unique().
for _, engine_df in df.groupby('engine_id', sort=False):
    if len(engine_df) < SEQ_LEN:
        # Not enough cycles for a single full window: this engine contributes nothing.
        continue
    seq_data.extend(gen_sequence(engine_df, SEQ_LEN, feature_cols))
    # Each window is labelled with the RUL at its last time step.
    labels.extend(engine_df['RUL'].values[SEQ_LEN - 1:])

X = np.array(seq_data)
y = np.array(labels)

# Every window must have exactly one label.
assert len(X) == len(y), f"window/label mismatch: {len(X)} vs {len(y)}"


## Step 3: Train/Test Split

In [None]:

# Split by engine, not by window: consecutive windows of one engine share all but
# one time step, so a window-level random split would place near-duplicates of
# the training windows in the test set.

# Rebuild the engine id of every window, in the order the windows were generated
# (engines in first-appearance order, len - SEQ_LEN + 1 windows per engine).
windows_per_engine = df.groupby('engine_id', sort=False).size() - SEQ_LEN + 1
windows_per_engine = windows_per_engine[windows_per_engine > 0]
window_engine_ids = np.repeat(windows_per_engine.index.to_numpy(), windows_per_engine.to_numpy())
assert len(window_engine_ids) == len(X), "window-to-engine mapping is out of sync with X"

# Hold out 20% of the engines.
train_engines, test_engines = train_test_split(
    windows_per_engine.index.to_numpy(), test_size=0.2, random_state=42
)
test_mask = np.isin(window_engine_ids, test_engines)

# Shuffle within each side so that leading slices (e.g. X_train[:50]) are a
# random sample rather than the first windows of a single engine.
split_rng = np.random.default_rng(42)
train_idx = split_rng.permutation(np.flatnonzero(~test_mask))
test_idx = split_rng.permutation(np.flatnonzero(test_mask))

X_train, X_test = X[train_idx], X[test_idx]
y_train, y_test = y[train_idx], y[test_idx]


## Step 4: Train LSTM Model

In [None]:

# Seed the Python, NumPy and TensorFlow RNGs so weight initialisation and batch
# shuffling are repeatable across Restart & Run All.
tf.keras.utils.set_random_seed(42)

# Single-layer LSTM regressor. An explicit Input layer replaces the
# `input_shape=` argument on the first layer.
model = Sequential([
    Input(shape=X_train.shape[1:]),  # (SEQ_LEN, number of feature columns)
    LSTM(64),
    Dense(1),  # one output: the predicted RUL
])
model.compile(optimizer='adam', loss='mse')

# validation_data is used for monitoring only; it does not affect the fitted weights.
model.fit(X_train, y_train, epochs=10, batch_size=64, validation_data=(X_test, y_test))


## Step 5: SHAP Analysis

In [None]:

# Collapse each window into a single row, one row per sample, because the
# explainer below is given 2-D inputs.
X_train_flat = X_train.reshape(len(X_train), -1)
X_test_flat = X_test.reshape(len(X_test), -1)

def predict_lstm(x_flat):
    """Run the LSTM on flattened windows.

    ``x_flat`` is reshaped to ``(-1, SEQ_LEN, X.shape[2])`` before being passed
    to ``model.predict``; the prediction array is returned unchanged.
    """
    windows = x_flat.reshape(-1, SEQ_LEN, X.shape[2])
    return model.predict(windows, verbose=0)

N_BACKGROUND = 50  # background rows the explainer uses as its reference data
N_EXPLAIN = 10     # test rows to explain (KernelExplainer is slow, keep this small)

explainer = shap.KernelExplainer(predict_lstm, X_train_flat[:N_BACKGROUND])
shap_values = explainer.shap_values(X_test_flat[:N_EXPLAIN])

# predict_lstm returns a single output column. Depending on the installed SHAP
# version this comes back as a one-element list or as an array with a trailing
# axis of length 1; reduce both to a 2-D (samples, features) array so the
# summary plot matches the data matrix.
if isinstance(shap_values, list):
    shap_values = shap_values[0]
shap_values = np.asarray(shap_values)
if shap_values.ndim == 3 and shap_values.shape[-1] == 1:
    shap_values = shap_values[..., 0]

# Name order matches the row-major flattening of each window:
# time step varies slowest, feature column fastest.
feature_names = [f"{col}_t{t}" for t in range(SEQ_LEN) for col in feature_cols]

shap.summary_plot(shap_values, X_test_flat[:N_EXPLAIN], feature_names=feature_names)
