In [None]:
# ================== TRAINING CODE ==================
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

# ---- Load the raw training table ----
# Column layout assigned to the file: engine id, cycle counter, three
# operating settings, then sensors 1..21.
cols = (
    ['unit', 'cycle', 'op_setting_1', 'op_setting_2', 'op_setting_3']
    + [f'sensor_{i}' for i in range(1, 22)]
)
df = pd.read_csv("/content/train_FD001.txt", sep=' ', header=None)
# Columns 26 and 27 are discarded before naming the rest
# (presumably empty columns caused by trailing spaces in the file -- verify).
df = df.drop(columns=[26, 27])
df.columns = cols

# ---- Remaining useful life (RUL) per row ----
# The last recorded cycle of each unit is taken as its end of life, so
# RUL = (last cycle of that unit) - (current cycle).
rul = (
    df.groupby('unit')['cycle']
      .max()
      .rename('max')
      .reset_index()
)
df = df.merge(rul, on='unit')
df['RUL'] = df.pop('max') - df['cycle']

# ---- Cap and log-transform the target ----
# RUL is limited to the range [0, 125]; the model is trained on log1p(RUL).
df['RUL'] = df['RUL'].clip(lower=0, upper=125)
df['log_RUL'] = np.log1p(df['RUL'])

# ---- Model inputs ----
# The order of `features` defines the column order of every input window.
useful_sensors = ['cycle','sensor_2', 'sensor_3', 'sensor_4', 'sensor_7', 'sensor_8',
                  'sensor_11', 'sensor_15', 'sensor_17', 'sensor_20', 'sensor_21']
features = ['op_setting_1', 'op_setting_2', 'op_setting_3'] + useful_sensors

# ---- Scale every feature column to [0, 1] ----
scaler = MinMaxScaler()
scaler.fit(df[features])
df[features] = scaler.transform(df[features])

# Generate sequences
def gen_sequence(id_df, seq_len, features):
    """Return every full-length sliding window of one unit's feature rows.

    Windows advance one row at a time. Window ``k`` covers rows
    ``k .. k + seq_len - 1``; the final window ends on the unit's last row.

    Args:
        id_df: DataFrame holding the rows of a single unit, in cycle order.
        seq_len: Number of consecutive rows per window (must be >= 1).
        features: Column names to extract, in the order the model expects.

    Returns:
        Array of shape ``(n_rows - seq_len + 1, seq_len, len(features))``,
        or an empty array when the unit has fewer than ``seq_len`` rows.

    Raises:
        ValueError: If ``seq_len`` is smaller than 1.
    """
    if seq_len < 1:
        raise ValueError("seq_len must be at least 1")
    data_array = id_df[features].values
    # "+ 1" keeps the last window (the one ending on the final row); without
    # it the window closest to failure is silently dropped.
    n_windows = len(data_array) - seq_len + 1
    return np.array([data_array[i:i + seq_len] for i in range(n_windows)])

def gen_labels(id_df, seq_len, label='log_RUL'):
    """Return one label per window produced by ``gen_sequence``.

    The label of a window is the value of ``label`` at the window's LAST row,
    so the target describes the same cycle the window ends on.

    Args:
        id_df: DataFrame holding the rows of a single unit, in cycle order.
        seq_len: Window length used for ``gen_sequence`` (must be >= 1).
        label: Name of the target column.

    Returns:
        1-D array of length ``n_rows - seq_len + 1`` (empty when the unit has
        fewer than ``seq_len`` rows).

    Raises:
        ValueError: If ``seq_len`` is smaller than 1.
    """
    if seq_len < 1:
        raise ValueError("seq_len must be at least 1")
    # Start at index seq_len - 1: that is the last row of the first window.
    return id_df[label].values[seq_len - 1:]

# Seed Python, NumPy and TensorFlow so weight initialisation and dropout are
# repeatable (as far as the backend allows) under Restart & Run All.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Window length, in cycles, fed to the LSTM.
SEQ_LEN = 50
seq_array, label_array = [], []

# Build windows unit by unit so that no window straddles two engines.
for unit in df['unit'].unique():
    unit_df = df[df['unit'] == unit]
    # Units with fewer than SEQ_LEN rows cannot fill a window and are skipped.
    if len(unit_df) >= SEQ_LEN:
        seq_array.extend(gen_sequence(unit_df, SEQ_LEN, features))
        label_array.extend(gen_labels(unit_df, SEQ_LEN))

X = np.asarray(seq_array)
y = np.asarray(label_array)
assert len(X) == len(y), "every window needs exactly one label"

# Split
# NOTE(review): windows are generated with a stride of one row, so neighbouring
# windows share most of their rows. A random window-level split therefore puts
# near-duplicate samples in both train and validation; consider splitting by
# unit if val_loss is meant to estimate generalisation to unseen engines.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=SEED)

# Model: two stacked LSTMs with L2-regularised kernels and dropout, followed by
# a small dense head producing a single regression output (trained on log_RUL).
model = Sequential([
    Input((SEQ_LEN, X.shape[2])),
    LSTM(64, return_sequences=True, kernel_regularizer=l2(1e-4)),
    Dropout(0.4),
    LSTM(32, kernel_regularizer=l2(1e-4)),
    Dropout(0.4),
    Dense(32, activation='relu'),
    Dense(1)
])

model.compile(loss='mse', optimizer='adam')
model.summary()

# Callbacks
# - stop once val_loss has not improved for 10 epochs
# - keep only the best checkpoint on disk (this is the file the test cell loads)
# - halve the learning rate after 5 stagnant epochs, down to 1e-5
early_stopping = EarlyStopping(monitor='val_loss', patience=10)
model_checkpoint = ModelCheckpoint("best_model.keras", save_best_only=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-5)

# Train
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=50,
    batch_size=32,
    callbacks=[early_stopping, model_checkpoint, reduce_lr],
    verbose=1
)

In [None]:
import pickle
# Persist the fitted scaler. The context manager guarantees the file is
# flushed and closed even if pickling raises.
with open("scaler.pkl", "wb") as scaler_file:
    pickle.dump(scaler, scaler_file)

In [None]:
# ================== TESTING CODE ==================
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import load_model
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# Load data
# Column layout assigned to the raw files: engine id, cycle counter, three
# operating settings, then sensors 1..21.
cols = ['unit', 'cycle', 'op_setting_1', 'op_setting_2',
        'op_setting_3'] + [f'sensor_{i}' for i in range(1, 22)]
test_df = pd.read_csv("/content/test_FD001.txt", sep=' ', header=None)
test_df.drop(columns=[26, 27], inplace=True)
test_df.columns = cols

# One ground-truth RUL value per line of the RUL file, flattened to 1-D.
true_rul = pd.read_csv("/content/RUL_FD001.txt", header=None).values.flatten()

# Normalize (same features/scaler)
# The column ORDER must be identical to the one used in the training cell
# (operating settings first, then 'cycle', then the sensors): the model reads
# features by position, so a permuted list feeds it the wrong signals.
features = ['op_setting_1', 'op_setting_2', 'op_setting_3', 'cycle',
    'sensor_2', 'sensor_3', 'sensor_4', 'sensor_7', 'sensor_8',
    'sensor_11', 'sensor_15', 'sensor_17', 'sensor_20', 'sensor_21'
]
# The scaler is re-fitted on the training file so the test data is scaled
# with training-set minima/maxima, never with its own statistics.
train_df = pd.read_csv("/content/train_FD001.txt", sep=' ', header=None)
train_df.drop(columns=[26, 27], inplace=True)
train_df.columns = cols
scaler = MinMaxScaler()
train_df[features] = scaler.fit_transform(train_df[features])
test_df[features] = scaler.transform(test_df[features])

# Generate sequences


def gen_sequence(id_df, seq_len, features):
    """Cut one unit's feature rows into all full-length sliding windows.

    Windows move forward one row at a time and the last one ends on the
    unit's final row.

    Args:
        id_df: DataFrame with the rows of a single unit.
        seq_len: Number of consecutive rows in each window.
        features: Column names to extract, in model input order.

    Returns:
        Array of stacked windows; empty when the unit has fewer than
        ``seq_len`` rows.
    """
    rows = id_df[features].values
    last_start = len(rows) - seq_len
    windows = []
    for start in range(last_start + 1):
        windows.append(rows[start:start + seq_len])
    return np.array(windows)


# Window length; has to equal the length the model was trained with.
SEQ_LEN = 50
X_test = []
y_true = []

# One model input per test unit. `i` counts every unit (kept or skipped), so
# true_rul[i] stays aligned with the unit's position in the test file.
for i, unit in enumerate(test_df['unit'].unique()):
    unit_df = test_df.loc[test_df['unit'] == unit]
    if len(unit_df) < SEQ_LEN:
        # Too few cycles to fill a single window: the unit is left out of the
        # evaluation entirely.
        continue
    sequences = gen_sequence(unit_df, SEQ_LEN, features)
    # The last (up to) five windows are averaged element-wise into one input.
    # NOTE(review): this smooths the features over time before prediction;
    # confirm this is intended, since the training cell feeds raw windows.
    last_sequences = sequences[-5:]
    X_test.append(last_sequences.mean(axis=0))
    y_true.append(true_rul[i])

X_test = np.array(X_test)

# Restore the checkpoint written during training and predict.
model = load_model("best_model.keras")
y_pred = model.predict(X_test).flatten()
# Invert the log1p target transform, then cap to the same [0, 125] range that
# is applied to the reference values.
y_pred = np.expm1(y_pred).clip(0, 125)
y_true = np.asarray(y_true).clip(0, 125)

# Score in RUL units (cycles), not in log space.
mse = mean_squared_error(y_true, y_pred)
print(f"Test MSE: {mse:.2f}")

# True vs. predicted RUL, one point per evaluated unit.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(y_true, label='True RUL')
ax.plot(y_pred, label='Predicted RUL')
ax.set_xlabel('Engine ID')
ax.set_ylabel('Remaining Useful Life')
ax.set_title('True vs Predicted RUL')
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()

In [1]:
# Leftover smoke-test cell: prints a fixed greeting and uses nothing from the
# cells above.
print("hello")

hello
