**Rolling window sequence**

In [4]:
# 1. Imports and Data Loading
import pandas as pd
import numpy as np

# Load the feature-engineered dataset from previous step (adjust path as needed)
df = pd.read_csv('cmapss_feature_engineered_FD001.csv')  # Assume feature engineered file

# Basic info
print("Dataset shape:", df.shape)
df.head()

Dataset shape: (20531, 68)


Unnamed: 0,engine_id,cycle,op_setting_1,op_setting_2,op_setting_3,sensor_1,sensor_2,sensor_3,sensor_4,sensor_5,...,sensor_17_rollmean5,sensor_17_rollstd5,sensor_18_rollmean5,sensor_18_rollstd5,sensor_19_rollmean5,sensor_19_rollstd5,sensor_20_rollmean5,sensor_20_rollstd5,sensor_21_rollmean5,sensor_21_rollstd5
0,1,2,0.0019,-0.0003,100.0,0.0,-1.06178,0.211528,-0.643726,-1.776357e-15,...,-0.926028,-2.638069,0.0,0.0,0.0,0.0,1.367661,-1.534785,1.404213,-2.630752
1,1,3,-0.0043,0.0003,100.0,0.0,-0.661813,-0.413166,-0.525953,-1.776357e-15,...,-1.453702,0.786588,0.0,0.0,0.0,0.0,1.192984,-1.166192,1.123794,-0.599129
2,1,4,0.0007,0.0,100.0,0.0,-0.661813,-1.261314,-0.784831,-1.776357e-15,...,-1.321784,0.327771,0.0,0.0,0.0,0.0,0.991014,-0.547098,1.0646,-0.923458
3,1,5,-0.0019,-0.0002,100.0,0.0,-0.621816,-1.251528,-0.301518,-1.776357e-15,...,-1.08433,0.610846,0.0,0.0,0.0,0.0,0.896034,-0.625804,1.095643,-1.143645
4,1,6,-0.0043,-0.0001,100.0,0.0,-1.161771,-0.987297,-1.173703,-1.776357e-15,...,-1.242633,0.743509,0.0,0.0,0.0,0.0,0.791228,-1.279538,0.981948,-1.24503


In [5]:
# Columns to use as features (exclude id and cycle, plus target if any)
exclude_cols = ['engine_id', 'cycle']
feature_cols = [col for col in df.columns if col not in exclude_cols]

# Sort data by engine_id and cycle to ensure correct temporal order
df = df.sort_values(['engine_id', 'cycle']).reset_index(drop=True)


In [6]:
def generate_rolling_windows(data, engine_col, features, window_size=30):
    sequences = []
    engine_ids = []
    cycle_ids = []

    for engine in data[engine_col].unique():
        engine_data = data[data[engine_col] == engine]
        engine_features = engine_data[features].values

        # Generate sequences with rolling window
        for i in range(window_size - 1, len(engine_data)):
            seq = engine_features[i - window_size + 1 : i + 1]
            sequences.append(seq)
            engine_ids.append(engine)
            cycle_ids.append(engine_data.iloc[i]['cycle'])

    # Convert to array for modeling
    sequences = np.array(sequences)
    return sequences, engine_ids, cycle_ids

In [7]:
window_size = 30  # Typical rolling window length; adjust as needed
sequences, engine_ids, cycle_ids = generate_rolling_windows(df, 'engine_id', feature_cols, window_size)

print("Shape of rolling window sequences:", sequences.shape)  # (num_sequences, window_size, num_features)
print("Example sequence shape:", sequences[0].shape)

Shape of rolling window sequences: (17631, 30, 66)
Example sequence shape: (30, 66)


In [8]:
# Print the first sequence info
print(f"Engine ID: {engine_ids[0]}, Cycle: {cycle_ids[0]}")
print("Sequence data for first time window (shape {}):".format(sequences[0].shape))
print(sequences[0])

Engine ID: 1, Cycle: 31.0
Sequence data for first time window (shape (30, 66)):
[[ 1.90000000e-03 -3.00000000e-04  1.00000000e+02 ... -1.53478503e+00
   1.40421343e+00 -2.63075241e+00]
 [-4.30000000e-03  3.00000000e-04  1.00000000e+02 ... -1.16619245e+00
   1.12379400e+00 -5.99129117e-01]
 [ 7.00000000e-04  0.00000000e+00  1.00000000e+02 ... -5.47097938e-01
   1.06460040e+00 -9.23457744e-01]
 ...
 [ 1.20000000e-03 -1.00000000e-04  1.00000000e+02 ... -1.06697113e+00
   1.02930247e+00  4.59904157e-01]
 [-2.20000000e-03  0.00000000e+00  1.00000000e+02 ... -6.06104727e-01
   9.90458379e-01  3.42825323e-01]
 [ 1.40000000e-03  5.00000000e-04  1.00000000e+02 ... -1.31993488e+00
   8.16751090e-01  3.47734290e-01]]


In [9]:
# Check that sequence length matches window size
assert sequences.shape[1] == window_size, "Sequence window length mismatch"

# Check that sequences are ordered by cycle (manual inspection example)
assert all(cycle_ids[i] > cycle_ids[i-1] or engine_ids[i] != engine_ids[i-1] for i in range(1, len(cycle_ids))), "Cycle order violation"

print("Basic validation checks passed.")

Basic validation checks passed.


In [10]:
# Save sequences and metadata for modeling
np.save('rolling_window_sequences.npy', sequences)
pd.DataFrame({'engine_id': engine_ids, 'cycle': cycle_ids}).to_csv('sequence_metadata.csv', index=False)

**RUL Target Computation & Documentation**

In [11]:
import pandas as pd
import numpy as np

In [12]:
# 1. Load rolling windows sequence (NumPy array)
sequences = np.load('rolling_window_sequences.npy')
print(f"Loaded rolling window sequences with shape: {sequences.shape}")

Loaded rolling window sequences with shape: (17631, 30, 66)


In [13]:
# 2. Load sequence metadata (engine_id, cycle)
metadata_df = pd.read_csv('sequence_metadata.csv')
print(f"Loaded sequence metadata with shape: {metadata_df.shape}")
metadata_df.head()

Loaded sequence metadata with shape: (17631, 2)


Unnamed: 0,engine_id,cycle
0,1,31.0
1,1,32.0
2,1,33.0
3,1,34.0
4,1,35.0


In [14]:
# 3. Calculate Remaining Useful Life (RUL) per sequence

# Find max cycle per engine in metadata
max_cycle_per_engine = metadata_df.groupby('engine_id')['cycle'].max().reset_index()
max_cycle_per_engine.columns = ['engine_id', 'max_cycle']
max_cycle_per_engine.head()

Unnamed: 0,engine_id,max_cycle
0,1,192.0
1,2,287.0
2,3,179.0
3,4,189.0
4,5,269.0


In [15]:
# Merge max cycle info with metadata
metadata_df = metadata_df.merge(max_cycle_per_engine, on='engine_id', how='left')
metadata_df.head()

Unnamed: 0,engine_id,cycle,max_cycle
0,1,31.0,192.0
1,1,32.0,192.0
2,1,33.0,192.0
3,1,34.0,192.0
4,1,35.0,192.0


In [16]:
# Compute RUL
metadata_df['RUL'] = metadata_df['max_cycle'] - metadata_df['cycle']

In [17]:
metadata_df.head()

Unnamed: 0,engine_id,cycle,max_cycle,RUL
0,1,31.0,192.0,161.0
1,1,32.0,192.0,160.0
2,1,33.0,192.0,159.0
3,1,34.0,192.0,158.0
4,1,35.0,192.0,157.0


In [18]:
# Optional: Cap RUL values
rul_cap = 130
metadata_df['RUL'] = metadata_df['RUL'].clip(upper=rul_cap)

# Drop max_cycle column if not needed further
metadata_df.drop(columns=['max_cycle'], inplace=True)

metadata_df.head()


Unnamed: 0,engine_id,cycle,RUL
0,1,31.0,130.0
1,1,32.0,130.0
2,1,33.0,130.0
3,1,34.0,130.0
4,1,35.0,130.0


In [19]:
# 4. Validate by inspecting example engine(s)
engines_to_check = metadata_df['engine_id'].unique()[:3]  # first 3 engines
for engine in engines_to_check:
    sample = metadata_df[metadata_df['engine_id'] == engine][['cycle', 'RUL']]
    print(f"\nEngine {engine} - Cycles and RULs:")
    print(sample.head(10))
    print(sample.tail(10))


Engine 1 - Cycles and RULs:
   cycle    RUL
0   31.0  130.0
1   32.0  130.0
2   33.0  130.0
3   34.0  130.0
4   35.0  130.0
5   36.0  130.0
6   37.0  130.0
7   38.0  130.0
8   39.0  130.0
9   40.0  130.0
     cycle  RUL
152  183.0  9.0
153  184.0  8.0
154  185.0  7.0
155  186.0  6.0
156  187.0  5.0
157  188.0  4.0
158  189.0  3.0
159  190.0  2.0
160  191.0  1.0
161  192.0  0.0

Engine 2 - Cycles and RULs:
     cycle    RUL
162   31.0  130.0
163   32.0  130.0
164   33.0  130.0
165   34.0  130.0
166   35.0  130.0
167   36.0  130.0
168   37.0  130.0
169   38.0  130.0
170   39.0  130.0
171   40.0  130.0
     cycle  RUL
409  278.0  9.0
410  279.0  8.0
411  280.0  7.0
412  281.0  6.0
413  282.0  5.0
414  283.0  4.0
415  284.0  3.0
416  285.0  2.0
417  286.0  1.0
418  287.0  0.0

Engine 3 - Cycles and RULs:
     cycle    RUL
419   31.0  130.0
420   32.0  130.0
421   33.0  130.0
422   34.0  130.0
423   35.0  130.0
424   36.0  130.0
425   37.0  130.0
426   38.0  130.0
427   39.0  130.0
428   4

In [20]:
# 5. Save updated metadata with RUL target
metadata_df.to_csv('sequence_metadata_with_RUL.csv', index=False)
print("Saved updated metadata with RUL as 'sequence_metadata_with_RUL.csv'")

Saved updated metadata with RUL as 'sequence_metadata_with_RUL.csv'
