In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# Read the sensor log into a DataFrame.
# The CSV has to be uploaded to the Colab session at this location first.
csv_path = '/content/updated_lstm_with_300_failures.csv'
df = pd.read_csv(csv_path)
# Preview the first rows as the cell output.
df.head()


Unnamed: 0,timestamp,machine_id,vibration,temperature,pressure,acoustic,flow_rate,RUL,failure
0,2024-01-01 00:00:00,M1,0.545,72.62,101.48,54.61,28.36,91,0
1,2024-01-01 01:00:00,M1,0.362,74.24,85.06,43.76,29.77,90,0
2,2024-01-01 02:00:00,M1,0.591,69.95,102.53,50.4,30.87,89,0
3,2024-01-01 03:00:00,M1,0.597,77.82,95.77,47.32,28.92,88,0
4,2024-01-01 04:00:00,M1,0.688,74.78,95.21,55.44,30.79,87,0


In [None]:
# Drop timestamp and machine_id for LSTM input.
# Reassigning with errors='ignore' (instead of inplace=True) means re-running
# this cell after the columns are already gone no longer raises KeyError.
df = df.drop(columns=['timestamp', 'machine_id'], errors='ignore')

# Sensor columns used as model features
sensor_columns = ['vibration', 'temperature', 'pressure', 'acoustic', 'flow_rate']

# Normalize sensor values (MinMaxScaler maps each column to [0, 1] by default).
# NOTE(review): the scaler is fitted on the whole frame, i.e. before any
# train/test split, so test-set min/max values influence the scaling of the
# training data. Consider fitting on the training rows only.
scaler = MinMaxScaler()
df[sensor_columns] = scaler.fit_transform(df[sensor_columns])
df.head()


Unnamed: 0,vibration,temperature,pressure,acoustic,flow_rate,RUL,failure
0,0.532723,0.421879,0.544313,0.599649,0.41977,91,0
1,0.293194,0.461,0.344142,0.327309,0.48227,90,0
2,0.592932,0.357402,0.557113,0.493976,0.531028,89,0
3,0.600785,0.547452,0.474704,0.416667,0.444592,88,0
4,0.719895,0.47404,0.467878,0.620482,0.527482,87,0


In [None]:
# Show the first five rows of the current frame.
df.head(n=5)

Unnamed: 0,vibration,temperature,pressure,acoustic,flow_rate,RUL,failure
0,0.532723,0.421879,0.544313,0.599649,0.41977,91,0
1,0.293194,0.461,0.344142,0.327309,0.48227,90,0
2,0.592932,0.357402,0.557113,0.493976,0.531028,89,0
3,0.600785,0.547452,0.474704,0.416667,0.444592,88,0
4,0.719895,0.47404,0.467878,0.620482,0.527482,87,0


In [None]:
import pandas as pd

# Load an untouched copy of the dataset for this inspection only.
# It is stored under its own name: assigning it to `df` would overwrite the
# preprocessed frame (identifier columns dropped, sensors scaled) that the
# sequence-building cells below read from.
df_raw = pd.read_csv("/content/updated_lstm_with_300_failures.csv")

# Clean up column names (in case of any extra whitespace)
df_raw.columns = df_raw.columns.str.strip()

# Filter for rows where failure == 1
failure_entries = df_raw[df_raw['failure'] == 1]

# Count how many of those failure rows belong to the machine being inspected.
# The machine id is named once so the filter and the printed label cannot drift apart.
machine_id_to_check = 'M3'
machine_failure_count = int((failure_entries['machine_id'] == machine_id_to_check).sum())

# Original variable name kept as an alias in case other cells reference it
# (despite the name, it has always held the count for M3).
machine1_failure_count = machine_failure_count

print(f"Failures for machine_id {machine_id_to_check}:", machine_failure_count)


Failures for machine_id M3: 100


In [None]:
# Function to create sliding window sequences
def create_sequences(data, target_col, window_size, feature_cols=None):
    """Build overlapping fixed-length windows and one target per window.

    Windows are taken over consecutive rows of ``data`` with a stride of one.
    The target of a window is the ``target_col`` value of the window's last row.
    Rows are treated as one continuous series; no grouping is applied.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing the feature columns and ``target_col``.
    target_col : str
        Name of the column that supplies the per-window target.
    window_size : int
        Number of consecutive rows per window; must be positive.
    feature_cols : sequence of str, optional
        Columns used as features. Defaults to the notebook-level
        ``sensor_columns`` list when omitted.

    Returns
    -------
    X : numpy.ndarray
        Array of shape ``(n_windows, window_size, n_features)``.
    y : numpy.ndarray
        Array of shape ``(n_windows,)``.
        ``n_windows`` is ``len(data) - window_size + 1``, or 0 when ``data``
        has fewer than ``window_size`` rows.

    Raises
    ------
    ValueError
        If ``window_size`` is not positive.
    """
    if window_size <= 0:
        raise ValueError("window_size must be a positive integer")

    if feature_cols is None:
        feature_cols = sensor_columns  # notebook-level default
    feature_cols = list(feature_cols)

    # Convert once to NumPy instead of slicing the DataFrame on every iteration.
    features = data[feature_cols].to_numpy()
    targets = data[target_col].to_numpy()

    # +1 so that the window ending on the very last row is included as well.
    n_windows = len(data) - window_size + 1
    if n_windows <= 0:
        # Too few rows for a single window: return empty, correctly shaped arrays.
        empty_X = np.empty((0, window_size, len(feature_cols)), dtype=features.dtype)
        empty_y = np.empty((0,), dtype=targets.dtype)
        return empty_X, empty_y

    X = np.stack([features[start:start + window_size] for start in range(n_windows)])
    # The last row of window `start` is row `start + window_size - 1`.
    y = targets[window_size - 1:].copy()
    return X, y

# Define window size (number of consecutive rows per input sequence)
window_size = 30

# Generate sequences for regression (RUL)
X_rul, y_rul = create_sequences(df, target_col='RUL', window_size=window_size)

# Split data in row order, without shuffling.
# Neighbouring windows share window_size - 1 rows, so a shuffled split would put
# near-identical windows into both the training and the test set and make the
# test score overly optimistic. With shuffle=False the last 20% of the windows
# form the test set.
X_rul_train, X_rul_test, y_rul_train, y_rul_test = train_test_split(
    X_rul, y_rul, test_size=0.2, shuffle=False
)


In [None]:
# Create sequences for binary classification (target: `failure` flag of the
# last row in each window)
X_fail, y_fail = create_sequences(df, target_col='failure', window_size=window_size)

# Split data in row order, without shuffling.
# Neighbouring windows share window_size - 1 rows, so a shuffled split would put
# near-identical windows into both the training and the test set and inflate the
# test metrics. With shuffle=False the last 20% of the windows form the test set.
# NOTE(review): failures are rare; verify that both splits contain positive
# examples (e.g. y_fail_train.sum(), y_fail_test.sum()).
X_fail_train, X_fail_test, y_fail_train, y_fail_test = train_test_split(
    X_fail, y_fail, test_size=0.2, shuffle=False
)


In [None]:
# Define LSTM classification model.
# The input shape is declared through an explicit Input layer; passing
# `input_shape` to the first LSTM layer makes recent Keras versions emit a
# UserWarning.
model_fail = Sequential([
    tf.keras.Input(shape=(window_size, len(sensor_columns))),
    LSTM(64, return_sequences=True),
    Dropout(0.2),
    LSTM(32),
    Dense(1, activation='sigmoid')  # Output: binary classification
])

# Metrics are left as loss + accuracy so evaluate() keeps returning two values.
model_fail.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model_fail.summary()

# Failure windows are a small minority of the samples, so an unweighted loss
# lets the network reach high accuracy by always predicting "no failure".
# Weight each class inversely to its frequency ("balanced" weighting).
n_train_samples = len(y_fail_train)
n_train_failures = int(np.sum(y_fail_train == 1))
n_train_normal = n_train_samples - n_train_failures
if n_train_failures > 0 and n_train_normal > 0:
    class_weight = {
        0: n_train_samples / (2.0 * n_train_normal),
        1: n_train_samples / (2.0 * n_train_failures),
    }
else:
    # Only one class present: weighting is undefined, train unweighted.
    class_weight = None

# Train model
history_fail = model_fail.fit(
    X_fail_train,
    y_fail_train,
    validation_data=(X_fail_test, y_fail_test),
    epochs=10,
    batch_size=32,
    class_weight=class_weight,
)


  super().__init__(**kwargs)


Epoch 1/10
[1m657/657[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 42ms/step - accuracy: 0.9885 - loss: 0.0944 - val_accuracy: 0.9901 - val_loss: 0.0556
Epoch 2/10
[1m657/657[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 32ms/step - accuracy: 0.9891 - loss: 0.0605 - val_accuracy: 0.9901 - val_loss: 0.0556
Epoch 3/10
[1m657/657[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 33ms/step - accuracy: 0.9882 - loss: 0.0644 - val_accuracy: 0.9901 - val_loss: 0.0557
Epoch 4/10
[1m657/657[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 35ms/step - accuracy: 0.9886 - loss: 0.0625 - val_accuracy: 0.9901 - val_loss: 0.0558
Epoch 5/10
[1m657/657[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 35ms/step - accuracy: 0.9855 - loss: 0.0759 - val_accuracy: 0.9901 - val_loss: 0.0557
Epoch 6/10
[1m657/657[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 34ms/step - accuracy: 0.9873 - loss: 0.0688 - val_accuracy: 0.9901 - val_loss: 0.0557
Epoch 7/10
[1m6

In [None]:
# Score the classifier on the test windows; with the compiled metrics,
# evaluate() yields the loss followed by the accuracy.
evaluation = model_fail.evaluate(X_fail_test, y_fail_test)
test_loss, test_accuracy = evaluation
print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")


[1m165/165[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.9908 - loss: 0.0532
Test Loss: 0.0561339408159256
Test Accuracy: 0.9900952577590942


In [None]:
# Predict probabilities
y_pred_prob = model_fail.predict(X_fail_test)

# Convert probabilities to binary class (threshold at 0.5)
y_pred_class = (y_pred_prob > 0.5).astype(int)

# Print example output
print(y_pred_class[:20].flatten())  # First 20 predictions

# Accuracy alone cannot show whether any failure is detected when failures are
# rare, so also report the confusion counts and the recall on the failure class.
y_true_flat = np.asarray(y_fail_test).astype(int).ravel()
y_pred_flat = y_pred_class.ravel()
true_pos = int(np.sum((y_true_flat == 1) & (y_pred_flat == 1)))
false_pos = int(np.sum((y_true_flat == 0) & (y_pred_flat == 1)))
false_neg = int(np.sum((y_true_flat == 1) & (y_pred_flat == 0)))
true_neg = int(np.sum((y_true_flat == 0) & (y_pred_flat == 0)))
print(f"TP: {true_pos}  FP: {false_pos}  FN: {false_neg}  TN: {true_neg}")

# Recall is undefined when the test set holds no failures; report that explicitly.
actual_failures = true_pos + false_neg
if actual_failures > 0:
    print(f"Failure recall: {true_pos / actual_failures:.4f}")
else:
    print("Failure recall: n/a (no failures in test set)")


[1m165/165[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
