<a href="https://colab.research.google.com/github/TrickXer/LSTM---Predictive-Maintenance-model/blob/main/LSTM_Preventive_Maintenance.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cpu'

In [3]:
data = pd.read_csv('/content/predictive_maintenance.csv')
data.dropna()
data.head()

Unnamed: 0,UDI,Product ID,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Target,Failure Type
0,1,M14860,M,298.1,308.6,1551,42.8,0,0,No Failure
1,2,L47181,L,298.2,308.7,1408,46.3,3,0,No Failure
2,3,L47182,L,298.1,308.5,1498,49.4,5,0,No Failure
3,4,L47183,L,298.2,308.6,1433,39.5,7,0,No Failure
4,5,L47184,L,298.2,308.7,1408,40.0,9,0,No Failure


In [4]:
features = data[['Air temperature [K]', 'Process temperature [K]', 'Rotational speed [rpm]', 'Torque [Nm]', 'Tool wear [min]']]
features.head()

Unnamed: 0,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min]
0,298.1,308.6,1551,42.8,0
1,298.2,308.7,1408,46.3,3
2,298.1,308.5,1498,49.4,5
3,298.2,308.6,1433,39.5,7
4,298.2,308.7,1408,40.0,9


In [5]:
timestamps = pd.date_range(start='2023-01-01', periods=len(features), freq='H', name='Timestamps [hrs]')
timestamps = pd.to_datetime(timestamps, errors='coerce')
timestamps

DatetimeIndex(['2023-01-01 00:00:00', '2023-01-01 01:00:00',
               '2023-01-01 02:00:00', '2023-01-01 03:00:00',
               '2023-01-01 04:00:00', '2023-01-01 05:00:00',
               '2023-01-01 06:00:00', '2023-01-01 07:00:00',
               '2023-01-01 08:00:00', '2023-01-01 09:00:00',
               ...
               '2024-02-21 06:00:00', '2024-02-21 07:00:00',
               '2024-02-21 08:00:00', '2024-02-21 09:00:00',
               '2024-02-21 10:00:00', '2024-02-21 11:00:00',
               '2024-02-21 12:00:00', '2024-02-21 13:00:00',
               '2024-02-21 14:00:00', '2024-02-21 15:00:00'],
              dtype='datetime64[ns]', name='Timestamps [hrs]', length=10000, freq='H')

In [6]:
features.loc[:, 'Timestamps [hrs]'] = timestamps
features = features.set_index('Timestamps [hrs]')
features.head(11)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  features.loc[:, 'Timestamps [hrs]'] = timestamps


Unnamed: 0_level_0,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min]
Timestamps [hrs],Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-01-01 00:00:00,298.1,308.6,1551,42.8,0
2023-01-01 01:00:00,298.2,308.7,1408,46.3,3
2023-01-01 02:00:00,298.1,308.5,1498,49.4,5
2023-01-01 03:00:00,298.2,308.6,1433,39.5,7
2023-01-01 04:00:00,298.2,308.7,1408,40.0,9
2023-01-01 05:00:00,298.1,308.6,1425,41.9,11
2023-01-01 06:00:00,298.1,308.6,1558,42.4,14
2023-01-01 07:00:00,298.1,308.6,1527,40.2,16
2023-01-01 08:00:00,298.3,308.7,1667,28.6,18
2023-01-01 09:00:00,298.5,309.0,1741,28.0,21


In [7]:
def create_sequences(df, seq_length):
    X, y = [], []

    for i in range(len(df) - seq_length):
        seq = df.iloc[i:i+seq_length].values

        X.append(seq)
        y.append(df.iloc[i+seq_length].values)

    return np.array(X), np.array(y)

In [8]:
seq_length = 10

X, y = create_sequences(features, seq_length)
X, y

(array([[[ 298.1,  308.6, 1551. ,   42.8,    0. ],
         [ 298.2,  308.7, 1408. ,   46.3,    3. ],
         [ 298.1,  308.5, 1498. ,   49.4,    5. ],
         ...,
         [ 298.1,  308.6, 1527. ,   40.2,   16. ],
         [ 298.3,  308.7, 1667. ,   28.6,   18. ],
         [ 298.5,  309. , 1741. ,   28. ,   21. ]],
 
        [[ 298.2,  308.7, 1408. ,   46.3,    3. ],
         [ 298.1,  308.5, 1498. ,   49.4,    5. ],
         [ 298.2,  308.6, 1433. ,   39.5,    7. ],
         ...,
         [ 298.3,  308.7, 1667. ,   28.6,   18. ],
         [ 298.5,  309. , 1741. ,   28. ,   21. ],
         [ 298.4,  308.9, 1782. ,   23.9,   24. ]],
 
        [[ 298.1,  308.5, 1498. ,   49.4,    5. ],
         [ 298.2,  308.6, 1433. ,   39.5,    7. ],
         [ 298.2,  308.7, 1408. ,   40. ,    9. ],
         ...,
         [ 298.5,  309. , 1741. ,   28. ,   21. ],
         [ 298.4,  308.9, 1782. ,   23.9,   24. ],
         [ 298.6,  309.1, 1423. ,   44.3,   29. ]],
 
        ...,
 
        [[ 298.8

In [9]:
scaler_x = StandardScaler()
scaler_y = StandardScaler()

X = scaler_x.fit_transform(X.reshape(-1, X.shape[-1])).reshape(X.shape)
y = scaler_y.fit_transform(y.reshape(-1, y.shape[-1])).reshape(y.shape)

X.shape, y.shape

((9990, 10, 5), (9990, 5))

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)

X_train.shape, y_train.shape, X_test.shape, y_test.shape

(torch.Size([7992, 10, 5]),
 torch.Size([7992, 5]),
 torch.Size([1998, 10, 5]),
 torch.Size([1998, 5]))

In [11]:
batch_size = 64

train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [12]:
class LSTMClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(LSTMClassifier, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, dropout=0.2)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)

        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])

        return out


input_size = X_train.shape[2]
hidden_size = 64
num_layers = 2
output_size = y_train.shape[1]

model = LSTMClassifier(input_size, hidden_size, num_layers, output_size)
model.to(device)
model

LSTMClassifier(
  (lstm): LSTM(5, 64, num_layers=2, batch_first=True, dropout=0.2)
  (fc): Linear(in_features=64, out_features=5, bias=True)
)

In [13]:
learning_rate = 1e-4

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5)

In [14]:
num_epochs = 100
model.train()

for epoch in range(num_epochs):
    epoch_loss = 0.0
    for inputs, targets in train_loader:
        # Move tensors to the configured device (CPU or GPU)
        inputs, targets = inputs.to(model.fc.weight.device), targets.to(model.fc.weight.device)

        # Forward pass
        outputs = model(inputs)

        # Regularization to avoid over-fitting
        l2_lambda = 5e-4
        l2_norm = sum(p.pow(2.0).sum() for p in model.parameters())
        loss = criterion(outputs, targets) + l2_lambda * l2_norm

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()

        # Gradient clipping
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)

        optimizer.step()

        epoch_loss += loss.item()

    epoch_loss /= len(train_loader)

    # Step the scheduler with validation loss (use epoch_loss if no separate validation loss)
    scheduler.step(epoch_loss)
    current_lr = scheduler.optimizer.param_groups[0]['lr']

    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {epoch_loss:.4f}')
        print(f'Current Learning Rate: {current_lr:.6f}')

Epoch [10/100], Loss: 0.5063
Current Learning Rate: 0.000100
Epoch [20/100], Loss: 0.4732
Current Learning Rate: 0.000100
Epoch [30/100], Loss: 0.4621
Current Learning Rate: 0.000100
Epoch [40/100], Loss: 0.4557
Current Learning Rate: 0.000100
Epoch [50/100], Loss: 0.4520
Current Learning Rate: 0.000100
Epoch [60/100], Loss: 0.4498
Current Learning Rate: 0.000100
Epoch [70/100], Loss: 0.4483
Current Learning Rate: 0.000100
Epoch [80/100], Loss: 0.4472
Current Learning Rate: 0.000100
Epoch [90/100], Loss: 0.4457
Current Learning Rate: 0.000100
Epoch [100/100], Loss: 0.4449
Current Learning Rate: 0.000100


In [15]:
def evaluate_model(model, test_loader, criterion):
    model.eval()  # Set model to evaluation mode
    test_loss = 0.0
    correct = 0
    total = 0

    test_losses = []

    with torch.no_grad():  # No need to calculate gradients during evaluation
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(model.fc.weight.device), targets.to(model.fc.weight.device)

            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            test_loss += loss.item()
            test_losses.append(loss.item())

    avg_loss = test_loss / len(test_loader)

    print(f'\nTest Loss: {avg_loss:.4f}')

evaluate_model(model, test_loader, criterion)


Test Loss: 0.4389


In [16]:
def save_model(model, path='lstm_model_v3.pth'):
    # Save the state dictionary of the model
    torch.save(model.state_dict(), path)
    print(f'Model saved to {path}')

# Save the model
save_model(model)

Model saved to lstm_model_v3.pth


In [19]:
model.eval()
sample_input = X_test[0].clone().detach().to(device).unsqueeze(0).float()

with torch.no_grad():
    output = model(sample_input)

output = output.cpu().numpy()
print(output)
output = scaler_y.inverse_transform(output.reshape(1, -1))[0]
output

[[ 1.0289869   0.6416063   0.05848524 -0.06847766  1.0259634 ]]


array([ 302.06506 ,  310.95892 , 1549.2816  ,   39.304176,  173.3077  ],
      dtype=float32)