In [2]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset

# Step 1: Load the dataset
df = pd.read_csv('prepared_dataset.csv')

# Step 2: Preprocess the data (for simplicity, assuming the dataset has a numeric feature column)
# Example: We'll normalize all columns except the 'target' column.

# Normalize the features
scaler = MinMaxScaler()
features = df.drop(columns=['will_fail']).values
target = df['will_fail'].values

scaled_features = scaler.fit_transform(features)

# Step 3: Prepare the data for LSTM (creating sequences for time series problems)
def create_sequences(data, target, seq_length=30):
    sequences = []
    labels = []
    for i in range(len(data) - seq_length):
        sequences.append(data[i:i+seq_length])
        labels.append(target[i+seq_length])
    return np.array(sequences), np.array(labels)

# Create sequences
X, y = create_sequences(scaled_features, target)

# Step 4: Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

# Step 5: Create DataLoader for batching
train_data = TensorDataset(X_train_tensor, y_train_tensor)
test_data = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
test_loader = DataLoader(test_data, batch_size=64, shuffle=False)

# Step 6: Define the LSTM model
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_layer_size, output_size):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_layer_size, batch_first=True)
        self.fc = nn.Linear(hidden_layer_size, output_size)

    def forward(self, x):
        lstm_out, (hn, cn) = self.lstm(x)
        out = self.fc(lstm_out[:, -1, :])  # We use the output of the last time step
        return out

# Step 7: Initialize the model, loss function, and optimizer
input_size = X_train.shape[2]  # Number of features
hidden_layer_size = 64
output_size = 1  # Assuming regression or single target prediction
model = LSTMModel(input_size, hidden_layer_size, output_size)

# Loss and optimizer
criterion = nn.MSELoss()  # Use cross entropy loss for classification
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Step 8: Train the model
num_epochs = 20
for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0
    for batch_X, batch_y in train_loader:
        optimizer.zero_grad()
        y_pred = model(batch_X)
        loss = criterion(y_pred.squeeze(), batch_y)  # Squeeze to match shapes
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss/len(train_loader)}")

# Step 9: Evaluate the model
model.eval()
with torch.no_grad():
    test_loss = 0
    for batch_X, batch_y in test_loader:
        y_pred = model(batch_X)
        loss = criterion(y_pred.squeeze(), batch_y)
        test_loss += loss.item()

    print(f"Test Loss: {test_loss/len(test_loader)}")

# You can also implement additional evaluation metrics like R^2, accuracy, etc.


Epoch 1/20, Loss: 0.1964153406512031
Epoch 2/20, Loss: 0.18696593103820022
Epoch 3/20, Loss: 0.18565114330539303
Epoch 4/20, Loss: 0.18280902193398427
Epoch 5/20, Loss: 0.18237417653358773
Epoch 6/20, Loss: 0.18123335666461274
Epoch 7/20, Loss: 0.18029561252024948
Epoch 8/20, Loss: 0.17826775783341936
Epoch 9/20, Loss: 0.17916872087567526
Epoch 10/20, Loss: 0.17774937875594826
Epoch 11/20, Loss: 0.17683998822439007
Epoch 12/20, Loss: 0.1752000514690469
Epoch 13/20, Loss: 0.17467091066392873
Epoch 14/20, Loss: 0.17359068127874713
Epoch 15/20, Loss: 0.17311766678759447
Epoch 16/20, Loss: 0.17165864329097164
Epoch 17/20, Loss: 0.17146992358908006
Epoch 18/20, Loss: 0.17035381612773556
Epoch 19/20, Loss: 0.16950339115247492
Epoch 20/20, Loss: 0.16878650446490542
Test Loss: 0.22497546155419615


In [3]:
from sklearn.metrics import classification_report

model.eval()
all_preds = []
all_labels = []

with torch.no_grad():
    for batch_X, batch_y in test_loader:
        logits = model(batch_X)
        predicted = torch.argmax(logits, dim=1)
        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(batch_y.cpu().numpy())

# Print classification report
report = classification_report(all_labels, all_preds)
print("\n📋 Classification Report:")
print(report)



📋 Classification Report:
              precision    recall  f1-score   support

         0.0       0.71      1.00      0.83      3251
         1.0       0.00      0.00      0.00      1333

    accuracy                           0.71      4584
   macro avg       0.35      0.50      0.41      4584
weighted avg       0.50      0.71      0.59      4584



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
