In [1]:
import numpy as np

# Locations of the serialized NumPy arrays (.npy files): training features,
# test features and the test-set labels.
path_train, path_test, path_test_label = (
    'data/train_data.npy',
    'data/test_data.npy',
    'data/test_labels.npy',
)

# Load each .npy file into an in-memory NumPy array, in the order listed above.
train, test, test_labels = (
    np.load(npy_path)
    for npy_path in (path_train, path_test, path_test_label)
)

In [4]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras import regularizers
import matplotlib.pyplot as plt

# Load the feature matrix and the per-row labels.
# NOTE(review): these are placeholder paths — point them at the real arrays
# before running. Rows with label 0 are treated as "normal" below.
data = np.load('path/to/your/data.npy')
labels = np.load('path/to/your/labels.npy')

# Split BEFORE scaling. Fitting the scaler on the full dataset would leak the
# mean/variance of the held-out rows (and of the anomalies) into training.
# The same random_state keeps the row partition identical to a split done on
# pre-scaled data.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=42)
X_train_raw = X_train_raw[y_train == 0]  # Train only on normal data

if X_train_raw.shape[0] == 0:
    raise ValueError("No normal (label == 0) rows in the training split; cannot fit the autoencoder.")

# Learn the scaling statistics from the normal training rows only, then apply
# that same transformation to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept so that any later cell still referencing `data_scaled` keeps working;
# it is now scaled with the training-only statistics.
data_scaled = scaler.transform(data)

# Define the autoencoder.
# Layer widths: input_dim -> 14 -> 7 -> 3 (bottleneck) -> 7 -> 14 -> input_dim
input_dim = X_train.shape[1]
encoding_dim = 14  # Width of the first hidden layer; deeper layers are fractions of it
input_layer = Input(shape=(input_dim,))

# Encoder. The L1 activity penalty (10e-5 == 1e-4) pushes first-layer
# activations towards sparsity.
encoder = Dense(encoding_dim, activation='relu', activity_regularizer=regularizers.l1(10e-5))(input_layer)
encoder = Dense(encoding_dim // 2, activation='relu')(encoder)
encoder = Dense(encoding_dim // 4, activation='relu')(encoder)

# Decoder mirrors the encoder.
decoder = Dense(encoding_dim // 2, activation='relu')(encoder)
decoder = Dense(encoding_dim, activation='relu')(decoder)
# The output layer must be linear: the inputs are standardized (zero mean,
# unbounded, roughly half of the values negative), while a sigmoid is confined
# to (0, 1) and therefore could never reconstruct them.
decoder = Dense(input_dim, activation='linear')(decoder)

autoencoder = Model(inputs=input_layer, outputs=decoder)
autoencoder.compile(optimizer='adam', loss='mse')

# Fit the network to reproduce its own input; the last 10% of the training
# rows are held out by Keras to track validation loss.
history = autoencoder.fit(
    X_train,
    X_train,
    epochs=50,
    batch_size=32,
    shuffle=True,
    validation_split=0.1,
    verbose=1,
)

# Draw the training and validation loss curves on one set of axes.
for metric_key, curve_label in (('loss', 'Training Loss'), ('val_loss', 'Validation Loss')):
    plt.plot(history.history[metric_key], label=curve_label)
plt.title('Autoencoder Training History')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Reconstruction error on the normal training rows: this is the reference
# distribution of what "normal" error looks like.
X_train_pred = autoencoder.predict(X_train)
train_mse = np.mean(np.power(X_train - X_train_pred, 2), axis=1)

# Use the autoencoder to reconstruct the test data
X_test_pred = autoencoder.predict(X_test)

# Per-sample reconstruction error on the test set
mse = np.mean(np.power(X_test - X_test_pred, 2), axis=1)

# Anomaly threshold = 95th percentile of the TRAINING (normal) error.
# Taking the percentile of the test errors instead would always flag ~5% of
# the test rows regardless of how many anomalies exist, and would let the
# test data define its own decision rule.
threshold = np.percentile(train_mse, 95)

# Identify anomalies: rows reconstructed worse than the threshold
anomalies = mse > threshold

# Evaluate the model against the ground-truth labels
accuracy = accuracy_score(y_test, anomalies)
precision = precision_score(y_test, anomalies)
recall = recall_score(y_test, anomalies)
f1 = f1_score(y_test, anomalies)

print(f"Number of total samples: {len(X_test)}")
print(f"Number of predicted anomalies: {np.sum(anomalies)}")
print(f"Number of actual anomalies: {np.sum(y_test)}")
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-score: {f1:.2f}")


ModuleNotFoundError: No module named 'tensorflow'

In [5]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import matplotlib.pyplot as plt

# Load the feature matrix and the per-row labels.
# NOTE(review): these are placeholder paths — point them at the real arrays
# before running. Rows with label 0 are treated as "normal" below.
data = np.load('path/to/your/data.npy')
labels = np.load('path/to/your/labels.npy')

# Split BEFORE scaling. Fitting the scaler on the full dataset would leak the
# mean/variance of the held-out rows (and of the anomalies) into training.
# The same random_state keeps the row partition identical to a split done on
# pre-scaled data.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=42)
X_train_raw = X_train_raw[y_train == 0]  # Train only on normal data

if X_train_raw.shape[0] == 0:
    raise ValueError("No normal (label == 0) rows in the training split; cannot fit the autoencoder.")

# Learn the scaling statistics from the normal training rows only, then apply
# that same transformation to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept so that any later cell still referencing `data_scaled` keeps working;
# it is now scaled with the training-only statistics.
data_scaled = scaler.transform(data)

# Convert data to PyTorch tensors (float32 matches the default nn.Linear dtype)
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)

# Define the autoencoder model
class Autoencoder(nn.Module):
    """Fully-connected autoencoder with a 3-unit bottleneck.

    Layer widths: input_dim -> 14 -> 7 -> 3 -> 7 -> 14 -> input_dim, with ReLU
    after every layer except the last.

    The final layer is deliberately left linear (no activation). The notebook
    feeds this model standardized features, which are zero-centred, unbounded
    and frequently negative; a sigmoid output is confined to (0, 1) and could
    never reconstruct such values.

    Args:
        input_dim: Number of features per sample (size of the last axis of the
            input tensor).
    """

    def __init__(self, input_dim):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 14),
            nn.ReLU(),
            nn.Linear(14, 7),
            nn.ReLU(),
            nn.Linear(7, 3),
            nn.ReLU()
        )
        self.decoder = nn.Sequential(
            nn.Linear(3, 7),
            nn.ReLU(),
            nn.Linear(7, 14),
            nn.ReLU(),
            # Linear output: reconstruction targets are not restricted to (0, 1).
            nn.Linear(14, input_dim)
        )

    def forward(self, x):
        """Encode ``x`` down to the bottleneck and decode it back.

        Args:
            x: Tensor whose last dimension has size ``input_dim``.

        Returns:
            The reconstruction, with the same shape as ``x``.
        """
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

# Seed PyTorch so that weight initialisation and batch shuffling are
# repeatable across "Restart & Run All".
torch.manual_seed(42)

# Instantiate the model, define the loss function and optimizer
input_dim = X_train.shape[1]
model = Autoencoder(input_dim)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the autoencoder
num_epochs = 50
batch_size = 32
n_train = X_train.size(0)
if n_train == 0:
    raise ValueError("X_train is empty; cannot train the autoencoder.")

model.train()
for epoch in range(num_epochs):
    # Fresh random ordering of the training rows every epoch
    permutation = torch.randperm(n_train)
    epoch_loss = 0.0
    for i in range(0, n_train, batch_size):
        indices = permutation[i:i + batch_size]
        batch_x = X_train[indices]

        # Forward pass: the target of an autoencoder is its own input
        outputs = model(batch_x)
        loss = criterion(outputs, batch_x)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Weight by batch size so the (possibly shorter) last batch does not
        # skew the epoch average.
        epoch_loss += loss.item() * batch_x.size(0)

    # Report the mean loss over the whole epoch, not just the final mini-batch.
    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {epoch_loss / n_train:.4f}')

# Reconstruct the training and test data without tracking gradients
model.eval()
with torch.no_grad():
    X_train_pred = model(X_train).numpy()
    X_test_pred = model(X_test).numpy()

# Per-sample reconstruction error
train_mse = np.mean((X_train.numpy() - X_train_pred) ** 2, axis=1)
mse = np.mean((X_test.numpy() - X_test_pred) ** 2, axis=1)

# Anomaly threshold = 95th percentile of the TRAINING (normal) error.
# Taking the percentile of the test errors instead would always flag ~5% of
# the test rows regardless of how many anomalies exist, and would let the
# test data define its own decision rule.
threshold = np.percentile(train_mse, 95)

# Identify anomalies: rows reconstructed worse than the threshold
anomalies = mse > threshold

# Evaluate the model against the ground-truth labels
y_true = y_test.numpy()
accuracy = accuracy_score(y_true, anomalies)
precision = precision_score(y_true, anomalies)
recall = recall_score(y_true, anomalies)
f1 = f1_score(y_true, anomalies)

print(f"Number of total samples: {len(X_test)}")
print(f"Number of predicted anomalies: {np.sum(anomalies)}")
print(f"Number of actual anomalies: {np.sum(y_test.numpy())}")
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-score: {f1:.2f}")


ModuleNotFoundError: No module named 'torch'