In [1]:
import numpy as np
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from sklearn.svm import OneClassSVM
from sklearn.cluster import KMeans, DBSCAN
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score, accuracy_score

# Configuration
BATCH_SIZE = 128
NORMAL_CLASS = 0
EPOCHS = 10
LATENT_DIM = 16
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Data Preprocessing
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
train_data = torchvision.datasets.MNIST(root='./data', train=True, transform=transform, download=True)
test_data = torchvision.datasets.MNIST(root='./data', train=False, transform=transform, download=True)

def filter_normal(dataset, normal_class):
    imgs, labels = [], []
    for img, label in dataset:
        imgs.append(img)
        labels.append(int(label == normal_class))  # 1 for normal, 0 for anomaly
    return torch.stack(imgs), torch.tensor(labels)

train_imgs, _ = filter_normal(train_data, NORMAL_CLASS)
test_imgs, test_labels = filter_normal(test_data, NORMAL_CLASS)

train_loader = torch.utils.data.DataLoader(train_imgs, batch_size=BATCH_SIZE, shuffle=True)

# Convolutional Autoencoder
class Autoencoder(nn.Module):
    def __init__(self, latent_dim):
        super(Autoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(1, 16, 3, stride=2, padding=1),  # [16, 14, 14]
            nn.ReLU(),
            nn.Conv2d(16, 4, 3, stride=2, padding=1),  # [4, 7, 7]
            nn.ReLU(),
            nn.Flatten(),
            nn.Linear(4 * 7 * 7, latent_dim)
        )
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, 4 * 7 * 7),
            nn.Unflatten(1, (4, 7, 7)),
            nn.ConvTranspose2d(4, 16, 3, stride=2, padding=1, output_padding=1),  # [16, 14, 14]
            nn.ReLU(),
            nn.ConvTranspose2d(16, 1, 3, stride=2, padding=1, output_padding=1),  # [1, 28, 28]
            nn.Sigmoid()
        )

    def forward(self, x):
        z = self.encoder(x)
        x_recon = self.decoder(z)
        return x_recon

    def encode(self, x):
        return self.encoder(x)

model = Autoencoder(latent_dim=LATENT_DIM).to(DEVICE)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# Training
for epoch in range(EPOCHS):
    model.train()
    total_loss = 0
    for batch in train_loader:
        batch = batch.to(DEVICE)
        optimizer.zero_grad()
        recon = model(batch)
        loss = criterion(recon, batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}/{EPOCHS}, Loss: {total_loss / len(train_loader):.4f}")

# Feature Extraction
model.eval()
with torch.no_grad():
    train_features = model.encode(train_imgs.to(DEVICE)).cpu().numpy()
    test_features = model.encode(test_imgs.to(DEVICE)).cpu().numpy()

# Standardize Features
scaler = StandardScaler()
train_features = scaler.fit_transform(train_features)
test_features = scaler.transform(test_features)

# Clustering in Latent Space
# K-Means
kmeans = KMeans(n_clusters=2, init='k-means++', random_state=42)
kmeans.fit(train_features)
kmeans_preds = kmeans.predict(test_features)

# DBSCAN
dbscan = DBSCAN(eps=0.5, min_samples=5)
dbscan_preds = dbscan.fit_predict(test_features)

# One-Class SVM
oc_svm = OneClassSVM(kernel='rbf', nu=0.1, gamma='scale')
oc_svm.fit(train_features)
svm_preds = oc_svm.decision_function(test_features)

# Evaluation
def evaluate(preds, true_labels):
    preds = np.where(preds == -1, 0, 1)  # Map -1 to 0 for anomalies
    acc = accuracy_score(true_labels, preds)
    auc = roc_auc_score(true_labels, preds)
    return acc, auc

kmeans_acc, kmeans_auc = evaluate(kmeans_preds, test_labels.numpy())
dbscan_acc, dbscan_auc = evaluate(dbscan_preds, test_labels.numpy())
svm_acc, svm_auc = evaluate(svm_preds, test_labels.numpy())

print(f"K-Means++ Accuracy: {kmeans_acc:.4f}, AUC: {kmeans_auc:.4f}")
print(f"DBSCAN Accuracy: {dbscan_acc:.4f}, AUC: {dbscan_auc:.4f}")
print(f"One-Class SVM Accuracy: {svm_acc:.4f}, AUC: {svm_auc:.4f}")


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9.91M/9.91M [00:00<00:00, 20.5MB/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28.9k/28.9k [00:00<00:00, 1.37MB/s]

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz





Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1.65M/1.65M [00:00<00:00, 9.57MB/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4.54k/4.54k [00:00<00:00, 6.98MB/s]


Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw

Epoch 1/10, Loss: 1.0305
Epoch 2/10, Loss: 0.9254
Epoch 3/10, Loss: 0.9254
Epoch 4/10, Loss: 0.9254
Epoch 5/10, Loss: 0.9254
Epoch 6/10, Loss: 0.9254
Epoch 7/10, Loss: 0.9254
Epoch 8/10, Loss: 0.9254
Epoch 9/10, Loss: 0.9254
Epoch 10/10, Loss: 0.9254
K-Means++ Accuracy: 0.0980, AUC: 0.5000
DBSCAN Accuracy: 0.1103, AUC: 0.5018
One-Class SVM Accuracy: 0.0980, AUC: 0.5000
