             Anomaly Detection Using Unsupervised Methods on the Wood Category of the MVTec Dataset

In [2]:
# Row-shuffling helper
def shuffle_data(data):
    """Return the entries of ``data`` reordered randomly along the first axis.

    A permutation of ``range(data.shape[0])`` is drawn from NumPy's global
    random state and used to index ``data``.
    """
    permutation = np.random.permutation(data.shape[0])
    return data[permutation]

train_images = shuffle_data(train_images)

# Shuffle the test images and their labels with ONE shared permutation.
# Passing them through two separate shuffle_data() calls draws two different
# permutations, which destroys the image <-> label correspondence.
assert test_images.shape[0] == test_labels.shape[0], "test images/labels length mismatch"
test_permutation = np.random.permutation(test_images.shape[0])
test_images = test_images[test_permutation]
test_labels = test_labels[test_permutation]



In [3]:
# HOG Feature Extraction
def extract_hog_features(images):
    """Compute one HOG descriptor per image.

    Each image is converted to grayscale, resized to 128x128 and described
    with 8 orientation bins over 16x16-pixel cells (one cell per block).

    Args:
        images: iterable of colour images accepted by ``cv2.cvtColor``.
            Assumed to be in RGB channel order, as implied by
            ``COLOR_RGB2GRAY`` -- confirm against the image loader.

    Returns:
        np.ndarray with one row of HOG features per input image.
    """
    hog_features = []
    for image in images:
        # Convert image to grayscale
        gray_image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

        # Resize image to 128x128
        resized_image = cv2.resize(gray_image, (128, 128))

        # Only the descriptor is needed. Leaving `visualize` at its default
        # (False) avoids rendering a HOG image per sample that was discarded.
        fd = hog(resized_image, orientations=8, pixels_per_cell=(16, 16),
                 cells_per_block=(1, 1))
        hog_features.append(fd)
    return np.array(hog_features)

# Run the HOG extractor on the training split first, then on the test split
train_images_hog, test_images_hog = map(
    extract_hog_features, (train_images, test_images)
)

# Report the shape of each resulting feature matrix
print("\nExtracted features data shapes:")
print("Train images HOG:", train_images_hog.shape)
print("Test images HOG:", test_images_hog.shape)




Extracted features data shapes:
Train images HOG: (264, 512)
Test images HOG: (78, 512)


In [4]:
import numpy as np
import torch
import torchvision.models as models
from torchvision import transforms
from torchvision.models import VGG16_Weights

# Initializing compute device (use GPU if available).
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Per-channel normalization parameters handed to transforms.Normalize below
normalization_std = [0.229, 0.224, 0.225]
normalization_mean = [0.485, 0.456, 0.406]

# Image preprocessing.
# A deterministic Resize replaces RandomResizedCrop: the random crop is a
# training-time augmentation, so each image would be described by a different
# random region/scale and the extracted features would change on every run.
# The output is still 128x128, so downstream feature shapes are unchanged.
loader = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((128, 128), antialias=True),
    transforms.Normalize(mean=normalization_mean, std=normalization_std)
])

# PyTorch VGG16 Feature Extraction
def extract_vgg16_features(images):
    """Describe each image with the flattened output of VGG16's conv stack.

    Every image is preprocessed with the module-level ``loader``, pushed
    through ``vgg16(...).features`` on ``device`` and flattened to 1-D.

    Args:
        images: iterable of images accepted by ``loader``.

    Returns:
        np.ndarray with one flattened feature vector per input image.
    """
    # `weights` expects an enum member (e.g. IMAGENET1K_V1), not the
    # VGG16_Weights class itself.
    model = models.vgg16(weights=VGG16_Weights.IMAGENET1K_V1).features.to(device)
    # Inference only: switch to eval mode and skip autograd bookkeeping.
    model.eval()
    vgg16_features = []
    with torch.no_grad():
        for image in images:
            img = loader(image).unsqueeze(0).to(device)
            feature = model(img).cpu().numpy().flatten()
            vgg16_features.append(feature)
    return np.array(vgg16_features)

# Run the VGG16 extractor on the training split, then on the test split
train_images_vgg16, test_images_vgg16 = (
    extract_vgg16_features(split) for split in (train_images, test_images)
)

# Report the shape of each VGG16 feature matrix
print("\nExtracted features data shapes:")
print("Extracted_features_of_train_images_vgg16:", train_images_vgg16.shape)
print("Extracted_features_of_test_images_vgg16:", test_images_vgg16.shape)


Extracted features data shapes:
Extracted_features_of_train_images_vgg16: (264, 8192)
Extracted_features_of_test_images_vgg16: (78, 8192)


In [5]:

# Normalize data
def min_max_scaling(data, min_val=None, max_val=None):
    """Linearly rescale ``data`` using a single global minimum and maximum.

    Args:
        data: array to rescale.
        min_val: value mapped to 0. Defaults to the global minimum of ``data``.
        max_val: value mapped to 1. Defaults to the global maximum of ``data``.
            Pass the training-set statistics here to scale held-out data
            consistently with the training data.

    Returns:
        ``(data - min_val) / (max_val - min_val)``; an all-zero float array of
        the same shape when ``max_val == min_val``.
    """
    if min_val is None:
        min_val = np.min(data)
    if max_val is None:
        max_val = np.max(data)
    value_range = max_val - min_val
    if value_range == 0:
        # Constant input: avoid 0/0 (NaN) and map everything to 0 instead.
        return np.zeros(np.shape(data), dtype=float)
    scaled_data = (data - min_val) / value_range
    return scaled_data

normalized_train_images = min_max_scaling(train_images_hog)
normalized_train_images_vgg16 = min_max_scaling(train_images_vgg16)

# Scale the test features with the TRAINING min/max. Scaling the test set by
# its own min/max maps identical raw feature values to different normalized
# values in the two splits, so projections learned on the training data would
# be applied to inconsistently scaled test data.
hog_min, hog_max = np.min(train_images_hog), np.max(train_images_hog)
normalized_test_images = (test_images_hog - hog_min) / (hog_max - hog_min)

vgg16_min, vgg16_max = np.min(train_images_vgg16), np.max(train_images_vgg16)
normalized_test_images_vgg16 = (test_images_vgg16 - vgg16_min) / (vgg16_max - vgg16_min)

print("\nNormalized features of hog data shapes:")
print("Normalized train images of HOG:", normalized_train_images.shape)
print("Normalized test images of HOG:", normalized_test_images.shape)


print("\nNormalized features of VGG16 data shapes:")
print("normalized train_images using VGG16:", normalized_train_images_vgg16.shape)
print("normalized test_images using VGG16:", normalized_test_images_vgg16.shape)



Normalized features of hog data shapes:
Normalized train images of HOG: (264, 512)
Normalized test images of HOG: (78, 512)

Normalized features of VGG16 data shapes:
normalized train_images using VGG16: (264, 8192)
normalized test_images using VGG16: (78, 8192)


In [6]:
# Dimensionality reduction using PCA
def PCA(train_data, alpha=0.95):
    """Fit PCA and keep the fewest components explaining ``alpha`` of the variance.

    Args:
        train_data: 2-D array with one sample per row.
        alpha: fraction of the total variance to retain, ``0 < alpha <= 1``.

    Returns:
        ``(eig_vectors, mean)``: ``eig_vectors`` has shape (n_features, k)
        with columns ordered by decreasing eigenvalue; ``mean`` is the
        per-feature mean of ``train_data``. ``k`` is 0 when the data has no
        variance at all.

    Raises:
        ValueError: if ``alpha`` is outside ``(0, 1]``.
    """
    if not 0 < alpha <= 1:
        raise ValueError(f"alpha must be in (0, 1], got {alpha}")

    mean = np.mean(train_data, axis=0)
    centered_data = train_data - mean
    # Scatter matrix (covariance without the 1/n factor); the missing factor
    # cancels in the explained-variance ratio below.
    cov_matrix = np.dot(centered_data.T, centered_data)
    eig_values, eig_vectors = np.linalg.eigh(cov_matrix)
    # eigh returns ascending eigenvalues; reorder to descending.
    idx = np.argsort(eig_values)[::-1]
    eig_values = eig_values[idx]
    eig_vectors = eig_vectors[:, idx]

    total = np.sum(eig_values)
    if total <= 0:
        # No variance to explain: avoid 0/0 and keep no components.
        return eig_vectors[:, :0], mean

    explained = np.cumsum(eig_values) / total
    reached = explained >= alpha
    if reached.any():
        # Index of the first cumulative ratio that reaches alpha, as a count.
        k = int(np.argmax(reached)) + 1
    else:
        # Rounding can leave the ratio marginally below alpha (e.g. alpha=1.0);
        # keep every component instead of indexing past the end.
        k = eig_values.shape[0]
    return eig_vectors[:, :k], mean

# Fit PCA on the normalized HOG training features
pca_components, mean = PCA(normalized_train_images)

# Center both splits with the training mean, then project onto the retained axes
train_projected_pca = (normalized_train_images - mean).dot(pca_components)
test_projected_pca = (normalized_test_images - mean).dot(pca_components)
print(f"\nPCA - Number of components retained: {pca_components.shape[1]}")

print("\nProjected data shapes after PCA:")
print("Projected train images:", train_projected_pca.shape)
print("Projected test images:", test_projected_pca.shape)


# Same procedure for the VGG16 features: fit on train, project train and test
space_pca_vgg16, mean_pca_vgg16 = PCA(normalized_train_images_vgg16)
train_projected_pca_vgg16 = (normalized_train_images_vgg16 - mean_pca_vgg16).dot(space_pca_vgg16)
test_projected_pca_vgg16 = (normalized_test_images_vgg16 - mean_pca_vgg16).dot(space_pca_vgg16)

print("\nReduction features of VGG16 data shapes:")
print("Reduction_train_images_PCA:", train_projected_pca_vgg16.shape)
print("Reduction_test_images_PCA:", test_projected_pca_vgg16.shape)



PCA - Number of components retained: 82

Projected data shapes after PCA:
Projected train images: (264, 82)
Projected test images: (78, 82)

Reduction features of VGG16 data shapes:
Reduction_train_images_PCA: (264, 110)
Reduction_test_images_PCA: (78, 110)


In [7]:
# Custom implementation of K-Means with convergence iteration tracking
def k_means(X, n_clusters, max_iters=100, tol=1e-4, random_state=None):
    """Cluster the rows of ``X`` with Lloyd's algorithm.

    Args:
        X: 2-D array, one sample per row.
        n_clusters: number of centroids; initial centroids are distinct rows
            of ``X`` chosen at random.
        max_iters: maximum number of assignment/update rounds (at least 1).
        tol: stop once the norm of the centroid shift falls below this value.
        random_state: optional seed for the initial centroid choice. When
            ``None`` the global NumPy random state is used.

    Returns:
        ``(labels, centroids, n_iterations, centroid_history)`` where
        ``centroid_history`` holds the initial centroids followed by the
        centroids computed in every iteration.

    Raises:
        ValueError: if ``max_iters`` is smaller than 1.
    """
    if max_iters < 1:
        # With zero iterations neither labels nor an iteration count exist.
        raise ValueError("max_iters must be at least 1")

    n_samples, _ = X.shape
    chooser = np.random if random_state is None else np.random.RandomState(random_state)
    centroids = X[chooser.choice(n_samples, n_clusters, replace=False)]
    centroid_history = [centroids.copy()]  # Track centroid history
    for iter_ in range(max_iters):
        distances = np.linalg.norm(X[:, np.newaxis] - centroids, axis=2)
        labels = np.argmin(distances, axis=1)
        # A cluster can end up empty (e.g. duplicate points picked as initial
        # centroids); keep its previous centroid instead of the NaN produced
        # by taking the mean of an empty slice.
        new_centroids = np.array([
            X[labels == k].mean(axis=0) if np.any(labels == k) else centroids[k]
            for k in range(n_clusters)
        ])
        centroid_history.append(new_centroids.copy())  # Track new centroids
        if np.linalg.norm(new_centroids - centroids) < tol:
            break
        centroids = new_centroids
    return labels, centroids, iter_ + 1, centroid_history


In [None]:

import numpy as np
import cv2
import os
from sklearn.cluster import KMeans
from sklearn.metrics import pairwise_distances_argmin_min

# Function to load images and convert to grayscale
def load_images_from_folder(folder):
    """Read every decodable image in ``folder`` as a grayscale array.

    Args:
        folder: directory whose entries are passed to ``cv2.imread``.

    Returns:
        List of grayscale images, ordered by filename. Entries for which
        ``cv2.imread`` returns ``None`` are skipped.
    """
    images = []
    # os.listdir() yields entries in arbitrary order; sorting keeps the image
    # indices reported downstream reproducible across runs and machines.
    for filename in sorted(os.listdir(folder)):
        img = cv2.imread(os.path.join(folder, filename))
        if img is None:
            continue
        # cv2.imread returns BGR channel order, hence BGR2GRAY.
        images.append(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY))
    return images

# Function to extract HOG features from images
# NOTE: this reuses the name of the skimage-based extract_hog_features defined
# in the earlier HOG cell; whichever cell ran last is the one in effect.
def extract_hog_features(images):
    """Compute one fixed-length OpenCV HOG descriptor per grayscale image.

    Each image is resized to the descriptor's detection window before
    ``compute`` is called. Without the resize, ``compute`` returns a
    descriptor for every window position in the image, so the feature length
    grows with the image size and differs between images of different sizes.

    Args:
        images: iterable of single-channel images (as produced by
            ``load_images_from_folder``).

    Returns:
        np.ndarray with one flattened descriptor per input image.
    """
    hog = cv2.HOGDescriptor()
    window_size = tuple(hog.winSize)  # (width, height), the order cv2.resize expects
    features = [
        hog.compute(cv2.resize(image, window_size)).flatten()
        for image in images
    ]
    return np.array(features)

# Load normal images and extract features
normal_images = load_images_from_folder('path_to_normal_images')
normal_features = extract_hog_features(normal_images)

# Train k-means on normal features
kmeans = KMeans(n_clusters=5, random_state=0).fit(normal_features)

# Load mixed images (normal + anomalies) and extract features
mixed_images = load_images_from_folder('path_to_mixed_images')
mixed_features = extract_hog_features(mixed_images)

# Predict the nearest cluster for each mixed feature
closest_clusters, distances = pairwise_distances_argmin_min(mixed_features, kmeans.cluster_centers_)

# Define a threshold for anomalies from the NORMAL data: the 95th percentile
# of the distances between normal samples and their nearest centroid.
# Taking the percentile of the mixed distances instead would flag ~5% of the
# scored images as anomalous no matter how many anomalies they contain.
normal_distances = pairwise_distances_argmin_min(normal_features, kmeans.cluster_centers_)[1]
threshold = np.percentile(normal_distances, 95)  # 95th percentile as an example

# Identify anomalies
anomalies = distances > threshold

# Print results
for i, is_anomaly in enumerate(anomalies):
    if is_anomaly:
        print(f"Image {i} is an anomaly.")
    else:
        print(f"Image {i} is normal.")


In [None]:

from sklearn.cluster import DBSCAN

# DBSCAN for anomaly detection
def dbscan_anomaly_detection(normal_features, mixed_features, eps=0.5, min_samples=5):
    """Flag mixed samples that lie outside the dense regions of the normal data.

    DBSCAN is fitted on ``normal_features`` only. A mixed sample is treated
    as normal when it lies within ``eps`` of at least one core sample of that
    fit, and as an anomaly otherwise. (Calling ``fit_predict`` on the mixed
    data would refit the model and discard what was learned from the normal
    data.)

    Args:
        normal_features: 2-D array of features from normal samples.
        mixed_features: 2-D array of features to score.
        eps: DBSCAN neighbourhood radius, also used as the anomaly radius.
        min_samples: DBSCAN core-sample density threshold.

    Returns:
        Boolean array, ``True`` where the mixed sample is an anomaly.
    """
    # Local import so this cell does not depend on another cell's imports.
    from sklearn.metrics import pairwise_distances_argmin_min

    dbscan = DBSCAN(eps=eps, min_samples=min_samples).fit(normal_features)
    core_samples = dbscan.components_
    n_mixed = len(mixed_features)
    if n_mixed == 0 or core_samples.shape[0] == 0:
        # No dense region was found in the normal data (or nothing to score):
        # every scored sample is outside all clusters.
        return np.ones(n_mixed, dtype=bool)
    _, nearest_core_distance = pairwise_distances_argmin_min(mixed_features, core_samples)
    anomalies = nearest_core_distance > eps
    return anomalies

# Score the mixed set with the DBSCAN-based detector
anomalies_dbscan = dbscan_anomaly_detection(normal_features, mixed_features)

# Report one verdict line per image
print("DBSCAN Results:")
for i, is_anomaly in enumerate(anomalies_dbscan):
    print(f"Image {i} is an anomaly." if is_anomaly else f"Image {i} is normal.")


In [None]:

from sklearn.mixture import GaussianMixture

# GMM for anomaly detection
def gmm_anomaly_detection(normal_features, mixed_features, n_components=5, threshold=0.01):
    """Flag mixed samples whose log-likelihood under a GMM of the normal data is low.

    A ``GaussianMixture`` with ``n_components`` components (``random_state=0``)
    is fitted on ``normal_features``; a mixed sample is an anomaly when its
    ``score_samples`` value is below ``log(threshold)``.

    NOTE(review): ``threshold`` is compared as an absolute density value;
    whether 0.01 is a sensible level for these features should be confirmed.

    Returns:
        Boolean array, ``True`` where the mixed sample is an anomaly.
    """
    mixture = GaussianMixture(n_components=n_components, random_state=0)
    mixture.fit(normal_features)
    sample_scores = mixture.score_samples(mixed_features)
    return sample_scores < np.log(threshold)

# Score the mixed set with the GMM-based detector
anomalies_gmm = gmm_anomaly_detection(normal_features, mixed_features)

# Report one verdict line per image
print("GMM Results:")
for i, is_anomaly in enumerate(anomalies_gmm):
    print(f"Image {i} is an anomaly." if is_anomaly else f"Image {i} is normal.")
