# Downloading Mvtec Dataset

In [4]:
# !wget https://www.mydrive.ch/shares/38536/3830184030e49fe74747669442f0f282/download/420938113-1629952094/mvtec_anomaly_detection.tar.xz

--2024-06-04 18:24:53--  https://www.mydrive.ch/shares/38536/3830184030e49fe74747669442f0f282/download/420938113-1629952094/mvtec_anomaly_detection.tar.xz
Resolving www.mydrive.ch (www.mydrive.ch)... 91.214.169.64
Connecting to www.mydrive.ch (www.mydrive.ch)|91.214.169.64|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 5264982680 (4.9G) [application/x-xz]
Saving to: ‘mvtec_anomaly_detection.tar.xz’


2024-06-04 18:27:30 (32.0 MB/s) - ‘mvtec_anomaly_detection.tar.xz’ saved [5264982680/5264982680]



In [10]:
import os
import shutil

In [9]:
# Define paths
mvtec_path = "/teamspace/studios/this_studio/Mvtec"
output_path = "/teamspace/studios/this_studio/differnet-master/Mvtec_dataset"


In [12]:
# List of all object classes in the Mvtec dataset
object_classes = [d for d in os.listdir(mvtec_path) if os.path.isdir(os.path.join(mvtec_path, d))]
print(f"classes inside of the Mvtec dataset: \n{object_classes} ")

classes inside of the Mvtec dataset: 
['toothbrush', 'transistor', 'carpet', 'zipper', 'capsule', 'metal_nut', 'tile', 'leather', 'hazelnut', 'screw', 'bottle', 'wood', 'cable', 'grid', 'pill'] 


In [13]:
# Dictionary to keep track of defect image counts
defect_image_counts = {}

for obj_class in object_classes:
    obj_path = os.path.join(mvtec_path, obj_class)

    # Create target directories for each class
    class_output_path = os.path.join(output_path, obj_class)
    os.makedirs(os.path.join(class_output_path, "train/good"), exist_ok=True)
    os.makedirs(os.path.join(class_output_path, "test/good"), exist_ok=True)
    os.makedirs(os.path.join(class_output_path, "test/anomaly"), exist_ok=True)

    # Initialize the defect image count for the current class
    defect_image_counts[obj_class] = 0

    # Copy training good images
    train_good_path = os.path.join(obj_path, "train/good")
    if os.path.exists(train_good_path):
        for img_name in os.listdir(train_good_path):
            src = os.path.join(train_good_path, img_name)
            dst = os.path.join(class_output_path, "train/good", img_name)
            shutil.copy(src, dst)

    # Copy testing good images
    test_good_path = os.path.join(obj_path, "test/good")
    if os.path.exists(test_good_path):
        for img_name in os.listdir(test_good_path):
            src = os.path.join(test_good_path, img_name)
            dst = os.path.join(class_output_path, "test/good", img_name)
            shutil.copy(src, dst)

    # Copy testing anomaly images and count them
    test_anomaly_path = os.path.join(obj_path, "test")
    if os.path.exists(test_anomaly_path):
        for anomaly_folder in os.listdir(test_anomaly_path):
            if anomaly_folder != "good":
                anomaly_folder_path = os.path.join(test_anomaly_path, anomaly_folder)
                for img_name in os.listdir(anomaly_folder_path):
                    src = os.path.join(anomaly_folder_path, img_name)
                    dst = os.path.join(class_output_path, "test/anomaly", f"{anomaly_folder}_{img_name}")
                    shutil.copy(src, dst)
                    defect_image_counts[obj_class] += 1

# Verify the counts
verification_passed = True
for obj_class in object_classes:
    class_output_anomaly_path = os.path.join(output_path, obj_class, "test/anomaly")
    copied_defect_images = len(os.listdir(class_output_anomaly_path))
    if copied_defect_images != defect_image_counts[obj_class]:
        print(f"Error: Number of copied defect images for class '{obj_class}' does not match.")
        print(f"Expected: {defect_image_counts[obj_class]}, Found: {copied_defect_images}")
        verification_passed = False

if verification_passed:
    print("Dataset reorganized successfully, and all counts are verified!")
else:
    print("Dataset reorganization completed with errors. Please check the messages above.")


Dataset reorganized successfully, and all counts are verified!


In [16]:
def count_images_in_directory(directory_path):
    total_count = 0
    for root, dirs, files in os.walk(directory_path):
        total_count += len(files)
    return total_count

# Count images in the source directories
source_counts = {}
for obj_class in object_classes:
    source_class_path = os.path.join(mvtec_path, obj_class)
    source_counts[obj_class] = {
        "train_good": count_images_in_directory(os.path.join(source_class_path, "train/good")),
        "test_good": count_images_in_directory(os.path.join(source_class_path, "test/good")),
        "test_anomaly": count_images_in_directory(os.path.join(source_class_path, "test")) - count_images_in_directory(os.path.join(source_class_path, "test/good"))
    }

# Count images in the destination directories
destination_counts = {}
for obj_class in object_classes:
    destination_class_path = os.path.join(output_path, obj_class)
    destination_counts[obj_class] = {
        "train_good": count_images_in_directory(os.path.join(destination_class_path, "train/good")),
        "test_good": count_images_in_directory(os.path.join(destination_class_path, "test/good")),
        "test_anomaly": count_images_in_directory(os.path.join(destination_class_path, "test/anomaly"))
    }

# Print the counts for verification
for obj_class in object_classes:
    print(f"Class: {obj_class}")
    print(f"  Source - train/good: {source_counts[obj_class]['train_good']}, test/good: {source_counts[obj_class]['test_good']}, test/anomaly: {source_counts[obj_class]['test_anomaly']}")
    print(f"  Destination - train/good: {destination_counts[obj_class]['train_good']}, test/good: {destination_counts[obj_class]['test_good']}, test/anomaly: {destination_counts[obj_class]['test_anomaly']}")
    print("")

# Verify the counts
verification_passed = True
for obj_class in object_classes:
    if source_counts[obj_class]["train_good"] != destination_counts[obj_class]["train_good"] or \
       source_counts[obj_class]["test_good"] != destination_counts[obj_class]["test_good"] or \
       source_counts[obj_class]["test_anomaly"] != destination_counts[obj_class]["test_anomaly"]:
        print(f"Error: Mismatch in counts for class '{obj_class}'.")
        verification_passed = False

if verification_passed:
    print("All counts match successfully!")
else:
    print("There are mismatches in the counts. Please check the messages above.")


Class: toothbrush
  Source - train/good: 60, test/good: 12, test/anomaly: 30
  Destination - train/good: 60, test/good: 12, test/anomaly: 30

Class: transistor
  Source - train/good: 213, test/good: 60, test/anomaly: 40
  Destination - train/good: 213, test/good: 60, test/anomaly: 40

Class: carpet
  Source - train/good: 280, test/good: 28, test/anomaly: 89
  Destination - train/good: 280, test/good: 28, test/anomaly: 89

Class: zipper
  Source - train/good: 240, test/good: 32, test/anomaly: 119
  Destination - train/good: 240, test/good: 32, test/anomaly: 119

Class: capsule
  Source - train/good: 219, test/good: 23, test/anomaly: 109
  Destination - train/good: 219, test/good: 23, test/anomaly: 109

Class: metal_nut
  Source - train/good: 220, test/good: 22, test/anomaly: 93
  Destination - train/good: 220, test/good: 22, test/anomaly: 93

Class: tile
  Source - train/good: 230, test/good: 33, test/anomaly: 84
  Destination - train/good: 230, test/good: 33, test/anomaly: 84

Class: l

# Model Trainig on Mvtec

In [2]:
cd differnet-master/

/teamspace/studios/this_studio/differnet-master


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


## bottle class until hazelnut class

In [3]:
!python main.py

Training model for class: bottle

Train epoch 0
100%|█████████████████████████████████████████████| 3/3 [00:20<00:00,  6.80s/it]
Epoch: 0.0 	 train loss: 1.9764
100%|█████████████████████████████████████████████| 3/3 [00:16<00:00,  5.52s/it]
Epoch: 0.1 	 train loss: 0.0548
100%|█████████████████████████████████████████████| 3/3 [00:16<00:00,  5.47s/it]
Epoch: 0.2 	 train loss: -0.3447
100%|█████████████████████████████████████████████| 3/3 [00:16<00:00,  5.41s/it]
Epoch: 0.3 	 train loss: -0.7834
100%|█████████████████████████████████████████████| 3/3 [00:16<00:00,  5.46s/it]
Epoch: 0.4 	 train loss: -1.1636
100%|█████████████████████████████████████████████| 3/3 [00:16<00:00,  5.34s/it]
Epoch: 0.5 	 train loss: -1.4075
100%|█████████████████████████████████████████████| 3/3 [00:16<00:00,  5.40s/it]
Epoch: 0.6 	 train loss: -1.4687
100%|█████████████████████████████████████████████| 3/3 [00:16<00:00,  5.44s/it]
Epoch: 0.7 	 train loss: -1.5520

Compute loss and scores on test set:
100%

## Start From hazelnut

In [1]:
cd differnet-master/

/teamspace/studios/this_studio/differnet-master


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [2]:
!python main.py

Training model for class: hazelnut

Train epoch 0
100%|█████████████████████████████████████████████| 6/6 [01:21<00:00, 13.61s/it]
Epoch: 0.0 	 train loss: 0.1722
100%|█████████████████████████████████████████████| 6/6 [00:45<00:00,  7.62s/it]
Epoch: 0.1 	 train loss: -0.4408
100%|█████████████████████████████████████████████| 6/6 [00:37<00:00,  6.22s/it]
Epoch: 0.2 	 train loss: -1.0371
100%|█████████████████████████████████████████████| 6/6 [00:37<00:00,  6.19s/it]
Epoch: 0.3 	 train loss: -1.2135
100%|█████████████████████████████████████████████| 6/6 [00:37<00:00,  6.21s/it]
Epoch: 0.4 	 train loss: -1.2909
100%|█████████████████████████████████████████████| 6/6 [00:37<00:00,  6.23s/it]
Epoch: 0.5 	 train loss: -1.3445
100%|█████████████████████████████████████████████| 6/6 [00:37<00:00,  6.22s/it]
Epoch: 0.6 	 train loss: -1.4609
100%|█████████████████████████████████████████████| 6/6 [00:37<00:00,  6.25s/it]
Epoch: 0.7 	 train loss: -1.4711

Compute loss and scores on test set:
1

## Start From metal_nut

In [1]:
cd differnet-master/

/teamspace/studios/this_studio/differnet-master


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [2]:
!python main.py

Training model for class: metal_nut

Train epoch 0
100%|█████████████████████████████████████████████| 4/4 [00:44<00:00, 11.18s/it]
Epoch: 0.0 	 train loss: 0.7516
100%|█████████████████████████████████████████████| 4/4 [00:25<00:00,  6.26s/it]
Epoch: 0.1 	 train loss: -0.0851
100%|█████████████████████████████████████████████| 4/4 [00:15<00:00,  3.90s/it]
Epoch: 0.2 	 train loss: -0.5398
100%|█████████████████████████████████████████████| 4/4 [00:15<00:00,  3.86s/it]
Epoch: 0.3 	 train loss: -0.7632
100%|█████████████████████████████████████████████| 4/4 [00:15<00:00,  3.87s/it]
Epoch: 0.4 	 train loss: -0.9638
100%|█████████████████████████████████████████████| 4/4 [00:14<00:00,  3.57s/it]
Epoch: 0.5 	 train loss: -0.9958
100%|█████████████████████████████████████████████| 4/4 [00:14<00:00,  3.53s/it]
Epoch: 0.6 	 train loss: -1.0902
100%|█████████████████████████████████████████████| 4/4 [00:14<00:00,  3.56s/it]
Epoch: 0.7 	 train loss: -1.0903

Compute loss and scores on test set:
