In [1]:
import os
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader, random_split, ConcatDataset
import pytorch_lightning as pl
from pytorch_lightning import Trainer
from torch import nn
from torch.optim import Adam
from torchvision.transforms import  Compose, ToTensor, Resize, RandomHorizontalFlip, RandomRotation, ColorJitter, RandomResizedCrop, Lambda
from PIL import Image
import matplotlib.pyplot as plt
from torchsummary import summary
import torchmetrics
from lightning.pytorch.loggers import WandbLogger
import wandb
import random
from datetime import datetime
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
import os

# GPU selection: export CUDA_VISIBLE_DEVICES=2 for this kernel
# (presumably to pin the notebook to one GPU - confirm the index for your machine).
%env CUDA_VISIBLE_DEVICES=2
# Optional PyTorch CUDA allocator setting, currently disabled:
#%env PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:32

# Hugging Face cache: point HF_HOME at a directory relative to the notebook,
# then echo the value and show the CLI login status as a sanity check.
os.environ["HF_HOME"] = "../.cache"
!echo $HF_HOME
!huggingface-cli whoami

# Render matplotlib figures inline in the notebook output.
%matplotlib inline 


  from .autonotebook import tqdm as notebook_tqdm


env: CUDA_VISIBLE_DEVICES=2
../.cache
Not logged in


## Find all useful images in the MVTec dataset

In [2]:
import os
import shutil

def _remap_label_lines(lines, allowed_ids, map_ids):
    """Validate and re-map the class IDs of one label file.

    Args:
        lines: Raw text lines of a label file. Each non-blank line must start
            with an integer class ID followed by whitespace-separated fields.
        allowed_ids: Set of class IDs that are acceptable.
        map_ids: Optional dict mapping original class ID -> new class ID.
            IDs without an entry keep their original value.

    Returns:
        The converted lines (without trailing newline), or ``None`` as soon as
        one annotation uses a class ID that is not in ``allowed_ids``.

    Raises:
        ValueError: If the first field of a non-blank line is not an integer.
    """
    converted = []
    for raw_line in lines:
        parts = raw_line.split()
        if not parts:
            # Blank / whitespace-only lines (e.g. a trailing empty line) carry
            # no annotation; indexing parts[0] on them would raise IndexError.
            continue

        class_id = int(parts[0])
        if class_id not in allowed_ids:
            return None

        # Re-map the ID if a mapping for it was supplied.
        new_class_id = map_ids[class_id] if map_ids and class_id in map_ids else class_id
        converted.append(f"{new_class_id} " + " ".join(parts[1:]))
    return converted


def get_mvtec_with_classes(class_list, image_path, annotation_path, output_path, name="test_set", info=True, map_ids=None):
    """Collect all images whose annotations use only the given classes.

    Every subfolder of ``annotation_path`` is scanned for label files. A label
    file is accepted when every annotation in it has a class ID contained in
    ``class_list`` and a ``.jpg`` with the same stem exists in
    ``image_path/<subfolder>``.

    Args:
        class_list: Iterable of integer class IDs that are allowed.
        image_path: Root directory of the images (one subfolder per
            annotation subfolder).
        annotation_path: Root directory of the label files (one subfolder per
            split). Entries that are not directories are ignored.
        output_path: Root of the export. Only used when ``info`` is False:
            images go to ``output_path/images/<name>`` and labels to
            ``output_path/labels/<name>``.
        name: Name of the export subfolder.
        info: If True (default) nothing is written; the number of valid images
            per subfolder is printed instead. If False, images are copied and
            re-mapped label files are written.
        map_ids: Optional dict ``{original_class_id: new_class_id}`` applied to
            the written labels. IDs without an entry are kept unchanged.

    Returns:
        List of paths of all accepted images (in directory-listing order).

    Raises:
        ValueError: If a non-blank label line does not start with an integer.
    """
    allowed_ids = set(class_list)  # O(1) membership test per annotation line
    valid_images = []

    images_out = os.path.join(output_path, "images", name)
    labels_out = os.path.join(output_path, "labels", name)
    if not info:
        # Create the target folder structure once, up front.
        os.makedirs(images_out, exist_ok=True)
        os.makedirs(labels_out, exist_ok=True)

    for subfolder in os.listdir(annotation_path):
        folder_path = os.path.join(annotation_path, subfolder)
        if not os.path.isdir(folder_path):
            # Stray files next to the split folders would make os.listdir fail.
            continue

        valid_count = 0
        for file in os.listdir(folder_path):
            file_path = os.path.join(folder_path, file)
            if not os.path.isfile(file_path):
                continue

            with open(file_path, "r", encoding="utf-8") as f:
                lines = f.readlines()

            converted_lines = _remap_label_lines(lines, allowed_ids, map_ids)
            if converted_lines is None:
                # At least one annotation uses an unwanted class.
                continue

            image_filename = os.path.splitext(file)[0] + ".jpg"
            image_full_path = os.path.join(image_path, subfolder, image_filename)
            if not os.path.exists(image_full_path):
                continue

            valid_images.append(image_full_path)
            valid_count += 1

            if not info:
                # Copy the image (including metadata).
                shutil.copy2(image_full_path, os.path.join(images_out, image_filename))

                # Store the label file with the rewritten class IDs.
                label_target_path = os.path.join(labels_out, file)
                with open(label_target_path, "w", encoding="utf-8") as f:
                    f.write("\n".join(converted_lines) + "\n")

        if info:
            print(f"{subfolder}: {valid_count} gültige Bilder")

    return valid_images


## For the first generated dataset

In [3]:
# Source class id -> target class id used when rewriting the MVTec labels
# for the first generated dataset.
map_ids = {
    25: 0, 26: 0, 27: 0,
    28: 1,
    50: 4, 51: 3,
    30: 2,
    44: 4, 45: 4, 46: 4, 47: 4, 48: 4, 49: 4,
}

# Experiment notes:
#   1. Train on the generated images and run the old test with
#        {25: 0, 26: 0, 27: 0, 28: 1, 50: 4, 51: 3, 30: 2, 44: 4, 45: 2, 46: 4, 47: 4, 48: 4, 49: 4}
#        [25, 26, 27, 28, 50, 51, 30, 44, 45, 46, 47, 48, 49, 50]
#      and compare the result with the new training.
#   2. Then test on the new classes:
#        [25, 26, 27, 28, 50, 51, 29, 30, 21, 20, 22, 23, 44, 45, 46, 47, 48, 49, 50]
#        {25: 0, 26: 0, 27: 0, 28: 1, 50: 4, 51: 3, 30: 2, 44: 4, 45: 2, 46: 4, 47: 4, 48: 4, 49: 4, 21: <not filled in yet>}

# Copy every MVTec image whose annotations use only the listed classes
# (50 is listed twice, which is harmless for the membership test).
get_mvtec_with_classes(
    class_list=[25, 26, 27, 28, 50, 51, 30, 44, 45, 46, 47, 48, 49, 50],
    image_path="../../huggingface/mvtec_annotated/images",
    annotation_path="../../huggingface/mvtec_annotated/labels",
    output_path="../../huggingface/ai_shelf/artificial_created_dataset",
    name="mv_tec_images",
    info=False,
    map_ids=map_ids,
)


['../../huggingface/mvtec_annotated/images/train/D2S_016124.jpg',
 '../../huggingface/mvtec_annotated/images/train/D2S_003124.jpg',
 '../../huggingface/mvtec_annotated/images/train/D2S_004102.jpg',
 '../../huggingface/mvtec_annotated/images/train/D2S_004518.jpg',
 '../../huggingface/mvtec_annotated/images/train/D2S_002603.jpg',
 '../../huggingface/mvtec_annotated/images/train/D2S_004114.jpg',
 '../../huggingface/mvtec_annotated/images/train/D2S_003611.jpg',
 '../../huggingface/mvtec_annotated/images/train/D2S_004614.jpg',
 '../../huggingface/mvtec_annotated/images/train/D2S_013815.jpg',
 '../../huggingface/mvtec_annotated/images/train/D2S_004506.jpg',
 '../../huggingface/mvtec_annotated/images/train/D2S_004608.jpg',
 '../../huggingface/mvtec_annotated/images/train/D2S_003126.jpg',
 '../../huggingface/mvtec_annotated/images/train/D2S_002529.jpg',
 '../../huggingface/mvtec_annotated/images/train/D2S_003007.jpg',
 '../../huggingface/mvtec_annotated/images/train/D2S_002624.jpg',
 '../../hu

## For the second generated dataset, but tested with the OLD classes

In [4]:
# Source class id -> target class id for the second generated dataset,
# restricted to the old class selection.
# Author's note: there is no lemon, oatmeal or tomato sauce in MVTec.
map_ids = {
    25: 0, 26: 0, 27: 0,
    28: 1,
    50: 5, 51: 4,
    30: 2,
    44: 5, 45: 5, 46: 5, 47: 5, 48: 5, 49: 5,
}

# Same class selection as for the first dataset, exported under a separate
# name so both runs can be compared.
get_mvtec_with_classes(
    class_list=[25, 26, 27, 28, 50, 51, 30, 44, 45, 46, 47, 48, 49, 50],
    image_path="../../huggingface/mvtec_annotated/images",
    annotation_path="../../huggingface/mvtec_annotated/labels",
    output_path="../../huggingface/ai_shelf/artificial_mult_back_10_class",
    name="mv_tec_images_compare",
    info=False,
    map_ids=map_ids,
)

['../../huggingface/mvtec_annotated/images/train/D2S_016124.jpg',
 '../../huggingface/mvtec_annotated/images/train/D2S_003124.jpg',
 '../../huggingface/mvtec_annotated/images/train/D2S_004102.jpg',
 '../../huggingface/mvtec_annotated/images/train/D2S_004518.jpg',
 '../../huggingface/mvtec_annotated/images/train/D2S_002603.jpg',
 '../../huggingface/mvtec_annotated/images/train/D2S_004114.jpg',
 '../../huggingface/mvtec_annotated/images/train/D2S_003611.jpg',
 '../../huggingface/mvtec_annotated/images/train/D2S_004614.jpg',
 '../../huggingface/mvtec_annotated/images/train/D2S_013815.jpg',
 '../../huggingface/mvtec_annotated/images/train/D2S_004506.jpg',
 '../../huggingface/mvtec_annotated/images/train/D2S_004608.jpg',
 '../../huggingface/mvtec_annotated/images/train/D2S_003126.jpg',
 '../../huggingface/mvtec_annotated/images/train/D2S_002529.jpg',
 '../../huggingface/mvtec_annotated/images/train/D2S_003007.jpg',
 '../../huggingface/mvtec_annotated/images/train/D2S_002624.jpg',
 '../../hu

## For the second generated dataset with all classes

In [5]:
# Source class id -> target class id for the second generated dataset with
# the extended class selection.
# Author's note: the only addition is coffee (presumably ids 20, 21, 22 -> 3; confirm).
map_ids = {
    25: 0, 26: 0, 27: 0,
    28: 1,
    50: 5, 51: 4,
    30: 2,
    44: 5, 45: 5, 46: 5, 47: 5, 48: 5, 49: 5,
    21: 3, 22: 3, 20: 3,
}

# NOTE(review): ids 23 and 29 are accepted by class_list but have no entry in
# map_ids, so their annotations keep the original id in the written labels.
# Confirm this is intended.
get_mvtec_with_classes(
    class_list=[25, 26, 27, 28, 50, 51, 29, 30, 21, 20, 22, 23, 44, 45, 46, 47, 48, 49, 50],
    image_path="../../huggingface/mvtec_annotated/images",
    annotation_path="../../huggingface/mvtec_annotated/labels",
    output_path="../../huggingface/ai_shelf/artificial_mult_back_10_class",
    name="all_usefull_mv_tec_images",
    info=False,
    map_ids=map_ids,
)

['../../huggingface/mvtec_annotated/images/train/D2S_009403.jpg',
 '../../huggingface/mvtec_annotated/images/train/D2S_005906.jpg',
 '../../huggingface/mvtec_annotated/images/train/D2S_016124.jpg',
 '../../huggingface/mvtec_annotated/images/train/D2S_003124.jpg',
 '../../huggingface/mvtec_annotated/images/train/D2S_004102.jpg',
 '../../huggingface/mvtec_annotated/images/train/D2S_005721.jpg',
 '../../huggingface/mvtec_annotated/images/train/D2S_004518.jpg',
 '../../huggingface/mvtec_annotated/images/train/D2S_006001.jpg',
 '../../huggingface/mvtec_annotated/images/train/D2S_002603.jpg',
 '../../huggingface/mvtec_annotated/images/train/D2S_012326.jpg',
 '../../huggingface/mvtec_annotated/images/train/D2S_012417.jpg',
 '../../huggingface/mvtec_annotated/images/train/D2S_001812.jpg',
 '../../huggingface/mvtec_annotated/images/train/D2S_001203.jpg',
 '../../huggingface/mvtec_annotated/images/train/D2S_004114.jpg',
 '../../huggingface/mvtec_annotated/images/train/D2S_003611.jpg',
 '../../hu

# Find useful images in the custom dataset

In [12]:
import shutil
from pathlib import Path
import ast
import os

def filter_and_copy_dataset_structured(
    useful_classes,
    path_input,
    dst_base_path,
    name
):
    """Copy all samples of a custom dataset that contain only useful classes.

    Every subdirectory of ``path_input`` is scanned for ``*.txt`` files. Only
    the first line of each file is inspected; it must start with ``Objects:``
    followed by a Python dict literal whose keys are the class IDs present in
    the image. Files without such a header are skipped silently.

    A sample is accepted when all keys are contained in ``useful_classes``.
    For accepted samples the ``.jpg`` with the same stem is copied to
    ``dst_base_path/images/<name>`` and a label file listing one class key per
    line is written to ``dst_base_path/labels/<name>``.

    The function is best effort: malformed headers, missing images and I/O
    failures while copying are reported via ``print`` and the scan continues.

    Args:
        useful_classes: Container of accepted class IDs.
        path_input: Root directory of the dataset (str or Path).
        dst_base_path: Root directory of the export (str or Path).
        name: Name of the export subfolder below ``images`` / ``labels``.

    Returns:
        None.
    """
    path_input = Path(path_input)
    dst_base_path = Path(dst_base_path)

    image_dst_path = dst_base_path / "images" / name
    labels_dst_path = dst_base_path / "labels" / name

    image_dst_path.mkdir(parents=True, exist_ok=True)
    labels_dst_path.mkdir(parents=True, exist_ok=True)

    for combined_path in path_input.iterdir():
        if not combined_path.is_dir():
            continue

        for txt_file in combined_path.glob("*.txt"):
            # Only the header line is needed; close the file right away.
            with open(txt_file, "r", encoding="utf-8") as f:
                first_line = f.readline()

            if not first_line.startswith("Objects:"):
                continue

            dict_str = first_line.replace("Objects:", "").strip()
            try:
                obj_dict = ast.literal_eval(dict_str)
                # Also fails (and is reported) if the literal is not a dict.
                keys = list(obj_dict.keys())
            except Exception as e:
                # Keep the try block limited to parsing so that this message
                # is only printed for real parse problems.
                print(f"{txt_file}: Fehler beim Parsen - {e}")
                continue

            if not all(k in useful_classes for k in keys):
                print(f"{txt_file}: ❌ enthält unerwünschte Klassen")
                continue

            print(f"{txt_file}: ✅ gültig")

            jpg_file = txt_file.with_suffix(".jpg")
            if not jpg_file.exists():
                print(f"{jpg_file} fehlt ❌")
                continue

            try:
                # Copy the image.
                shutil.copy(jpg_file, image_dst_path / jpg_file.name)

                # Write the label file: one class key per line.
                out_label_file = labels_dst_path / txt_file.name
                with open(out_label_file, "w", encoding="utf-8") as out_f:
                    out_f.writelines(f"{k}\n" for k in keys)
            except OSError as e:
                # I/O failures used to be reported as parse errors.
                print(f"{txt_file}: Fehler beim Kopieren - {e}")

In [None]:
from pathlib import Path
import ast
import os

# Class-id legend taken from the author's notes (verify against the dataset):
#   1: apple, 2: lemon, 3: avocado, 4: banana, 5: tomato sauce,
#   9: pasta, 13: coffee, 26: tea, 48: cucumber
useful_classes_small = [1, 2, 3, 4, 48, 26]
useful_classes_full = [5, 1, 2, 3, 4, 48, 26, 13]

# Fixed typo: the folder was spelled "Datast" here, while the executed cells
# read from "../../Dataset/local_dataset_all".
path_input = Path("../../Dataset/local_dataset_all")  # no trailing comma, it would turn this into a tuple

images = "images"
labels = "labels"
# NOTE(review): the filter calls in this notebook write to
# "../../huggingface/ai_shelf/artificial_created_dataset"; this path lacks the
# "ai_shelf" component. Confirm which one is intended.
dst_path = "../../huggingface/artificial_created_dataset"
name = "custom_images_small_classes"

# Layout built here: <dst_path>/<name>/images and <dst_path>/<name>/labels.
dst_path = os.path.join(dst_path, name)
image_dst_path = os.path.join(dst_path, images)
labels_dst_path = os.path.join(dst_path, labels)



In [14]:
# Export the custom-dataset samples that contain only the "small" class
# selection. Images are copied to <dst_base_path>/images/<name> and the label
# files are written to <dst_base_path>/labels/<name>.
useful_classes_small = [1, 2, 3, 4, 48, 26]
filter_and_copy_dataset_structured(
    useful_classes=useful_classes_small,
    path_input="../../Dataset/local_dataset_all",
    dst_base_path="../../huggingface/ai_shelf/artificial_created_dataset",
    name="custom_images_small_classes"
)

../../Dataset/local_dataset_all/train/image_278.txt: ❌ enthält unerwünschte Klassen
../../Dataset/local_dataset_all/train/image_1410.txt: ✅ gültig
../../Dataset/local_dataset_all/train/image_23437.txt: ❌ enthält unerwünschte Klassen
../../Dataset/local_dataset_all/train/image_261.txt: ❌ enthält unerwünschte Klassen
../../Dataset/local_dataset_all/train/image_918.txt: ✅ gültig
../../Dataset/local_dataset_all/train/image_161.txt: ❌ enthält unerwünschte Klassen
../../Dataset/local_dataset_all/train/image_2706.txt: ❌ enthält unerwünschte Klassen
../../Dataset/local_dataset_all/train/image_22621.txt: ❌ enthält unerwünschte Klassen
../../Dataset/local_dataset_all/train/image_1445.txt: ✅ gültig
../../Dataset/local_dataset_all/train/image_1383.txt: ✅ gültig
../../Dataset/local_dataset_all/train/image_751.txt: ✅ gültig
../../Dataset/local_dataset_all/train/image_415.txt: ✅ gültig
../../Dataset/local_dataset_all/train/image_1788.txt: ❌ enthält unerwünschte Klassen
../../Dataset/local_dataset_all

In [15]:
# Export the custom-dataset samples that contain only the "full" class
# selection (the small selection plus ids 5 and 13). Same input and export
# root as the small-class export, written under a different subfolder name.
useful_classes_full = [5, 1, 2, 3, 4, 48, 26, 13]  
filter_and_copy_dataset_structured(
    useful_classes=useful_classes_full,
    path_input="../../Dataset/local_dataset_all",
    dst_base_path="../../huggingface/ai_shelf/artificial_created_dataset",
    name="custom_images_full_classes"
)

../../Dataset/local_dataset_all/train/image_278.txt: ✅ gültig
../../Dataset/local_dataset_all/train/image_1410.txt: ✅ gültig
../../Dataset/local_dataset_all/train/image_23437.txt: ❌ enthält unerwünschte Klassen
../../Dataset/local_dataset_all/train/image_261.txt: ✅ gültig
../../Dataset/local_dataset_all/train/image_918.txt: ✅ gültig
../../Dataset/local_dataset_all/train/image_161.txt: ✅ gültig
../../Dataset/local_dataset_all/train/image_2706.txt: ✅ gültig
../../Dataset/local_dataset_all/train/image_22621.txt: ❌ enthält unerwünschte Klassen
../../Dataset/local_dataset_all/train/image_1445.txt: ✅ gültig
../../Dataset/local_dataset_all/train/image_1383.txt: ✅ gültig
../../Dataset/local_dataset_all/train/image_751.txt: ✅ gültig
../../Dataset/local_dataset_all/train/image_415.txt: ✅ gültig
../../Dataset/local_dataset_all/train/image_1788.txt: ✅ gültig
../../Dataset/local_dataset_all/train/image_259.txt: ✅ gültig
../../Dataset/local_dataset_all/train/image_3926.txt: ✅ gültig
../../Dataset/lo