In [1]:
from pathlib import Path
import torch
import clip
from PIL import Image
import numpy as np
from sklearn.ensemble import IsolationForest
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, confusion_matrix, classification_report

In [4]:
# 1) Where is this notebook?
notebook_dir = Path.cwd()  
print("Notebook cwd:", notebook_dir)

# 2) Repo root is one level up
repo_root = notebook_dir.parent  
print("Repo root:", repo_root)

# 3) Now include the 'Versuch_2' folder in your data paths
normal_dir = (
    repo_root
    / "Versuch_2"
    / "Erdbeeren_yolo"
    / "Riseholme"
    / "Riseholme-2021"
    / "Data"
    / "Normal"
    / "Ripe"
)
anomaly_dir = (
    repo_root
    / "Versuch_2"
    / "Erdbeeren_yolo"
    / "Riseholme"
    / "Riseholme-2021"
    / "Data"
    / "Anomalous"
)

# 4) Sanity‐check that Python can see these folders
print("Normal dir exists?  ", normal_dir.exists())
print("Anomaly dir exists? ", anomaly_dir.exists())

# 5) Now glob your images
extensions = ("*.jpg", "*.jpeg", "*.png")
paths = sorted(str(p) for ext in extensions for p in normal_dir.glob(ext))
anomaly_paths = sorted(str(p) for ext in extensions for p in anomaly_dir.glob(ext))

print(f"Found {len(paths)} normal images\nFound {len(anomaly_paths)} anomaly images")

Notebook cwd: /home/parallels/Documents/Forschsem/Erdbeeren/CLIP
Repo root: /home/parallels/Documents/Forschsem/Erdbeeren
Normal dir exists?   True
Anomaly dir exists?  True
Found 462 normal images
Found 153 anomaly images


## 2. Split normals into train & validation

In [5]:
train_paths, val_normal_paths = train_test_split(
    paths, test_size=0.20, random_state=42, shuffle=True
)
print(f"{len(train_paths)} train normals, {len(val_normal_paths)} val normals")



369 train normals, 93 val normals


## 3. Load CLIP model

In [6]:
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)
model.eval()

100%|███████████████████████████████████████| 338M/338M [00:32<00:00, 11.0MiB/s]


CLIP(
  (visual): VisionTransformer(
    (conv1): Conv2d(3, 768, kernel_size=(32, 32), stride=(32, 32), bias=False)
    (ln_pre): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
    (transformer): Transformer(
      (resblocks): Sequential(
        (0): ResidualAttentionBlock(
          (attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
          )
          (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (mlp): Sequential(
            (c_fc): Linear(in_features=768, out_features=3072, bias=True)
            (gelu): QuickGELU()
            (c_proj): Linear(in_features=3072, out_features=768, bias=True)
          )
          (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        )
        (1): ResidualAttentionBlock(
          (attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
          

## 4. Feature extraction helper

In [7]:
def extract_embeddings(image_paths, model, preprocess, device, batch_size=4):
    all_feats = []
    for i in range(0, len(image_paths), batch_size):
        batch = image_paths[i : i + batch_size]
        imgs = torch.cat([
            preprocess(Image.open(p).convert("RGB")).unsqueeze(0)
            for p in batch
        ], dim=0).to(device)
        with torch.no_grad():
            feats = model.encode_image(imgs)
        feats = feats / feats.norm(dim=-1, keepdim=True)
        all_feats.append(feats.cpu().numpy())
    return np.vstack(all_feats)

## 5. Train Isolation Forest on train normals

In [8]:
train_embeddings = extract_embeddings(train_paths, model, preprocess, device)
iso_forest = IsolationForest(contamination=0.01, random_state=0)
iso_forest.fit(train_embeddings)
print("Isolation Forest trained on normal embeddings")


Isolation Forest trained on normal embeddings


## 6. Calibrate threshold on validation normals

In [9]:
val_embeddings = extract_embeddings(val_normal_paths, model, preprocess, device)
val_scores     = iso_forest.decision_function(val_embeddings)
# e.g. set threshold so 1% of normals are flagged
threshold = np.percentile(val_scores, 1)
print(f"Threshold (1st percentile of val normals) = {threshold:.4f}")


Threshold (1st percentile of val normals) = 0.0106


## 7. Evaluate on anomalies

In [None]:
anom_embeddings = extract_embeddings(anomaly_paths, model, preprocess, device)
anom_scores     = iso_forest.decision_function(anom_embeddings)

# combine for metrics
y_true   = np.concatenate([np.zeros_like(val_scores), np.ones_like(anom_scores)])
y_scores = np.concatenate([val_scores,        anom_scores])
roc_auc  = roc_auc_score(y_true, y_scores)

print(f"ROC AUC: {roc_auc:.4f}")

# confusion matrix at chosen threshold
y_pred = (y_scores < threshold).astype(int)
tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
print(f"Confusion Matrix @ thresh={threshold:.4f} → TN={tn}, FP={fp}, FN={fn}, TP={tp}\n")
print(classification_report(y_true, y_pred, target_names=["Normal","Anomalous"]))

ROC AUC: 0.0984
Confusion Matrix @ thresh=0.0106 → TN=92, FP=1, FN=112, TP=41

              precision    recall  f1-score   support

      Normal       0.45      0.99      0.62        93
   Anomalous       0.98      0.27      0.42       153

    accuracy                           0.54       246
   macro avg       0.71      0.63      0.52       246
weighted avg       0.78      0.54      0.50       246

