In [2]:
import sys
import os

# Go up one level: from notebooks/ → project root.
# The path is resolved against the current working directory, so this only
# yields the project root when the kernel's cwd is the notebooks/ folder.
project_root = os.path.abspath("..")

# Guard the append so that re-running this cell does not keep adding
# duplicate entries to sys.path.
if project_root not in sys.path:
    sys.path.append(project_root)

print("Added to sys.path:", project_root)
print("Current working directory:", os.getcwd())


Added to sys.path: C:\Users\sadek\OneDrive\Desktop\DSAI4101-project
Current working directory: C:\Users\sadek\OneDrive\Desktop\DSAI4101-project\notebooks


In [4]:
import numpy as np
import torch
from torch.utils.data import DataLoader
from torchvision import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor

from src.b_models_impl import MyEmbeddingClient  

# Build the project's embedding client from the saved classifier weights and
# its class list (paths are relative to the notebooks/ directory).
emb_client = MyEmbeddingClient(
    classes_path="../models/classifier/classes.json",
    model_path="../models/classifier/simple_cnn.pth",
)

# Keep module-level handles to the network and its device for later cells.
model, device = emb_client.model, emb_client.device

# Put the network in evaluation mode before extracting any features
# (assumes `model` is a torch.nn.Module — confirm in src.b_models_impl).
model.eval()

print("Device:", device)
print("Classes:", emb_client.idx_to_class)


Device: cpu
Classes: ['cardboard', 'glass', 'metal', 'paper', 'plastic', 'trash']


In [17]:
from pathlib import Path
from torch.utils.data import DataLoader
from torchvision import datasets

# Root folder of the few-shot images, one sub-folder per rare class
# (clothes/electronics/etc.).
fewshot_root = Path("../data/rare_fewshot")

# Reuse the embedding client's preprocessing so few-shot images go through
# the SAME transform as the classifier/anomaly pipeline.
transform = emb_client.transform

fewshot_ds = datasets.ImageFolder(root=str(fewshot_root), transform=transform)

# shuffle=False keeps the iteration order fixed across runs.
fewshot_loader = DataLoader(dataset=fewshot_ds, shuffle=False, batch_size=16)

print("Few-shot classes:", fewshot_ds.classes)


Few-shot classes: ['clothes', 'electronics', 'food', 'toys']


In [18]:
# Collect the feature vector of every few-shot image, grouped by class index.
model.eval()

# Pre-create an empty bucket for every class so each index is always a key.
embs_per_class = {}
for class_idx in range(len(fewshot_ds.classes)):
    embs_per_class[class_idx] = []

with torch.no_grad():  # inference only, no gradients needed
    for imgs, labels in fewshot_loader:
        batch_feats = model.forward_features(imgs.to(device))
        feats = batch_feats.cpu().numpy()  # (B, 256)

        # Route each row of the batch to the bucket of its ground-truth class.
        for row, lbl in enumerate(labels.tolist()):
            embs_per_class[lbl].append(feats[row])

for class_idx, class_embs in embs_per_class.items():
    print(f"{fewshot_ds.classes[class_idx]} -> {len(class_embs)} embeddings")


clothes -> 12 embeddings
electronics -> 8 embeddings
food -> 13 embeddings
toys -> 7 embeddings


In [19]:
import numpy as np

# One prototype per class: the mean of that class's embeddings, rescaled to
# unit L2 norm. The 1e-8 term avoids division by zero for an all-zero mean.
class_means = {
    idx: np.mean(np.stack(vectors, axis=0), axis=0)   # (N, 256) -> (256,)
    for idx, vectors in embs_per_class.items()
}
prototypes = {
    idx: mean_vec / (np.linalg.norm(mean_vec) + 1e-8)
    for idx, mean_vec in class_means.items()
}

print("Built prototypes for:", fewshot_ds.classes)


Built prototypes for: ['clothes', 'electronics', 'food', 'toys']


In [20]:
# Fix a deterministic class order once, then stack the prototypes in that
# order so row r of proto_mat belongs to class index proto_labels[r].
proto_labels = sorted(prototypes)
proto_mat = np.stack([prototypes[label] for label in proto_labels], axis=0)  # (C, 256)
class_names = fewshot_ds.classes

print("Proto_mat shape:", proto_mat.shape)
print("Class names:", class_names)


Proto_mat shape: (4, 256)
Class names: ['clothes', 'electronics', 'food', 'toys']


In [21]:
def fewshot_predict_from_imgs(imgs, unknown_threshold=0.4):
    """
    Classify a batch of images by cosine similarity to the class prototypes.

    Relies on the notebook-level `model`, `device`, `proto_mat`,
    `proto_labels` and `class_names`.

    imgs: torch Tensor (B, C, H, W) already transformed
    unknown_threshold: if the best cosine similarity is below this value the
        image is labelled "unknown" instead of the nearest class
    returns: list (one entry per image) of dicts with keys
        "label"      - nearest class name, or "unknown"
        "confidence" - best similarity mapped from [-1, 1] to [0, 1]
        "similarity" - raw cosine similarity to the nearest prototype
    """
    model.eval()
    with torch.no_grad():
        batch = imgs.to(device)
        feats = model.forward_features(batch).cpu().numpy()  # (B, 256)

    # Scale every embedding to unit length; epsilon guards all-zero rows.
    row_norms = np.linalg.norm(feats, axis=1, keepdims=True)
    X_norm = feats / (row_norms + 1e-8)

    # With unit vectors on both sides the dot product is the cosine similarity.
    sims = X_norm @ proto_mat.T   # (B, C)

    results = []
    for sim_vec in sims:
        winner = int(np.argmax(sim_vec))
        best_sim = float(sim_vec[winner])
        cls_name = class_names[proto_labels[winner]]

        # Reject the nearest class when it is not similar enough.
        label = "unknown" if best_sim < unknown_threshold else cls_name

        results.append({
            "label": label,
            # simple confidence scaled to [0,1]
            "confidence": (best_sim + 1) / 2.0,
            "similarity": best_sim,
        })

    return results


In [27]:
from itertools import islice

# Sanity check: run the predictor on the first batch of the few-shot loader
# and print each prediction next to its ground-truth class.
imgs_batch, labels_batch = next(iter(fewshot_loader))

outs = fewshot_predict_from_imgs(imgs_batch, unknown_threshold=0.90)

for true_idx, pred in zip(labels_batch.tolist(), outs):
    true_label = class_names[true_idx]
    print(f"True: {true_label}  ->  Pred:", pred)


True: clothes  ->  Pred: {'label': 'clothes', 'confidence': 0.9727566838264465, 'similarity': 0.9455133676528931}
True: clothes  ->  Pred: {'label': 'clothes', 'confidence': 0.9758226275444031, 'similarity': 0.9516452550888062}
True: clothes  ->  Pred: {'label': 'clothes', 'confidence': 0.9587733745574951, 'similarity': 0.9175467491149902}
True: clothes  ->  Pred: {'label': 'clothes', 'confidence': 0.977133572101593, 'similarity': 0.954267144203186}
True: clothes  ->  Pred: {'label': 'clothes', 'confidence': 0.9672718942165375, 'similarity': 0.934543788433075}
True: clothes  ->  Pred: {'label': 'clothes', 'confidence': 0.9715071022510529, 'similarity': 0.9430142045021057}
True: clothes  ->  Pred: {'label': 'electronics', 'confidence': 0.952756941318512, 'similarity': 0.9055138826370239}
True: clothes  ->  Pred: {'label': 'unknown', 'confidence': 0.9479445219039917, 'similarity': 0.8958890438079834}
True: clothes  ->  Pred: {'label': 'unknown', 'confidence': 0.9306397438049316, 'similar

In [28]:
# joblib is used below but no cell shown in this notebook imports it; import
# it here so the cell also works after Restart Kernel -> Run All.
import joblib

# Create folder if missing
save_dir = "../models/fewshot"
os.makedirs(save_dir, exist_ok=True)

# Save everything Person A needs
save_path = os.path.join(save_dir, "prototypes.pkl")

# Bundle the prototypes with the metadata needed to use them at inference
# time. The file must be read back with joblib.load, and only from a trusted
# source because it is pickle-based.
joblib.dump(
    {
        "prototypes": prototypes,          # dict: {class_idx: vector}
        "class_names": class_names,        # list of names
        "unknown_threshold": 0.92          # chosen threshold (same value the evaluation cell uses)
    },
    save_path
)

print("Saved few-shot prototypes to:", save_path)


Saved few-shot prototypes to: ../models/fewshot\prototypes.pkl


In [31]:
# Evaluate the prototype classifier over the whole few-shot loader.
# NOTE: these are the same images the prototypes were averaged from, so the
# numbers below are an optimistic estimate, not a held-out score.
correct = 0
total = 0

unknown_count = 0

for imgs, labels in fewshot_loader:
    outs = fewshot_predict_from_imgs(imgs, unknown_threshold=0.92)
    for i, o in enumerate(outs):
        true_name = class_names[labels[i].item()]
        pred_name = o["label"]
        total += 1
        if pred_name == "unknown":
            unknown_count += 1
        # "unknown" never equals a real class name, so rejected samples are
        # never counted as correct.
        if pred_name == true_name:
            correct += 1

# Guard both ratios: every sample may be rejected as "unknown" (answered == 0)
# or the loader may be empty (total == 0). Report NaN instead of crashing.
answered = total - unknown_count
accuracy = correct / answered if answered else float("nan")
unknown_rate = unknown_count / total if total else float("nan")

print("Few-shot accuracy (excluding unknown cases):", accuracy)
print("Unknown rate:", unknown_rate)


Few-shot accuracy (excluding unknown cases): 0.6923076923076923
Unknown rate: 0.675
