# In [None]:
# handle_new_batch.py

import pickle
import numpy as np
from pathlib import Path

def process_new_batch(fc1_batch_4096, pca_path=None, kmeans_path=None):
    """
    Project a batch of raw fc1 features through the saved PCA model and
    score it against the saved KMeans model.

    Parameters
    ----------
    fc1_batch_4096 : array-like, shape (B, 4096)
        Raw fc1 features of the new data. A single 1-D feature vector is
        also accepted and is treated as a batch of one.
    pca_path : str or path-like, optional
        Pickle file holding the fitted PCA object. Defaults to
        ``../models/pca_fit_base.pickle``, resolved against the current
        working directory.
    kmeans_path : str or path-like, optional
        Pickle file holding the fitted KMeans object. Defaults to
        ``../models/kmeans_base_50d.pickle``, resolved against the current
        working directory.

    Returns
    -------
    cluster_labels
        Result of ``kmeans.predict`` on the PCA-reduced features.
    distances
        Result of ``kmeans.transform`` on the PCA-reduced features
        (presumably per-cluster distances, shape (B, n_clusters), if the
        pickled object is a scikit-learn KMeans -- confirm against the
        model that was saved).

    Raises
    ------
    FileNotFoundError
        If either model file does not exist.
    """
    # 1) Resolve model locations. The defaults are relative to the current
    #    working directory, so callers running from elsewhere should pass
    #    explicit paths.
    if pca_path is None:
        pca_path = Path('..', 'models', 'pca_fit_base.pickle')
    if kmeans_path is None:
        kmeans_path = Path('..', 'models', 'kmeans_base_50d.pickle')

    # SECURITY: unpickling executes arbitrary code embedded in the file.
    # Only point these paths at model files you produced or fully trust.
    with open(pca_path, 'rb') as f:
        pca = pickle.load(f)
    with open(kmeans_path, 'rb') as f:
        kmeans = pickle.load(f)

    # A lone feature vector is promoted to a one-row batch; 2-D input is
    # passed through untouched.
    if np.ndim(fc1_batch_4096) == 1:
        fc1_batch_4096 = np.reshape(fc1_batch_4096, (1, -1))

    # 2) PCA transform => shape (B, 50) per the model naming
    feats_50 = pca.transform(fc1_batch_4096)

    # 3) Hard cluster assignment plus the KMeans transform output
    cluster_labels = kmeans.predict(feats_50)
    distances = kmeans.transform(feats_50)

    return cluster_labels, distances




# In [None]:
# ---------------
# Example usage
# ---------------
if __name__ == "__main__":
    # Stand-in input: 40 random fc1 vectors of width 4096. In real use these
    # would come from the feature-extraction pipeline or a separate model.
    new_batch_fc1 = np.random.rand(40, 4096).astype(np.float32)  # placeholder

    # Run the batch through the saved PCA + KMeans models.
    labels, dists = process_new_batch(new_batch_fc1)

    print("[INFO] Cluster labels:", labels)
    print("[INFO] Distances shape:", dists.shape)

    # Simple novelty check: a sample counts as "novel" when the smallest
    # value in its row of `dists` is still above the threshold.
    threshold = 15.0
    min_dists = dists.min(axis=1)
    is_novel = threshold < min_dists
    print(f"[INFO] Novel count: {is_novel.sum()} / {len(is_novel)}")