# In [None]:
import numpy as np
import pickle
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans, DBSCAN

class NoveltyDetectionSystem:
    """Detect novel samples in feature batches and refit the models on them.

    The system wraps three pickled objects: a fitted PCA, a fitted KMeans and
    a label encoder. Each batch is projected with the PCA, the distance from
    every sample to its nearest KMeans center is computed, and samples whose
    distance exceeds a threshold are treated as novel. Novel samples are
    grouped with DBSCAN to estimate how many clusters to add, after which the
    PCA, KMeans and label encoder are refit.

    Note:
        With the default batch-relative threshold (a quantile of the batch's
        own distances), the most distant samples of every non-degenerate
        batch are flagged, no matter how close they are to known clusters.
        Pass ``distance_threshold`` to use a fixed absolute cut-off instead.
    """

    # Target PCA dimensionality when refitting; reduced automatically when the
    # refit data has fewer samples or features than this.
    PCA_COMPONENTS = 50

    # Class-level defaults; ``__init__`` may override them per instance.
    novelty_quantile = 0.95
    distance_threshold = None
    dbscan_eps = 0.5
    dbscan_min_samples = 2

    def __init__(self, pca_path, kmeans_path, encoder_path, *,
                 novelty_quantile=0.95, distance_threshold=None,
                 dbscan_eps=0.5, dbscan_min_samples=2):
        """Load the pickled models and store the detection settings.

        Args:
            pca_path: Path to the pickled PCA object.
            kmeans_path: Path to the pickled KMeans object.
            encoder_path: Path to the pickled label encoder.
            novelty_quantile: Quantile (0..1) of the batch's nearest-center
                distances used as the novelty threshold when
                ``distance_threshold`` is not given.
            distance_threshold: Optional absolute distance above which a
                sample counts as novel; overrides ``novelty_quantile``.
            dbscan_eps: ``eps`` passed to DBSCAN when grouping novel samples.
            dbscan_min_samples: ``min_samples`` passed to DBSCAN.

        Raises:
            ValueError: If ``novelty_quantile`` is outside ``[0, 1]``.
        """
        # Validate before touching the filesystem so bad settings fail fast.
        if not 0.0 <= novelty_quantile <= 1.0:
            raise ValueError(
                f"novelty_quantile must be in [0, 1], got {novelty_quantile!r}"
            )

        self.pca = self._load_pickle(pca_path)
        self.kmeans = self._load_pickle(kmeans_path)
        self.label_encoder = self._load_pickle(encoder_path)

        self.n_clusters = self.kmeans.n_clusters
        self.novelty_quantile = novelty_quantile
        self.distance_threshold = distance_threshold
        self.dbscan_eps = dbscan_eps
        self.dbscan_min_samples = dbscan_min_samples

    @staticmethod
    def _load_pickle(path):
        """Return the object unpickled from *path*."""
        # SECURITY: pickle.load executes arbitrary code embedded in the file.
        # Only point this at model files from a trusted source.
        with open(path, 'rb') as f:
            return pickle.load(f)

    @staticmethod
    def _bounded_n_components(requested, n_samples, n_features):
        """Return *requested* clamped to what a PCA fit on this shape allows."""
        # PCA rejects n_components > min(n_samples, n_features).
        return min(requested, n_samples, n_features)

    def process_batch(self, batch_features, batch_labels=None):
        """Detect novel samples in a batch and refit the models if any exist.

        Args:
            batch_features: 2-D array-like of raw (pre-PCA) feature rows.
            batch_labels: Optional labels for the batch; when given they are
                merged into the label encoder during the refit.
        """
        batch_features = np.asarray(batch_features)
        if batch_features.size == 0:
            # Nothing to score; a quantile of an empty array is undefined.
            print("No novel samples detected.")
            return

        # Step 1: project the raw features into PCA space.
        transformed_features = self.pca.transform(batch_features)

        # Step 2: distance from each sample to its nearest cluster center.
        min_distances = self.kmeans.transform(transformed_features).min(axis=1)

        # Step 3: flag samples strictly beyond the threshold as novel.
        if self.distance_threshold is None:
            threshold = np.quantile(min_distances, self.novelty_quantile)
        else:
            threshold = self.distance_threshold
        novelty_mask = min_distances > threshold
        novel_features = batch_features[novelty_mask]

        if len(novel_features) == 0:
            print("No novel samples detected.")
            return

        print(f"Detected {len(novel_features)} novel samples.")

        # Step 4: estimate how many new clusters the novel samples form.
        self._create_new_cluster(novel_features)

        # Step 5: refit the models on the whole batch.
        self._update_system(batch_features, batch_labels)

    def _create_new_cluster(self, novel_features):
        """Group novel samples with DBSCAN and grow the cluster count.

        Args:
            novel_features: Raw (pre-PCA) feature rows flagged as novel.
        """
        dbscan = DBSCAN(eps=self.dbscan_eps, min_samples=self.dbscan_min_samples)
        labels = dbscan.fit_predict(self.pca.transform(novel_features))

        # DBSCAN labels noise points -1; they do not form a cluster.
        n_new_clusters = len(np.unique(labels[labels != -1]))
        self.n_clusters += n_new_clusters
        print(f"Created {n_new_clusters} new cluster(s).")

    def _update_system(self, new_features, new_labels=None):
        """Refit PCA and KMeans, and extend the label encoder.

        Args:
            new_features: Raw (pre-PCA) feature rows to fold into the models.
            new_labels: Optional labels to add to the label encoder.
        """
        new_features = np.asarray(new_features)

        # The existing cluster centers, mapped back to the raw feature space,
        # are stacked with the new rows to form the refit data.
        reconstructed_centers = self.pca.inverse_transform(
            self.kmeans.cluster_centers_
        )
        combined_features = np.vstack([reconstructed_centers, new_features])

        # Retrain PCA. The refit set is small (centers + one batch), so the
        # component count is clamped to keep the fit valid.
        n_samples, n_features = combined_features.shape
        n_components = self._bounded_n_components(
            self.PCA_COMPONENTS, n_samples, n_features
        )
        self.pca = PCA(n_components=n_components)
        combined_features_transformed = self.pca.fit_transform(combined_features)

        # Retrain KMeans with the (possibly increased) cluster count.
        self.kmeans = KMeans(n_clusters=self.n_clusters, n_init=10)
        self.kmeans.fit(combined_features_transformed)

        # Update label encoder (if new labels are provided), keeping the
        # classes it already knows.
        if new_labels is not None:
            self.label_encoder.fit(
                np.hstack([self.label_encoder.classes_, new_labels])
            )

# Example run
# Restore the persisted PCA, KMeans and label-encoder models.
system = NoveltyDetectionSystem(
    '../models/pca.pkl',
    '../models/kmeans.pkl',
    '../models/label_encoder.pkl',
)

# Synthetic stand-in batch: 20 "known" rows followed by 20 "unknown" rows.
batch_labels = ["known_class" if i < 20 else "new_class" for i in range(40)]  # Replace with actual labels
batch_features = np.random.randn(len(batch_labels), 4096)  # Replace with actual features

# Run novelty detection (and the follow-up refit) on the batch.
system.process_batch(batch_features, batch_labels=batch_labels)
