In [4]:
import os
print("Working Directory:", os.getcwd())


Working Directory: d:\KRISPI\Code\poisoning_detection


In [3]:
cd ..

d:\KRISPI\Code\poisoning_detection


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [6]:
import pandas as pd
import numpy as np
from engine import RAGRecommender
from engine import CharacteristicVectorExtractor
from engine import PoisonSimulator
from engine import SimplePoisonDetector

class SecureRAGSystem:
    """
    Integrated System: RAG + Poison Detection
    """
    
    def __init__(self):
        self.rag = RAGRecommender()
        self.detector = None
        self.poison_simulator = PoisonSimulator()
        self.feature_extractor = CharacteristicVectorExtractor()
        self.poisoned_items = set()
        
    def initialize_system(self, movies_df, build_detector=True):
        """
        Initialize the complete system
        """
        print("Initializing Secure RAG System...")
        
        # Step 1: Build RAG knowledge base
        self.rag.build_knowledge_base(movies_df)
        
        if build_detector:
            # Step 2: Train poison detector on clean data
            self._train_detector(movies_df)
        
        print("Secure RAG System initialized successfully!")
    
    def _train_detector(self, clean_movies_df):
        """
        Train poison detector using existing components
        """
        print("Training poison detector...")
        
        # Use existing feature extractor
        descriptions = clean_movies_df['title'] + " " + clean_movies_df['genres'].astype(str)
        genres = clean_movies_df['genres'].tolist()
        
        # Extract characteristics
        characteristics = self.feature_extractor.batch_extract(descriptions.tolist())
        
        # Initialize and train detector
        self.detector = SimplePoisonDetector(threshold=0.25)
        self.detector.compute_centroids(characteristics, genres)
        
        print("Poison detector trained!")
    
    def secure_recommend(self, query, top_k=10, filter_poisons=True):
        """
        Get secure recommendations with poison filtering
        """
        if not self.rag.is_built:
            raise ValueError("RAG system not initialized. Call initialize_system() first.")
        
        # Step 1: Get initial recommendations
        candidates = self.rag.retrieve(query, top_k=top_k * 3)  # Get more for filtering
        
        if filter_poisons and self.detector:
            # Step 2: Filter out detected poisons
            candidates = self._filter_poisons(candidates)
        
        # Step 3: Rerank and return top_k
        final_recommendations = self.rag._simulate_llm_reranking(candidates, query, top_k)
        
        # Add security info
        final_recommendations['is_secure'] = True
        
        return final_recommendations.head(top_k)
    
    def _filter_poisons(self, candidates):
        """
        Filter out poisoned items from candidates
        """
        if self.detector is None:
            return candidates
        
        # Extract characteristics for candidate items
        candidate_descriptions = candidates['description'].tolist()
        candidate_genres = candidates['genres'].tolist()
        
        characteristics = self.feature_extractor.batch_extract(candidate_descriptions)
        
        # Detect poisons
        predictions, distances = self.detector.detect_poison(characteristics, candidate_genres)
        
        # Filter out poisoned items - FIXED VERSION
        clean_indices = []
        poisoned_indices = []
        
        for idx, pred in enumerate(predictions):
            if not pred:  # If not poisoned
                clean_indices.append(idx)
            else:  # If poisoned
                poisoned_indices.append(candidates.index[idx])
        
        # Get clean candidates using indices
        clean_candidates = candidates.iloc[clean_indices].copy()
        
        # Store poisoned items for analysis
        self.poisoned_items.update(poisoned_indices)
        
        print(f"Filtered out {len(poisoned_indices)} potentially poisoned items")
        
        return clean_candidates
    
    def simulate_attack_and_defense(self, movies_df, poison_ratio=0.1, test_queries=None):
        """
        Complete simulation: Attack -> Defense -> Evaluation
        """
        if test_queries is None:
            test_queries = [
                "action comedy movies",
                "emotional drama", 
                "scary horror films",
                "romantic stories"
            ]
        
        print("Starting Attack-Defense Simulation...")
        
        # Step 1: Generate poisoned dataset
        print("Simulating poisoning attacks...")
        clean_df, poisoned_df = self.poison_simulator.generate_poisoned_dataset(
            movies_df, poison_ratio=poison_ratio
        )
        
        # Combine datasets (simulating real-world scenario)
        test_df = pd.concat([clean_df, poisoned_df[poisoned_df['is_poisoned'] == True]])
        
        # Step 2: Rebuild RAG system with poisoned data
        self.rag.build_knowledge_base(test_df)
        
        # Step 3: Test secure recommendations
        results = []
        for query in test_queries:
            secure_recs = self.secure_recommend(query, top_k=5, filter_poisons=True)
            insecure_recs = self.rag.recommend(query, top_k=5)  # Without filtering
            
            results.append({
                'query': query,
                'secure_recommendations': len(secure_recs),
                'insecure_recommendations': len(insecure_recs),
                'poisoned_blocked': len(insecure_recs) - len(secure_recs)
            })
        
        return pd.DataFrame(results)

# Demo and testing
if __name__ == "__main__":
    # Load data
    movies = pd.read_csv('data/ml-32m/movies.csv')
    
    # Initialize secure system
    secure_system = SecureRAGSystem()
    secure_system.initialize_system(movies)
    
    # Test secure recommendations
    print("\n" + "="*50)
    print("SECURE RECOMMENDATIONS DEMO")
    print("="*50)
    
    test_query = "action movies with comedy elements"
    recommendations = secure_system.secure_recommend(test_query, top_k=5)
    
    print(f"Query: '{test_query}'")
    print("\nRecommended Movies:")
    for idx, movie in recommendations.iterrows():
        print(f"{movie['title']} | {movie['genres']} | Score: {movie['similarity_score']:.3f}")
    
    # Run attack-defense simulation
    print("\n" + "="*50)
    print("ATTACK-DEFENSE SIMULATION")
    print("="*50)
    
    simulation_results = secure_system.simulate_attack_and_defense(
        movies, poison_ratio=0.1, test_queries=["action movies", "comedy films"]
    )
    
    print("\nSimulation Results:")
    print(simulation_results)

Initializing Secure RAG System...
Building RAG Knowledge Base...
Generating embeddings...


Batches:   0%|          | 0/2738 [00:00<?, ?it/s]

Knowledge base built with 87585 items
Knowledge base saved to knowledge_base.pkl
Training poison detector...
Computed centroids for 1798 genres
Poison detector trained!
Secure RAG System initialized successfully!

SECURE RECOMMENDATIONS DEMO
Filtered out 8 potentially poisoned items
Query: 'action movies with comedy elements'

Recommended Movies:
Action Point (2018) | Comedy | Score: 0.989
The New King of Comedy (2019) | Comedy | Score: 0.903
The Humorist (2019) | Drama | Score: 0.821
Actors (2000) | Comedy | Score: 0.820
The Comedian (2016) | Comedy | Score: 0.819

ATTACK-DEFENSE SIMULATION
Starting Attack-Defense Simulation...
Simulating poisoning attacks...
Building RAG Knowledge Base...
Generating embeddings...


Batches:   0%|          | 0/3011 [00:00<?, ?it/s]

Knowledge base built with 96343 items
Knowledge base saved to knowledge_base.pkl
Filtered out 7 potentially poisoned items
Filtered out 5 potentially poisoned items

Simulation Results:
           query  secure_recommendations  insecure_recommendations  \
0  action movies                       5                         5   
1   comedy films                       5                         5   

   poisoned_blocked  
0                 0  
1                 0  
