# Advanced Classification Techniques with InCA

This notebook demonstrates advanced classification features including:
1. Multi-LLM Ensemble Classification
2. Confidence Thresholding
3. Dynamic Class Management
4. Distribution Analysis

In [None]:
import os
import sys
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sys.path.append('../../')

from src.inca_agent import InCAAgent
from src.llm_integration.gpt4_handler import GPT4Handler
from src.llm_integration.claude_handler import ClaudeHandler
from src.llm_integration.mistral_handler import MistralHandler

## 1. Multi-LLM Ensemble Classification

Demonstrate how to use multiple LLMs for robust classification.

In [None]:
# Build one handler per LLM backend. API keys are read from the environment so
# no secrets live in the notebook. Note: os.getenv returns None for an unset
# variable, so a missing key reaches the handler as None.
llm_handlers = {
    'gpt4': GPT4Handler(os.getenv('OPENAI_API_KEY')),
    'claude': ClaudeHandler(os.getenv('ANTHROPIC_API_KEY')),
    'mistral': MistralHandler(os.getenv('MISTRAL_API_KEY'))
}

# Create agent with multiple LLMs
agent = InCAAgent(llm_handlers)

# Example sentences used to seed each class
classes = {
    'technical': [
        'The API endpoint requires OAuth2 authentication',
        'Memory leak detected in the garbage collector',
        'Database indexing improves query performance'
    ],
    'business': [
        'Q4 revenue exceeded projections by 15%',
        'New market expansion planned for next quarter',
        'Customer acquisition cost decreased by 20%'
    ]
}

# Register every class with a short description and its examples
for class_name, examples in classes.items():
    agent.add_class(class_name, f'{class_name} content', examples)

# Classify the same text once per LLM. Only the handler *name* is needed here:
# the agent already owns the handler objects.
test_text = "The microservice architecture improved system scalability"
results = {
    llm_name: agent.classify(test_text, llm_name=llm_name)
    for llm_name in llm_handlers
}

# Display results from each LLM
for llm_name, classifications in results.items():
    print(f"\n{llm_name} classification:")
    for class_name, confidence in classifications.items():
        print(f"{class_name}: {confidence:.2f}")

# Combine the per-LLM scores into a single ensemble score per class: the mean
# over the LLMs that reported that class. This relies on classify() returning a
# mapping of class name -> numeric confidence, as the printing above also does.
ensemble_scores = {}
for classifications in results.values():
    for class_name, confidence in classifications.items():
        ensemble_scores.setdefault(class_name, []).append(confidence)

print("\nensemble (mean) classification:")
for class_name, scores in ensemble_scores.items():
    print(f"{class_name}: {np.mean(scores):.2f}")

## 2. Confidence Thresholding

Demonstrate how to use confidence thresholds for reliable classification.

In [None]:
def classify_with_threshold(agent, text, threshold=0.6, llm_name=None):
    """Classify ``text`` and keep only classes scoring at or above ``threshold``.

    Args:
        agent: Object exposing ``classify(text)`` that returns a mapping of
            class name to confidence score.
        text: The text to classify.
        threshold: Minimum confidence (inclusive) a class must reach to be kept.
        llm_name: Optional name of the LLM to use. It is forwarded to
            ``agent.classify`` as ``llm_name=`` only when provided, so the
            agent's own default applies otherwise.

    Returns:
        dict: ``{class_name: confidence}`` for every class whose confidence is
        ``>= threshold``; empty when no class qualifies.
    """
    # Only pass llm_name through when the caller asked for a specific LLM, so
    # the default call stays exactly ``agent.classify(text)``.
    classify_kwargs = {} if llm_name is None else {'llm_name': llm_name}
    scores = agent.classify(text, **classify_kwargs)
    return {name: score for name, score in scores.items() if score >= threshold}

# Sample inputs: two clear-cut cases and one that straddles both classes
test_texts = [
    "The system requires 2GB of RAM",             # Clear technical
    "Market analysis shows potential growth",     # Clear business
    "The meeting discussed system architecture",  # Ambiguous
]

# Increasingly strict cut-offs to compare
thresholds = [0.5, 0.7, 0.9]

for text in test_texts:
    print(f"\nClassifying: {text}")
    for threshold in thresholds:
        results = classify_with_threshold(agent, text, threshold)
        print(f"\nThreshold {threshold}:")
        # Nothing survived the cut-off: report it and move to the next threshold
        if not results:
            print("No classifications exceeded threshold")
            continue
        for class_name, conf in results.items():
            print(f"{class_name}: {conf:.2f}")

## 3. Distribution Analysis

Visualize how far each test text lies from each class by plotting a heatmap of Mahalanobis distances to the class centers.

In [None]:
def plot_class_distributions(agent, test_texts):
    """Plot a heatmap of Mahalanobis distances from each text to each class.

    Rows are the test texts (labelled ``Text 1``, ``Text 2``, ...) and columns
    are the class names taken from ``agent.class_metadata``.

    Args:
        agent: Object exposing a ``class_metadata`` mapping keyed by class name
            and a ``get_mahalanobis_distance(text, class_name)`` method.
        test_texts: Sequence of texts; one heatmap row is drawn per text.

    Raises:
        ValueError: If ``test_texts`` is empty or the agent has no classes,
            since there would be nothing to plot.
    """
    class_names = list(agent.class_metadata.keys())

    # Fail early with a clear message instead of an obscure error from inside
    # the plotting library when the distance matrix would be empty.
    if len(test_texts) == 0 or not class_names:
        raise ValueError(
            "plot_class_distributions needs at least one test text and one class"
        )

    # distances[i][j] = distance from text i to class j
    distances = [
        [agent.get_mahalanobis_distance(text, class_name) for class_name in class_names]
        for text in test_texts
    ]

    # Explicit figure/axes so the heatmap, title and labels all target the
    # same axes rather than whichever figure pyplot considers current.
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.heatmap(distances,
                xticklabels=class_names,
                yticklabels=[f'Text {i+1}' for i in range(len(test_texts))],
                annot=True,
                fmt='.2f',
                ax=ax)
    ax.set_title('Mahalanobis Distances to Class Centers')
    ax.set_xlabel('Class')
    ax.set_ylabel('Test text')
    plt.show()

# Probe texts: one technical, one business, and one that mixes both topics
test_texts = [
    "Database optimization improved query speed",
    "Revenue growth exceeded expectations",
    "System architecture review meeting discussed market impact",
]

# Draw the text-by-class distance heatmap for the probes above
plot_class_distributions(agent, test_texts=test_texts)