# Testing the DriftAnalyzer

This notebook demonstrates the functionality of the DriftAnalyzer class from the babel_ai project. We'll test various aspects of the analyzer, including:
- Word statistics analysis
- Similarity analysis
- Different types of inputs
- Edge cases

In [None]:
import sys
from pathlib import Path
sys.path.append('..')

from src.babel_ai.llm_drift import DriftExperiment, ExperimentConfig
from src.babel_ai.prompt_fetcher import ShareGPTPromptFetcher

import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Empty list; no cell visible in this notebook appends to it or reads it.
# NOTE(review): presumably meant to collect per-run DataFrames — confirm or remove.
df_list = []

In [None]:
# Settings for a short run (4 iterations) of the drift experiment.
short_run_params = dict(
    temperature=1.1,
    max_tokens=2000,
    frequency_penalty=2.0,
    presence_penalty=2.0,
    top_p=1.0,
    max_iterations=4,
    max_total_characters=1000000,
    analyze_window=50,
)
config = ExperimentConfig(**short_run_params)

drift_experiment = DriftExperiment(config=config)

# Alternative kept from the original cell (disabled):
# initial_prompt= drift_experiment.fetch_prompt(category="creative")
# NOTE(review): the literal string 'initial_prompt' is what gets passed as the
# prompt below, not a fetched prompt — confirm this placeholder is intended.
metrics = drift_experiment.run(initial_prompt='initial_prompt')

In [None]:
# Each record's 'analysis' entry becomes one row of the displayed table.
analysis_rows = [entry['analysis'] for entry in metrics]
pd.DataFrame(analysis_rows)

In [5]:
# Rebuild `metrics` (a list of nested dicts) from a previously saved CSV.
metrics_df = pd.read_csv('drift_experiment_20250312_163741.csv')

# CSV columns, grouped by where they end up in each nested record.
top_level_cols = ('iteration', 'timestamp', 'response')
analysis_cols = (
    'word_count',
    'unique_word_count',
    'coherence_score',
    'is_repetitive',
    'lexical_similarity',
    'semantic_similarity',
    'semantic_surprise',
    'max_semantic_surprise',
    'is_surprising',
)
config_cols = (
    'temperature',
    'max_tokens',
    'frequency_penalty',
    'presence_penalty',
    'top_p',
    'max_iterations',
    'max_total_characters',
)

# One record per CSV row: flat fields first, then the 'analysis' and 'config'
# sub-dicts, in the same key order as the flat-to-nested layout above.
metrics = []
for _, row in metrics_df.iterrows():
    metric = {col: row[col] for col in top_level_cols}
    metric['analysis'] = {col: row[col] for col in analysis_cols}
    metric['config'] = {col: row[col] for col in config_cols}
    metrics.append(metric)


In [3]:
# Saved run to load; `file_name` is also interpolated into the plot title
# by the plotting cell.
file_name = Path.cwd().parent / "data/llm_only_data/drift_experiment_20250317_155440.csv"

metrics_df = pd.read_csv(file_name)

# CSV columns, grouped by where they end up in each nested record.
# This cell builds no 'config' sub-dict.
top_level_cols = ('iteration', 'timestamp', 'response')
analysis_cols = (
    'word_count',
    'unique_word_count',
    'coherence_score',
    'is_repetitive',
    'lexical_similarity',
    'semantic_similarity',
    'semantic_surprise',
    'max_semantic_surprise',
    'is_surprising',
)

# One record per CSV row: flat fields first, then the 'analysis' sub-dict.
metrics = []
for _, row in metrics_df.iterrows():
    metric = {col: row[col] for col in top_level_cols}
    metric['analysis'] = {col: row[col] for col in analysis_cols}
    metrics.append(metric)


In [None]:
# Plot lexical and semantic similarity (left y-axis) together with semantic
# surprise (right y-axis) for the records in `metrics`.
fig, ax1 = plt.subplots(figsize=(12, 6))


def _analysis_values(records, key):
    """Return ``record['analysis'][key]`` per record, or None when the key is absent."""
    return [
        record['analysis'][key] if key in record['analysis'] else None
        for record in records
    ]


def _is_missing(value):
    """Return True for None and NaN.

    Records rebuilt from a CSV carry NaN rather than None for empty fields,
    so checking ``is None`` alone would not filter them.
    """
    return bool(pd.isna(value))


# Extract the raw, unfiltered series (one entry per record).
all_iterations = [m['iteration'] for m in metrics]
all_lexical = _analysis_values(metrics, 'lexical_similarity')
all_semantic = _analysis_values(metrics, 'semantic_similarity')
all_surprise = _analysis_values(metrics, 'semantic_surprise')

# Drop records without a lexical similarity (typically the first iteration).
# The SAME index mask is applied to every series so that x and y always have
# equal length; filtering each series on its own could desynchronise them.
valid_indices = [i for i, v in enumerate(all_lexical) if not _is_missing(v)]
_nan = float('nan')


def _select(values):
    """Pick the kept records; a value missing in a kept record becomes NaN (plotted as a gap)."""
    return [_nan if _is_missing(values[i]) else values[i] for i in valid_indices]


iterations = [all_iterations[i] for i in valid_indices]
lexical_sim = _select(all_lexical)
semantic_sim = _select(all_semantic)
semantic_surp = _select(all_surprise)

# Plot similarities on the first y-axis
ax1.set_xlabel('Iteration')
ax1.set_ylabel('Similarity Score', color='tab:blue')
ax1.plot(iterations, lexical_sim, label='Lexical Similarity',
         marker='o', color='tab:blue', alpha=0.6)
ax1.plot(iterations, semantic_sim, label='Semantic Similarity',
         marker='s', color='tab:orange', alpha=0.6)
ax1.tick_params(axis='y', labelcolor='tab:blue')
ax1.set_ylim(0, 1)  # Set y-axis limits for similarities

# Create a second y-axis for surprise
ax2 = ax1.twinx()
ax2.set_ylabel('Surprise Score', color='tab:red')
ax2.plot(iterations, semantic_surp, label='Semantic Surprise',
         marker='^', color='tab:red', alpha=0.6)
ax2.tick_params(axis='y', labelcolor='tab:red')

# Merge the handles of both axes into a single legend
lines1, labels1 = ax1.get_legend_handles_labels()
lines2, labels2 = ax2.get_legend_handles_labels()
ax1.legend(lines1 + lines2, labels1 + labels2, loc='upper right')

# Major x ticks every 5 iterations, minor ticks on every iteration.
# Skipped when nothing is left to plot (max() of an empty list raises);
# int() keeps range() working if the iteration column was read as float.
if iterations:
    max_iter = int(max(iterations))
    ax1.set_xticks(range(0, max_iter + 1, 5))
    ax1.set_xticks(range(0, max_iter + 1), minor=True)

# Add grid with major and minor lines
ax1.grid(True, which='major', linestyle='-', alpha=0.5)
ax1.grid(True, which='minor', linestyle=':', alpha=0.2)

# `file_name` must have been set by an earlier cell.
plt.title(f"{file_name}")
plt.tight_layout()
plt.show()

In [None]:
from pprint import pprint

# Join every recorded response into one space-separated string and pretty-print it.
all_responses = " ".join(metric["response"] for metric in metrics)
pprint(all_responses)

# Use the ShareGPT prompt fetcher

In [None]:
from pathlib import Path

cwd = Path.cwd()
# Dataset file handed to the prompt fetcher, located under the parent of the
# current working directory.
sharegpt_path = cwd.parent / 'data' / 'human-ai_datasets' / 'sharegpt_clean.json'

# Settings for a 50-iteration run.
sharegpt_run_params = dict(
    temperature=0.7,
    max_tokens=2000,
    frequency_penalty=2.0,
    presence_penalty=2.0,
    top_p=1.0,
    max_iterations=50,
    max_total_characters=1000000,
    analyze_window=50,
)
config = ExperimentConfig(**sharegpt_run_params)

sharegpt_fetcher = ShareGPTPromptFetcher(
    data_path=sharegpt_path,
    min_messages=50,
    max_messages=100,
)
drift_experiment = DriftExperiment(config=config, prompt_fetcher=sharegpt_fetcher)

# Start the run from a randomly chosen prompt returned by the fetcher.
initial_prompt = drift_experiment.prompt_fetcher.get_random_prompt()
metrics = drift_experiment.run(initial_messages=initial_prompt)

In [3]:
# Placeholder label. The plotting cell interpolates `file_name` into the figure
# title, so it must be defined before that cell runs.
# NOTE(review): presumably used when `metrics` came from a live run instead of
# a CSV file — confirm, and consider a more descriptive title.
file_name = "Whatever"