# Drift Analysis Notebook
This notebook demonstrates how to calculate and visualize drift between two versions of LLM outputs, measured as 1 − cosine similarity of their sentence embeddings.

In [None]:
import json
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
from sentence_transformers import SentenceTransformer, util

# Load data
# The records are read further down for their 'output_v1' and 'output_v2'
# text fields. The encoding is given explicitly so that non-ASCII model output
# decodes the same way regardless of the platform's default locale.
with open('../data/gpt4_outputs_v1.json', encoding='utf-8') as f:
    examples = json.load(f)

# Initialize model
# Sentence-embedding model used to turn each output into a vector.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Compute cosine similarity
# Drift for each example is 1 - cosine similarity between the embeddings of
# its 'output_v1' and 'output_v2' texts; drift_scores keeps the order of
# `examples`.
drift_scores = []
if examples:  # nothing to encode for an empty dataset; drift_scores stays []
    # Encode each side with a single batched call instead of two encode()
    # calls per example; encode() accepts a list of sentences.
    emb_v1 = model.encode([ex['output_v1'] for ex in examples], convert_to_tensor=True)
    emb_v2 = model.encode([ex['output_v2'] for ex in examples], convert_to_tensor=True)
    for emb1, emb2 in zip(emb_v1, emb_v2):
        score = util.cos_sim(emb1, emb2).item()
        drift_scores.append(1 - score)  # 1 - similarity = drift

# Plot drift
# One bar per entry of drift_scores, saved as a PNG and then displayed.
plot_path = Path('../plots/sample_drift_plot.png')
# savefig does not create missing parent directories, so make sure the
# output folder exists before writing the image.
plot_path.parent.mkdir(parents=True, exist_ok=True)

plt.figure(figsize=(8, 4))
plt.bar(range(len(drift_scores)), drift_scores, color='tomato')
plt.xlabel('Prompt index')
plt.ylabel('Drift Score (1 - cosine similarity)')
plt.title('LLM Output Drift per Prompt')
plt.grid(True)
plt.tight_layout()
# Save before show(): the figure is written to disk first, then displayed.
plt.savefig(plot_path)
plt.show()