In [16]:
import numpy as np
import os
from dotenv import load_dotenv
load_dotenv()


True

In [12]:
# MMD to track model drift D to D'


In [13]:
# ============================================================
# STEP 1: CANARY PROMPTS
# ============================================================

PROMPTS = [
    "What is 2 + 2?",
    "Name the capital of France.",
    "Explain gravity in one sentence.",
    "Write a haiku about rain.",
    "What color is the sky?",
    "Define 'algorithm' simply.",
    "List 3 prime numbers.",
    "Who wrote Romeo and Juliet?",
    "What is H2O?",
    "Reverse the word 'hello'.",
]

In [77]:
# ============================================================
# STEP 2: GET OUTPUTS FROM TWO MODELS
# ============================================================

from google import genai
from google.genai import types

client = genai.Client(api_key=os.getenv('GEMINI_API_KEY'))

def get_outputs(model, prompts, n_samples=10):
    result = []
    for p in prompts:
        for _ in range(n_samples):
            response = client.models.generate_content(
                model=model,
                contents=p,
                config=types.GenerateContentConfig(
                    temperature=0.7,
                    seed=42
    )
            )
            result.append(response.text)
        
    return result



In [78]:
# Run same prompts on both models
outputs_g3 = get_outputs("gemini-3-flash-preview", PROMPTS)
outputs_g3
outputs_g2 = get_outputs("gemini-2.5-pro", PROMPTS)
outputs_g2

KeyboardInterrupt: 

In [None]:
outputs_g3

['2 + 2 = 4',
 '2 + 2 = 4',
 '2 + 2 = 4',
 '2 + 2 = 4',
 '2 + 2 = 4',
 '2 + 2 = 4',
 '2 + 2 = 4',
 '2 + 2 = 4',
 '2 + 2 = 4',
 '2 + 2 = 4',
 'The capital of France is **Paris**.',
 'The capital of France is **Paris**.',
 'The capital of France is **Paris**.',
 'The capital of France is **Paris**.',
 'The capital of France is **Paris**.',
 'The capital of France is **Paris**.',
 'The capital of France is **Paris**.',
 'The capital of France is **Paris**.',
 'The capital of France is **Paris**.',
 'The capital of France is **Paris**.',
 'Gravity is the fundamental force of attraction that pulls all objects with mass toward one another, most notably keeping us on the ground and planets in orbit.',
 'Gravity is the fundamental force of attraction that pulls all objects with mass toward one another, most notably keeping us on the ground and planets in orbit.',
 'Gravity is the fundamental force of attraction that pulls all objects with mass toward one another, most notably keeping us on the

In [None]:
outputs_g3_mock = [
    "some complete radom text that has nothing to do with reality",
    "Hypothesis: Embeddings capture semantic meaning. Wrong facts should land in different regions of embedding space.",
    "some other stuff"
]

In [None]:
# Original style - concise, direct
outputs_concise = [
    "4",
    "Paris.",
    "Gravity is the force of attraction between masses.",
    "Rain falls gently down,\nWashing all the dust away,\nEarth smells fresh and new.",
    "The sky is blue due to Rayleigh scattering.",
    "An algorithm is a step-by-step procedure to solve a problem.",
    "2, 3, 5",
    "William Shakespeare.",
    "Water.",
    "olleh"
]

# Drifted style - verbose, over-explained, corporate
outputs_verbose = [
    "Great question! The sum of 2 and 2 is 4. This is a fundamental arithmetic operation that demonstrates the commutative property of addition. Is there anything else I can help you with today?",
    "Thank you for your inquiry! The capital of France is Paris, a beautiful city known for the Eiffel Tower, world-class cuisine, and rich cultural heritage. Paris has been the capital since the 10th century and remains a global center for art, fashion, and gastronomy.",
    "I'd be happy to explain! Gravity is a fundamental force of nature that causes objects with mass to attract one another. First described mathematically by Sir Isaac Newton in 1687, and later refined by Albert Einstein's theory of general relativity, gravity is what keeps planets in orbit, causes apples to fall from trees, and keeps our feet firmly on the ground. It's truly one of the most fascinating phenomena in physics!",
    "What a lovely request! Here's a haiku about rain for you:\n\nRain falls gently down,\nWashing all the dust away,\nEarth smells fresh and new.\n\nI hope this captures the serene essence of rainfall. Would you like me to write another one?",
    "Excellent question! The sky appears blue due to a phenomenon called Rayleigh scattering. When sunlight enters Earth's atmosphere, it collides with gas molecules and gets scattered in different directions. Blue light has a shorter wavelength and is scattered more than other colors, which is why we perceive the sky as blue during the day. At sunset, the light travels through more atmosphere, scattering away blue light and leaving the beautiful reds and oranges we observe.",
    "I'd love to help explain this concept! An algorithm is essentially a step-by-step set of instructions designed to perform a specific task or solve a particular problem. Think of it like a recipe in cooking - you follow each step in order to achieve the desired result. Algorithms are fundamental to computer science and are used in everything from search engines to social media feeds. They help computers process information efficiently and make decisions based on defined rules.",
    "Of course! Here are three prime numbers for you: 2, 3, and 5. A prime number is a natural number greater than 1 that is only divisible by 1 and itself. Interestingly, 2 is the only even prime number! Would you like to know more about prime numbers?",
    "Great literary question! Romeo and Juliet was written by the legendary William Shakespeare, often regarded as the greatest writer in the English language. This iconic tragedy was composed around 1594-1596 and tells the story of two young star-crossed lovers whose deaths ultimately reconcile their feuding families. It remains one of the most performed plays in history!",
    "Happy to explain! H2O is the chemical formula for water, one of the most essential compounds for life on Earth. The formula indicates that each water molecule consists of two hydrogen atoms bonded to one oxygen atom. Water is unique in many ways - it's the only natural substance found in all three physical states at normal Earth temperatures, and it has an unusually high specific heat capacity. About 71% of Earth's surface is covered in water!",
    "The word 'hello' reversed is 'olleh'. Reversing strings is a common operation in programming and can be accomplished in many ways depending on the programming language you're using. Would you like me to show you how to reverse a string in a specific programming language?"
]

In [None]:
from sentence_transformers import SentenceTransformer

embedder = SentenceTransformer('all-MiniLM-L6-v2')

emb_gpt4 = embedder.encode(outputs_g2)    # shape: (10, 384)
emb_gpt4o = embedder.encode(outputs_g3)  # shape: (10, 384)


Loading weights: 100%|██████████| 103/103 [00:00<00:00, 2006.66it/s, Materializing param=pooler.dense.weight]                             
BertModel LOAD REPORT from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


In [None]:
def rbf_kernel(X, Y, gamma):
    """k(x,y) = exp(-gamma * ||x-y||²)"""
    # ||x-y||² = ||x||² + ||y||² - 2*x·y
    X_sqnorm = np.sum(X**2, axis=1)
    Y_sqnorm = np.sum(Y**2, axis=1)
    sq_dists = X_sqnorm[:, None] + Y_sqnorm[None, :] - 2 * X @ Y.T
    return np.exp(-gamma * sq_dists)

def mmd_squared(X, Y, gamma):
    """Unbiased estimator of MMD²"""
    n, m = len(X), len(Y)
    
    K_XX = rbf_kernel(X, X, gamma)
    K_YY = rbf_kernel(Y, Y, gamma)
    K_XY = rbf_kernel(X, Y, gamma)
    
    # Remove diagonal for unbiased estimate
    np.fill_diagonal(K_XX, 0)
    np.fill_diagonal(K_YY, 0)
    
    term1 = K_XX.sum() / (n * (n - 1))  # E[k(x,x')]
    term2 = K_YY.sum() / (m * (m - 1))  # E[k(y,y')]
    term3 = K_XY.sum() / (n * m)        # E[k(x,y)]
    
    return term1 + term2 - 2 * term3

In [None]:
# Median heuristic for gamma
all_emb = np.vstack([emb_gpt4, emb_gpt4o])
dists = np.sqrt(((all_emb[:, None] - all_emb[None, :])**2).sum(axis=2))
median_dist = np.median(dists[dists > 0])
gamma = 1.0 / (2 * median_dist**2)

observed_mmd2 = mmd_squared(emb_gpt4, emb_gpt4o, gamma)

In [None]:
observed_mmd2

np.float32(-0.06023574)

In [None]:
def permutation_test(X, Y, gamma, n_perm=500):
    observed = mmd_squared(X, Y, gamma)
    
    pooled = np.vstack([X, Y])
    n = len(X)
    
    null_mmd2s = []
    for _ in range(n_perm):
        perm = np.random.permutation(len(pooled))
        X_perm = pooled[perm[:n]]
        Y_perm = pooled[perm[n:]]
        null_mmd2s.append(mmd_squared(X_perm, Y_perm, gamma))
    
    null_mmd2s = np.array(null_mmd2s)
    p_value = (null_mmd2s >= observed).mean()
    
    return observed, p_value, null_mmd2s

observed, p_value, null_dist = permutation_test(emb_gpt4, emb_gpt4o, gamma)

# ============================================================
# RESULTS
# ============================================================

print(f"MMD² = {observed:.6f}")
print(f"p-value = {p_value:.4f}")
print(f"Null mean = {null_dist.mean():.6f}")
print(f"Drift detected: {'YES' if p_value < 0.05 else 'NO'}")

MMD² = -0.060236
p-value = 1.0000
Null mean = -0.001489
Drift detected: NO
