# Semantic Gravity: Research Project Implementation

This notebook implements the research methodology for "Semantic Gravity: Quantifying the Efficiency Gap in Negative Constraint Adherence".

**Steps:**
1.  **Prompt Generation:** Load the prompts generated by the batch system (see `README_PROMPTS.md`).
2.  **White Box Analysis:** Use Qwen-2.5-7B-Instruct to calculate Semantic Pressure ($P_{sem}$).
3.  **Black Box Experiment:** Test GPT-5 models (Nano, Mini, Base) for negative-constraint adherence failures.
4.  **Analysis:** Plot Collapse Curves and analyze the Efficiency Gap.

In [1]:
# Setup & Imports
%pip install -q transformers accelerate bitsandbytes scipy pandas matplotlib openai

import os
import json
import time
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from transformers import AutoModelForCausalLM, AutoTokenizer
from openai import OpenAI

# Configuration
# The API key is read from the environment (or prompted for interactively) so
# that it is never stored in the notebook or its version history.
# NOTE(review): a literal key was previously committed on this line; treat that
# key as compromised and revoke it in the OpenAI dashboard.
from getpass import getpass

OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: ")
if not OPENAI_API_KEY:
    raise RuntimeError(
        "No OpenAI API key provided. Set the OPENAI_API_KEY environment variable "
        "or enter the key when prompted."
    )

GPT_MODELS = ["gpt-5-nano-2025-08-07", "gpt-5-mini-2025-08-07", "gpt-5-2025-08-07"]
QWEN_MODEL_ID = "Qwen/Qwen2.5-7B-Instruct"

client = OpenAI(api_key=OPENAI_API_KEY)

print("Setup Complete.")

## Step 1: Load Prompts

**IMPORTANT:** Before running this notebook, generate prompts using the batch system:
```bash
python generate_prompts_batch.py
python check_batch_status.py --monitor
python deduplicate_prompts.py
```

See `README_PROMPTS.md` for details.

In [2]:
# Load prompts generated by batch system
PROMPTS_FILE = "prompts.csv"
# Columns that later cells read from prompts_df.
REQUIRED_PROMPT_COLUMNS = ["prompt", "target_word", "bucket"]

if not os.path.exists(PROMPTS_FILE):
    raise FileNotFoundError(
        "prompts.csv not found. Please run the batch generation system first.\n"
        "See README_PROMPTS.md for instructions."
    )

prompts_df = pd.read_csv(PROMPTS_FILE)

# Fail here, with a clear message, rather than with a KeyError deep inside a
# long-running model loop further down the notebook.
missing_columns = [col for col in REQUIRED_PROMPT_COLUMNS if col not in prompts_df.columns]
if missing_columns:
    raise ValueError(
        f"{PROMPTS_FILE} is missing required column(s): {missing_columns}"
    )

print(f"Loaded {len(prompts_df)} prompts")
print("\nDistribution by bucket:")
print(prompts_df['bucket'].value_counts().sort_index())

## Step 2: White Box Analysis (Qwen)

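Calculate $P_{sem}$ for each prompt using Qwen. Here $P_{sem}$ is the total next-token probability that Qwen assigns, among its 100 most likely next tokens, to tokens containing the target word.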

In [None]:
# Load Qwen
# trust_remote_code is intentionally not enabled: it would allow Python code
# shipped in the model repository to run locally.
# NOTE(review): this relies on the installed transformers release supporting the
# Qwen2.5 architecture natively — confirm for your environment.
print("Loading Qwen Model...")
tokenizer = AutoTokenizer.from_pretrained(QWEN_MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    QWEN_MODEL_ID,
    device_map="auto",          # let accelerate place the weights on available devices
    torch_dtype=torch.float16   # half precision to reduce memory use
)
print("Model Loaded.")

def get_p_sem(prompt_text, target_word):
    """Return the semantic pressure of `target_word` as the next token of `prompt_text`.

    The prompt is run through the module-level `model`; the next-token
    distribution is taken from the logits at the last position. Among the
    most likely next tokens (up to 100), the probabilities of every token whose
    decoded text contains the target word (case-insensitive substring match) are
    summed.

    Args:
        prompt_text: Prompt string fed to the tokenizer as-is.
        target_word: Word to look for; surrounding whitespace and case are ignored.

    Returns:
        float: Summed probability of the matching tokens. Returns 0.0 when the
        target is empty or whitespace-only, because an empty string would
        otherwise match every token.
    """
    target_clean = target_word.strip().lower()
    if not target_clean:
        return 0.0

    inputs = tokenizer(prompt_text, return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model(**inputs)
        # Cast to float32 before the softmax: the model may run in half
        # precision, which is too coarse for a vocabulary-wide normalisation.
        logits = outputs.logits[0, -1, :].float()
        probs = torch.softmax(logits, dim=-1)

    # Get the top tokens to search for variations of target (never more than the vocab size)
    k = min(100, probs.shape[-1])
    top_probs, top_indices = torch.topk(probs, k)

    # One bulk transfer to Python instead of a device sync per token.
    prob_values = top_probs.tolist()
    token_ids = top_indices.tolist()

    p_sem = 0.0
    for prob, token_id in zip(prob_values, token_ids):
        token_str = tokenizer.decode([token_id])
        if target_clean in token_str.lower():
            p_sem += prob

    return p_sem

# Run Analysis
PSEM_CACHE_FILE = "prompts_with_psem.csv"

# Reuse scores saved by a previous run when they belong to the same prompts;
# without this the expensive model pass is repeated after every kernel restart.
if "p_sem" not in prompts_df.columns and os.path.exists(PSEM_CACHE_FILE):
    cached_df = pd.read_csv(PSEM_CACHE_FILE)
    cache_matches = (
        {"prompt", "p_sem"}.issubset(cached_df.columns)
        and len(cached_df) == len(prompts_df)
        and cached_df["prompt"].tolist() == prompts_df["prompt"].tolist()
    )
    if cache_matches:
        prompts_df = cached_df
        print(f"Loaded cached P_sem from {PSEM_CACHE_FILE}")
    else:
        print(f"{PSEM_CACHE_FILE} does not match the loaded prompts; recalculating.")

if "p_sem" not in prompts_df.columns:
    print("Calculating P_sem...")
    p_sems = []
    total = len(prompts_df)
    # Report progress by position so it stays meaningful for any index labels.
    for position, (_, row) in enumerate(prompts_df.iterrows()):
        p_sems.append(get_p_sem(row['prompt'], row['target_word']))
        torch.cuda.empty_cache()
        if position % 10 == 0:
            print(f"Processed {position}/{total}")

    prompts_df['p_sem'] = p_sems
    prompts_df.to_csv(PSEM_CACHE_FILE, index=False)
    print("Saved prompts_with_psem.csv")
else:
    print("P_sem already calculated.")

## Step 3: Black Box Runner (GPT-5)

Run the experiment on GPT-5 models.

In [None]:
def run_experiment(model_name, df, max_retries=3, retry_delay=5):
    """Run the negative-constraint experiment for one model, resuming from disk.

    Each row of `df` is sent to `model_name` with an instruction not to use the
    row's target word. The reply and a `failed` flag (case-insensitive substring
    match of the target word in the reply) are appended to
    `results_{model_name}.csv` immediately, so an interrupted run can be
    resumed: rows whose index is already in that file are skipped.

    Args:
        model_name: Model identifier passed to the chat completions API; also
            used to build the results file name.
        df: DataFrame with columns 'prompt', 'target_word', 'p_sem', 'bucket'.
        max_retries: Attempts per prompt before it is skipped (at least 1).
        retry_delay: Seconds to sleep after each failed attempt.

    Raises:
        KeyError: If `df` lacks one of the required columns.

    Note:
        A prompt that still fails after all attempts is not written to the
        results file, so the next call will try it again.
    """
    result_columns = ['index', 'prompt', 'target_word', 'p_sem', 'bucket', 'output', 'failed']
    required_columns = ['prompt', 'target_word', 'p_sem', 'bucket']

    # Check the schema once, up front: a missing column is a data problem and
    # must not be reported (and slept on) as an API error for every row.
    missing = [col for col in required_columns if col not in df.columns]
    if missing:
        raise KeyError(f"run_experiment: input DataFrame is missing column(s): {missing}")

    results_file = f"results_{model_name}.csv"

    # Load existing if any (Savepoint Protocol)
    if os.path.exists(results_file):
        processed_indices = set(pd.read_csv(results_file, usecols=['index'])['index'])
    else:
        pd.DataFrame(columns=result_columns).to_csv(results_file, index=False)
        processed_indices = set()

    print(f"Running experiment for {model_name}...")

    attempts_per_prompt = max(1, max_retries)

    for index, row in df.iterrows():
        if index in processed_indices:
            continue

        prompt = row['prompt']
        forbidden = row['target_word']
        messages = [
            {"role": "system", "content": "You are a completion engine. Do not output conversational filler. Just output the completion."},
            {"role": "user", "content": f"{prompt}\n\nConstraint: Do not use the word '{forbidden}'."}
        ]

        output = None
        for attempt in range(1, attempts_per_prompt + 1):
            try:
                # NOTE(review): some model families only accept the default
                # temperature; if every call fails with an unsupported-parameter
                # error, drop this argument — confirm against the API reference.
                response = client.chat.completions.create(
                    model=model_name,
                    messages=messages,
                    temperature=0.0  # Deterministic for testing
                )
                # The API may return no text content; treat that as an empty reply.
                output = response.choices[0].message.content or ""
                break
            except Exception as e:
                print(f"Error on {index} (attempt {attempt}/{attempts_per_prompt}): {e}")
                time.sleep(retry_delay)

        if output is None:
            # All attempts failed; leave the row unrecorded so a rerun retries it.
            continue

        # Substring match: also flags longer words that contain the target.
        failed = forbidden.lower() in output.lower()

        # Append Result
        new_row = {
            'index': index,
            'prompt': prompt,
            'target_word': forbidden,
            'p_sem': row['p_sem'],
            'bucket': row['bucket'],
            'output': output,
            'failed': failed
        }

        pd.DataFrame([new_row]).to_csv(results_file, mode='a', header=False, index=False)

# Run the black-box experiment once per configured GPT model. Each call keeps
# its own results file and skips prompts already recorded there, so this cell
# can be re-run after an interruption.
for model_name in GPT_MODELS:
    run_experiment(model_name=model_name, df=prompts_df)

## Step 4: Analysis & Visualization

Plot the Collapse Curves.

In [None]:
def sigmoid(x, L, x0, k, b):
    """Four-parameter logistic curve used to fit the collapse data.

    Args:
        x: Input value(s).
        L: Height of the curve above the baseline.
        x0: Midpoint, where the curve reaches half of `L` above the baseline.
        k: Steepness of the transition.
        b: Baseline offset added to the result.

    Returns:
        L / (1 + exp(-k * (x - x0))) + b, evaluated element-wise.
    """
    exponent = -k * (x - x0)
    denominator = 1 + np.exp(exponent)
    return L / denominator + b

# Column layout of the per-model results files.
RESULT_COLUMNS = ['index', 'prompt', 'target_word', 'p_sem', 'bucket', 'output', 'failed']
# The sigmoid has four free parameters, so a fit needs at least four points.
N_SIGMOID_PARAMS = 4

plt.figure(figsize=(10, 6))

# Ten equal-width P_sem bins on [0, 1], each labelled by its midpoint.
bins = np.linspace(0, 1, 11)
bin_mids = (bins[:-1] + bins[1:]) / 2

for model_name in GPT_MODELS:
    results_file = f"results_{model_name}.csv"
    if not os.path.exists(results_file):
        continue

    df = pd.read_csv(results_file, names=RESULT_COLUMNS, header=0)
    if df.empty:
        print(f"No results recorded yet for {model_name}")
        continue

    # Binning
    # include_lowest keeps rows with p_sem == 0.0, which the default
    # right-closed intervals would otherwise drop from the first bin.
    df['bin'] = pd.cut(df['p_sem'], bins, labels=bin_mids, include_lowest=True)
    # observed=True leaves out empty bins, whose NaN means would break the fit.
    binned_data = df.groupby('bin', observed=True)['failed'].mean().reset_index()
    binned_data['p_sem_mid'] = binned_data['bin'].astype(float)

    # Plot Points
    plt.scatter(binned_data['p_sem_mid'], binned_data['failed'], label=f"{model_name} (Data)", alpha=0.5)

    # Fit Curve
    if len(binned_data) < N_SIGMOID_PARAMS:
        print(f"Could not fit curve for {model_name}: only {len(binned_data)} populated bin(s)")
        continue

    try:
        p0 = [
            binned_data['failed'].max(),
            binned_data['p_sem_mid'].median(),
            1,
            binned_data['failed'].min(),
        ]
        popt, _ = curve_fit(sigmoid, binned_data['p_sem_mid'], binned_data['failed'], p0=p0, maxfev=5000)
        x_model = np.linspace(0, 1, 100)
        y_model = sigmoid(x_model, *popt)
        plt.plot(x_model, y_model, label=f"{model_name} (Fit)")
    except (RuntimeError, ValueError, TypeError) as e:
        # Failed fits are reported and skipped; the scatter points are still shown.
        print(f"Could not fit curve for {model_name}: {e}")

plt.title("Semantic Gravity: Collapse Curves")
plt.xlabel("Semantic Pressure ($P_{sem}$)")
plt.ylabel("Failure Rate ($R_{fail}$)")
plt.legend()
plt.grid(True)
plt.savefig("collapse_curves.png")
plt.show()

In [None]:
# Step 5: Export & Download
import json
from datetime import datetime
import shutil
import zipfile

# The Colab download helper only exists inside Google Colab; elsewhere the
# archive is simply left in the working directory.
try:
    from google.colab import files
except ImportError:
    files = None

# Create metadata
metadata = {
    'timestamp': datetime.now().isoformat(),
    'models': GPT_MODELS,
    'qwen_model': QWEN_MODEL_ID,
    'num_prompts': len(prompts_df)
}
with open('experiment_details.json', 'w') as f:
    json.dump(metadata, f, indent=2)

# List of files to zip
files_to_zip = [
    'prompts.csv',
    'prompts_with_psem.csv',
    'experiment_details.json',
    'collapse_curves.png'
]

# Add result files
files_to_zip.extend(f'results_{model}.csv' for model in GPT_MODELS)

# Zip everything
# ZipFile lives in the zipfile module; shutil has no such attribute.
output_filename = 'experiment_results.zip'
with zipfile.ZipFile(output_filename, 'w', compression=zipfile.ZIP_DEFLATED) as zipf:
    for file in files_to_zip:
        if os.path.exists(file):
            zipf.write(file)
        else:
            print(f'Warning: {file} not found')

# Download
if files is not None:
    print(f'Downloading {output_filename}...')
    files.download(output_filename)
else:
    print(f'Not running in Colab; archive saved to {os.path.abspath(output_filename)}')