# Semantic Gravity Experiment - Inference Notebook

This notebook runs inference with Qwen 2.5-7B-Instruct in Google Colab on an A100 GPU runtime.

**Prerequisites:**
- Google Colab with A100 GPU runtime
- Qwen model files in Google Drive
- Source files copied to Drive

## 1. Environment Setup

In [None]:
# Install required packages
!pip install -q torch transformers accelerate tokenizers numpy pandas scipy scikit-learn matplotlib tqdm requests wordfreq

In [None]:
# Attach Google Drive to the Colab filesystem at the mount point below
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
# Configuration - UPDATE THESE PATHS
MODEL_PATH = "/content/drive/MyDrive/models/Qwen2.5-7B-Instruct"  # Path to Qwen model
DATA_ROOT = "/content/drive/MyDrive/SemanticGravity"  # Root for experiment data
SRC_PATH = "/content/drive/MyDrive/SemanticGravity/src"  # Path to source files

import sys

# Put the source directory at the front of the import path so the modules stored
# there can be imported. The membership check keeps a re-run of this cell from
# stacking duplicate entries onto sys.path.
if SRC_PATH not in sys.path:
    sys.path.insert(0, SRC_PATH)

In [None]:
# Bring the experiment's configuration objects and helper utilities into scope
from config import (
    CONFIG,
    PROMPT_TEMPLATES,
    setup_directories,
    validate_environment,
)
from utils import (
    ModelWrapper,
    set_seed,
    setup_logging,
    normalize_for_match,
    word_in_text,
    find_word_occurrences,
    generate_surface_variants,
    compute_token_char_spans,
    map_word_to_tokens,
)

# Run the environment check and report two fields of the metadata it returns,
# falling back to a default when a field is absent
metadata = validate_environment()
gpu_name = metadata.get('gpu_name', 'N/A')
print(f"GPU: {gpu_name}")
transformers_ok = metadata.get('transformers_compatible', False)
print(f"Transformers compatible: {transformers_ok}")

In [None]:
# Seed with a fixed value for reproducibility
SEED = 42
set_seed(SEED)

# Set up the experiment directories and list the keys of the returned mapping
dirs = setup_directories()
created_keys = list(dirs.keys())
print("Directories created:", created_keys)

## 2. Load Model

In [None]:
# Fetch the shared ModelWrapper instance (a singleton that only loads once,
# per the original note) and load the model from MODEL_PATH
wrapper = ModelWrapper.get_instance()
wrapper.load(model_path=MODEL_PATH)

# Sanity check: report the load flag, then the tokenizer length
load_status = wrapper.is_loaded
print(f"Model loaded: {load_status}")
vocab_size = len(wrapper.tokenizer)
print(f"Vocab size: {vocab_size}")

## 3. Test Generation

In [None]:
# Smoke-test generation with a plain one-word-answer prompt
test_prompt = (
    "Answer with exactly one English word.\n"
    "Question: The capital of France is ____.\n"
    "Answer:"
)

# do_sample=False selects greedy decoding for this test
generation_args = {
    "prompt": test_prompt,
    "max_new_tokens": 8,
    "do_sample": False,
}
result = wrapper.generate(**generation_args)

print(f"Prompt: {test_prompt}")
print(f"Generated: {result['generated_text']}")

In [None]:
# Same question as the basic test, but with an added instruction that forbids
# the word "Paris" in the answer
test_prompt_neg = (
    "Answer with exactly one English word.\n"
    'Do not use the word "Paris" anywhere in your answer.\n'
    "Question: The capital of France is ____.\n"
    "Answer:"
)

# Same generation settings as the basic test: 8 new tokens, no sampling
negative_generation_args = {
    "prompt": test_prompt_neg,
    "max_new_tokens": 8,
    "do_sample": False,
}
result_neg = wrapper.generate(**negative_generation_args)

print(f"Prompt (with constraint): {test_prompt_neg}")
print(f"Generated: {result_neg['generated_text']}")

## 4. Utility Functions Demo

In [None]:
# Demonstrate the word-matching helpers on a fixed target/completion pair
target = "Paris"
completion = "Paris is beautiful."

# Presence check
is_present = word_in_text(target, completion)
print(f"Word '{target}' in '{completion}': {is_present}")

# Occurrences of the target reported by the helper
occs = find_word_occurrences(target, completion)
print(f"Occurrences: {occs}")

# Normalized form of the completion used for matching
normalized_completion = normalize_for_match(completion)
print(f"Normalized: {normalized_completion}")

In [None]:
# Round-trip a word through the tokenizer and show the intermediate values
text = "Paris"
tokenizer = wrapper.tokenizer
token_ids = tokenizer.encode(text, add_special_tokens=False)
decoded = tokenizer.decode(token_ids)

for label, value in (("Text", text), ("Token IDs", token_ids), ("Decoded", decoded)):
    print(f"{label}: {value}")

# Character spans computed for the token ids
spans = compute_token_char_spans(token_ids, tokenizer)
print(f"Char spans: {spans}")

## 5. Template for Running Experiments

The cells below show the structure for running the main experiment. 
Actual experiment code will be added in later modules.

In [None]:
# Example: build the baseline and negative-instruction prompts from PROMPT_TEMPLATES
# NOTE(review): Template is imported but not used in this cell; the prompts are
# filled with .format(), which presumes the template entries are plain format
# strings - confirm against config.
from string import Template

question = "The capital of France is ____."
target = "Paris"

# The baseline template takes only the question
baseline_template = PROMPT_TEMPLATES['baseline']
baseline_prompt = baseline_template.format(question=question)

# The negative-instruction template additionally takes the target word
negative_template = PROMPT_TEMPLATES['negative_instruction']
negative_prompt = negative_template.format(question=question, target=target)

print("Baseline prompt:", baseline_prompt, sep="\n")
print("\nNegative instruction prompt:", negative_prompt, sep="\n")

In [None]:
# Cleanup when done
# Uncomment the line below to free GPU memory.
# wrapper.unload()