# Modal Diagnostics with Dynamic Cue Location

This notebook investigates why SENTENCE ≈ JABBERWOCKY for modals while other cue families show SENTENCE > JABBERWOCKY.

**Key Fix:** This version uses **DYNAMIC cue location**: it finds the cue word's actual position in each condition string (by default the first occurrence, if the cue appears more than once) rather than assuming a fixed position. This makes scrambled baselines valid.

## Outputs
1. `modal_cue_alignment_log.txt` - Cue location statistics
2. `modal_next_token_diagnostics.md` - Top-30 predictions for sampled items
3. `modal_mass_decomposition.csv` - Mass breakdown by bucket
4. `modal_summary_altTargets.csv` - Summary with VerbOnly and VPStart
5. `modal_contrasts_altTargets.csv` - Statistical contrasts
6. `figure_modals_altTargets.png` - Paper-ready figure

In [None]:
# Mount Google Drive so that every artifact this notebook writes ends up
# under the Drive-backed OUTPUT_DIR below (Colab-only API).
from google.colab import drive
drive.mount('/content/drive')

# Create the output directory; exist_ok=True makes re-running this cell safe.
import os
OUTPUT_DIR = '/content/drive/MyDrive/morphosyntax_modal_diagnostics'
os.makedirs(OUTPUT_DIR, exist_ok=True)
print(f"Output will be saved to: {OUTPUT_DIR}")

In [None]:
# Install dependencies
!pip install transformers torch pandas numpy matplotlib scipy tqdm -q

In [None]:
import json
import random
import numpy as np
import pandas as pd
import torch
import matplotlib.pyplot as plt
from scipy import stats
from datetime import datetime
from transformers import AutoModelForCausalLM, AutoTokenizer
from tqdm.notebook import tqdm

print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")

## Configuration

In [None]:
# Configuration
# Name passed to AutoTokenizer / AutoModelForCausalLM.from_pretrained below.
MODEL_NAME = 'gpt2'  # Change to 'gpt2-medium', 'gpt2-large', etc. if desired

# Cue words accepted as modals. Step 0 logs a [FAIL] for any stimulus whose
# cue_word is not in this set (note: despite the name, this is a set).
MODALS_LIST = {'can', 'will', 'could', 'would', 'should', 'must', 'may', 'might'}

# Target class definitions
# Words counted toward the VERB bucket by classify_token. 'be', 'have' and 'do'
# also appear in BEHAVE_SET; classify_token tests BEHAVE_SET first, so those
# three are bucketed as BEHAVE, not VERB.
VERB_SET = {
    'be', 'have', 'do', 'say', 'go', 'get', 'make', 'know', 'think', 'take',
    'see', 'come', 'want', 'use', 'find', 'give', 'tell', 'work', 'call', 'try',
    'ask', 'need', 'feel', 'become', 'leave', 'put', 'mean', 'keep', 'let', 'begin',
    'seem', 'help', 'show', 'hear', 'play', 'run', 'move', 'live', 'believe',
    'bring', 'happen', 'write', 'sit', 'stand', 'lose', 'pay', 'meet', 'continue',
    'set', 'learn', 'change', 'lead', 'understand', 'watch', 'follow', 'stop', 'create', 'speak',
    'read', 'allow', 'add', 'spend', 'grow', 'open', 'walk', 'win', 'teach', 'offer',
    'remember', 'love', 'consider', 'appear', 'buy', 'serve', 'die', 'send', 'build', 'stay',
    'fall', 'cut', 'reach', 'kill', 'raise', 'pass', 'sell', 'decide', 'return', 'explain',
    'hope', 'develop', 'carry', 'break', 'receive', 'agree', 'support', 'hit', 'produce', 'eat',
    'study', 'research', 'investigate', 'examine', 'analyze', 'explore',
    'paint', 'draw', 'design', 'construct', 'perform', 'practice',
    'publish', 'edit', 'revise', 'prepare', 'cook', 'repair', 'fix',
    'solve', 'calculate', 'improve', 'enhance', 'test', 'validate',
    'organize', 'arrange', 'defend', 'protect', 'film', 'record',
    'sail', 'navigate', 'discuss', 'debate', 'assemble', 'combine',
    'plan', 'schedule', 'finish', 'complete', 'start', 'end',
}

# BE/HAVE/DO auxiliary forms (all inflections); these form the BEHAVE bucket.
BEHAVE_SET = {
    'be', 'been', 'being', 'am', 'is', 'are', 'was', 'were',
    'have', 'has', 'had', 'having',
    'do', 'does', 'did', 'done', 'doing',
}

# Negation tokens matched exactly (after lowercasing/stripping) by contains_negation.
NEG_SET = {'not', "n't", "nt"}

# Common adverbs that might follow modals; these form the ADV bucket.
ADV_SET = {
    'also', 'always', 'never', 'ever', 'just', 'still', 'only', 'even',
    'already', 'often', 'soon', 'now', 'then', 'perhaps', 'probably',
    'certainly', 'definitely', 'possibly', 'actually', 'really', 'simply',
    'easily', 'quickly', 'slowly', 'well', 'better', 'best',
}

# Keys looked up on every stimulus dict (stim[cond]); each holds that
# condition's text. Upper-cased versions are used as condition labels in outputs.
CONDITIONS = ['sentence', 'jabberwocky', 'full_scrambled', 'content_scrambled', 'function_scrambled']

## Helper Functions

In [None]:
def find_cue_position(condition_text, cue_word, expected_occurrence=1):
    """
    DYNAMICALLY locate the cue word in a condition string.

    This is the KEY FIX - instead of assuming cue is at a fixed position,
    we find where it actually is in each condition.

    The text is lowercased and split on whitespace; each word is stripped of
    surrounding punctuation before being compared with the lowercased cue.

    Args:
        condition_text: Text of one stimulus condition.
        cue_word: Cue to look for (case-insensitive, whole-word match).
        expected_occurrence: 1-based occurrence to select when the cue
            appears more than once. The default (1) selects the first match.
            If the requested occurrence does not exist, the first match is
            used and the message says so.

    Returns:
        tuple: (cue_index, status, message)
            cue_index is the 0-based whitespace-word index (None if missing);
            status is 'ok', 'missing' or 'ambiguous'.
    """
    cue_lower = cue_word.lower()
    strip_chars = '.,!?;:"\'-()"\'[]{}«»'

    # Indices of every whitespace-delimited word that equals the cue once
    # surrounding punctuation has been removed.
    matches = [
        i
        for i, w in enumerate(condition_text.lower().split())
        if w.strip(strip_chars) == cue_lower
    ]

    if not matches:
        return None, 'missing', f"Cue '{cue_word}' not found in text"
    if len(matches) == 1:
        return matches[0], 'ok', f"Cue found at position {matches[0]}"

    found = f"Cue found {len(matches)} times at {matches}"
    if expected_occurrence == 1:
        # Default path: message kept identical to the historical wording.
        return matches[0], 'ambiguous', f"{found}, using first"
    if isinstance(expected_occurrence, int) and 1 <= expected_occurrence <= len(matches):
        chosen = matches[expected_occurrence - 1]
        return chosen, 'ambiguous', f"{found}, using occurrence {expected_occurrence}"
    # Requested occurrence is out of range: fall back to the first match
    # rather than failing, and report the fallback.
    return matches[0], 'ambiguous', f"{found}, occurrence {expected_occurrence} unavailable, using first"


def is_word_start_token(tokenizer, token_id):
    """Report whether a token begins a new word.

    The token is decoded on its own. It counts as word-initial when the
    decoded string starts with a space or a newline and is not one of the
    listed special/empty strings.

    Returns:
        tuple: (True, word) with the word lowercased and stripped of
        surrounding punctuation, or (False, None) otherwise.
    """
    decoded = tokenizer.decode([token_id])
    special_or_empty = ('<|endoftext|>', '<unk>', '<pad>', '')

    # Guard clause: special tokens and word continuations are not word starts.
    if decoded in special_or_empty or not decoded.startswith((' ', '\n')):
        return False, None

    return True, decoded.strip().lower().strip('.,!?;:"\'-()')


def contains_negation(token_str):
    """Check if token is a negation marker.

    A token counts as negation when, lowercased and whitespace-stripped, it
    is a member of NEG_SET, or when it contains the contraction "n't"
    (straight or typographic apostrophe), e.g. "don't".

    A bare "nt" substring is deliberately NOT treated as negation: that test
    also matched ordinary words such as "want", "into", "continue" or
    "point", which misrouted their probability mass into the NEG bucket.
    An exact "nt" token is still recognised through NEG_SET.

    Args:
        token_str: Decoded token text (may carry a leading space).

    Returns:
        bool: True if the token is a negation marker.
    """
    token_lower = token_str.lower().strip()
    if token_lower in NEG_SET:
        return True
    return "n't" in token_lower or "n\u2019t" in token_lower


def classify_token(word, token_str):
    """Assign a token to one of NEG / BEHAVE / VERB / ADV / OTHER.

    Precedence: a missing word is OTHER; negation (decided from the raw
    token text) wins next; then the word is looked up in BEHAVE_SET,
    VERB_SET and ADV_SET in that order, so be/have/do forms never count
    as VERB.
    """
    if word is None:
        return 'OTHER'

    key = word.lower()
    if contains_negation(token_str):
        return 'NEG'

    # First matching set decides the bucket.
    ordered_buckets = (
        ('BEHAVE', BEHAVE_SET),
        ('VERB', VERB_SET),
        ('ADV', ADV_SET),
    )
    for label, members in ordered_buckets:
        if key in members:
            return label
    return 'OTHER'


def get_context_at_cue(text, cue_position):
    """Return the prefix of ``text`` ending with the word at ``cue_position``.

    The text is split on whitespace and the kept words are re-joined with
    single spaces, so the result is whitespace-normalised.
    """
    tokens = text.split()
    end = cue_position + 1  # slice bound is exclusive, so include the cue itself
    return ' '.join(tokens[:end])


def compute_mass_decomposition(probs, tokenizer, top_k=1000):
    """Sum next-token probability per bucket over the top-k tokens.

    Only the ``top_k`` most probable tokens are inspected, and only those
    that start a word (per is_word_start_token) contribute; the returned
    masses therefore need not sum to 1.

    Returns:
        dict: bucket name ('VERB', 'BEHAVE', 'NEG', 'ADV', 'OTHER') -> mass.
    """
    k = min(top_k, len(probs))
    best_probs, best_ids = torch.topk(probs, k)

    mass = dict.fromkeys(('VERB', 'BEHAVE', 'NEG', 'ADV', 'OTHER'), 0.0)

    # Convert to plain Python numbers once instead of calling .item() per element.
    for p, tid in zip(best_probs.tolist(), best_ids.tolist()):
        starts_word, word = is_word_start_token(tokenizer, tid)
        if starts_word:
            raw_token = tokenizer.decode([tid])
            mass[classify_token(word, raw_token)] += p

    return mass


def get_top_predictions(probs, tokenizer, top_k=30):
    """Get top-k predictions with classification.

    Args:
        probs: 1-D tensor of next-token probabilities.
        tokenizer: Tokenizer used to decode token ids.
        top_k: Number of highest-probability tokens to report.

    Returns:
        list[dict]: one entry per token, most probable first, with keys
            'token' (decoded text), 'prob', 'word' (normalised word, or None
            for non-word-start tokens), 'bucket' (classify_token result, or
            'SUBWORD' for non-word-start tokens) and 'in_VerbOnly'.

    'in_VerbOnly' is True exactly when the token falls in the VERB bucket,
    i.e. when its probability is counted by compute_mass_decomposition under
    'VERB'. Raw VERB_SET membership is not used because VERB_SET also lists
    'be', 'have' and 'do', which are bucketed as BEHAVE and so do not
    contribute to the VERB mass.
    """
    top_k_probs, top_k_ids = torch.topk(probs, min(top_k, len(probs)))

    predictions = []
    for prob, token_id in zip(top_k_probs.tolist(), top_k_ids.tolist()):
        token_str = tokenizer.decode([token_id])
        is_start, word = is_word_start_token(tokenizer, token_id)

        # Continuation pieces are reported but never classified.
        bucket = classify_token(word, token_str) if is_start else 'SUBWORD'

        predictions.append({
            'token': token_str,
            'prob': prob,
            'word': word,
            'bucket': bucket,
            'in_VerbOnly': bucket == 'VERB',
        })

    return predictions

## Load Model and Stimuli

In [None]:
# Load tokenizer and causal LM for MODEL_NAME
print(f"Loading model: {MODEL_NAME}")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)

# Prefer the GPU when one is visible; otherwise run on CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Inference only: switch to eval mode, then move the weights to the device.
model.eval()
model = model.to(device)
print(f"Using device: {device}")

In [None]:
# Upload stimuli_locked.json
from google.colab import files
print("Please upload stimuli_locked.json:")
uploaded = files.upload()

# Fail with an actionable message if the upload dialog was dismissed or
# produced nothing, instead of an opaque IndexError on the lookup below.
if not uploaded:
    raise RuntimeError("No file was uploaded; upload stimuli_locked.json and re-run this cell.")

# Load stimuli from the first uploaded file. The encoding is given explicitly
# so non-ASCII stimulus text is read the same way regardless of locale.
stimuli_file = next(iter(uploaded))
with open(stimuli_file, 'r', encoding='utf-8') as f:
    all_stimuli = json.load(f)

# Filter to modals only (each stimulus is expected to carry a 'cue_family' key)
modal_stimuli = [s for s in all_stimuli if s['cue_family'] == 'modals']
print(f"\nLoaded {len(modal_stimuli)} modal stimuli")

## Step 0: Cue Alignment Check with Dynamic Location

In [None]:
print("=" * 70)
print("STEP 0: Cue Alignment Check (Dynamic Location)")
print("=" * 70)

# Lines of the plain-text alignment log; written to disk at the end of the cell.
alignment_log = [
    "MODAL CUE ALIGNMENT LOG (DYNAMIC LOCATION)",
    f"Generated: {datetime.now().isoformat()}",
    "=" * 60,
    "",
    "This version DYNAMICALLY locates the cue in each condition,",
    "so scrambled conditions are now analyzed correctly.",
    ""
]

# Per-condition tally of find_cue_position statuses, plus every cue index found.
cue_location_stats = {cond: {'ok': 0, 'missing': 0, 'ambiguous': 0, 'positions': []} for cond in CONDITIONS}
issues_found = 0

for stim in modal_stimuli:
    item_id = stim['set_id']
    cue_word = stim['cue_word'].lower()

    # A cue outside MODALS_LIST is an issue; the item is not tallied per condition.
    if cue_word not in MODALS_LIST:
        alignment_log.append(f"[FAIL] Item {item_id}: cue_word '{cue_word}' not in MODALS_LIST")
        issues_found += 1
        continue

    for cond in CONDITIONS:
        text = stim[cond]
        cue_pos, status, message = find_cue_position(text, cue_word)

        cue_location_stats[cond][status] += 1
        if cue_pos is not None:
            cue_location_stats[cond]['positions'].append(cue_pos)

        # Missing cues count as issues; ambiguous cues are logged as warnings only.
        if status == 'missing':
            alignment_log.append(f"[FAIL] Item {item_id} {cond}: {message}")
            issues_found += 1
        elif status == 'ambiguous':
            alignment_log.append(f"[WARN] Item {item_id} {cond}: {message}")

# Summary
alignment_log.extend(["", "=" * 60, "DYNAMIC CUE LOCATION SUMMARY", "=" * 60, ""])

print("\nCue location summary by condition:")
for cond in CONDITIONS:
    cue_stats = cue_location_stats[cond]  # Renamed to avoid shadowing scipy.stats
    total = cue_stats['ok'] + cue_stats['missing'] + cue_stats['ambiguous']
    valid = cue_stats['ok'] + cue_stats['ambiguous']
    positions = cue_stats['positions']

    # total is 0 when there are no modal stimuli or every cue was rejected
    # above; report 0.0% instead of dividing by zero.
    valid_pct = 100 * valid / total if total else 0.0

    if positions:
        pos_min, pos_max = min(positions), max(positions)
        pos_mean = sum(positions) / len(positions)
        pos_range = f"range [{pos_min}-{pos_max}], mean={pos_mean:.1f}"
    else:
        pos_range = "no valid positions"

    print(f"  {cond}: {valid}/{total} valid, positions {pos_range}")

    alignment_log.extend([
        f"{cond.upper()}:",
        f"  Valid items: {valid}/{total} ({valid_pct:.1f}%)",
        f"  - ok: {cue_stats['ok']}, missing: {cue_stats['missing']}, ambiguous: {cue_stats['ambiguous']}",
        f"  Cue positions: {pos_range}",
        ""
    ])

alignment_log.extend([f"Total issues: {issues_found}", "Status: " + ("PASS" if issues_found == 0 else "ISSUES FOUND")])

# Explicit encoding: log lines embed item ids and cue words taken from the stimuli.
with open(f'{OUTPUT_DIR}/modal_cue_alignment_log.txt', 'w', encoding='utf-8') as f:
    f.write('\n'.join(alignment_log))
print(f"\nSaved: {OUTPUT_DIR}/modal_cue_alignment_log.txt")

## Step 1: Next-Token Diagnostics (10 sampled items)

In [None]:
print("=" * 70)
print("STEP 1: Next-Token Diagnostics (Dynamic Cue Location)")
print("=" * 70)

# Fixed seed so the same items are sampled on every run.
random.seed(42)
n_to_sample = min(10, len(modal_stimuli))
sampled_items = random.sample(modal_stimuli, n_to_sample)

# Markdown report, accumulated line by line and written once at the end.
diagnostics_md = [
    "# Modal Next-Token Diagnostics (Dynamic Cue Location)",
    f"\nGenerated: {datetime.now().isoformat()}",
    f"\nModel: {MODEL_NAME}",
    f"\nSampled items: {len(sampled_items)}",
    "\n**Note:** Cue position is dynamically located in each condition.",
    ""
]

for stim in tqdm(sampled_items, desc="Sampling diagnostics"):
    item_id, cue_word = stim['set_id'], stim['cue_word']
    diagnostics_md += [f"\n## Item {item_id} (cue: '{cue_word}')", ""]

    for cond in CONDITIONS:
        text = stim[cond]
        cue_pos, status, message = find_cue_position(text, cue_word)

        diagnostics_md.append(f"### {cond.upper()}")

        # No cue in this condition: record why and move on.
        if cue_pos is None:
            diagnostics_md += [f"**SKIPPED:** {message}", ""]
            continue

        context = get_context_at_cue(text, cue_pos)
        diagnostics_md += [
            f"**Cue position:** {cue_pos} ({status})",
            f"**Context:** `{context}`",
            f"**Full text:** `{text}`",
            ""
        ]

        # Next-token distribution right after the cue (last position of the context).
        encoded = tokenizer(context, return_tensors='pt').to(device)
        with torch.no_grad():
            last_logits = model(**encoded).logits[0, -1, :]
        probs = torch.softmax(last_logits, dim=-1).cpu()

        top_preds = get_top_predictions(probs, tokenizer, top_k=30)

        diagnostics_md += [
            "| Rank | Token | Prob | Word | Bucket | In VerbOnly |",
            "|------|-------|------|------|--------|-------------|"
        ]

        for rank, pred in enumerate(top_preds, start=1):
            # Escape characters that would break the markdown table row.
            shown = pred['token'].replace('|', '\\|').replace('\n', '\\n')
            flag = "Y" if pred['in_VerbOnly'] else ""
            diagnostics_md.append(f"| {rank} | `{shown}` | {pred['prob']:.4f} | {pred['word']} | {pred['bucket']} | {flag} |")

        diagnostics_md.append("")

report_path = f'{OUTPUT_DIR}/modal_next_token_diagnostics.md'
with open(report_path, 'w') as f:
    f.write('\n'.join(diagnostics_md))
print(f"Saved: {report_path}")

## Step 2: Mass Decomposition for ALL Modal Items

In [None]:
print("=" * 70)
print("STEP 2: Mass Decomposition (Dynamic Cue Location)")
print("=" * 70)

# Column order of the per-record table. Passing it explicitly to the
# DataFrame constructor guarantees these columns (and the CSV header) exist
# even when every item is skipped and no record is produced.
decomp_columns = [
    'item_id', 'condition', 'cue_word', 'cue_position',
    'mass_VERB', 'mass_BEHAVE', 'mass_NEG', 'mass_ADV', 'mass_OTHER',
    'mass_VerbOnly', 'mass_VPStart',
]

decomposition_results = []
skipped_counts = {cond: 0 for cond in CONDITIONS}

for stim in tqdm(modal_stimuli, desc="Computing mass decomposition"):
    item_id = stim['set_id']
    cue_word = stim['cue_word']

    for cond in CONDITIONS:
        text = stim[cond]

        # DYNAMICALLY locate cue
        cue_pos, status, message = find_cue_position(text, cue_word)

        # Without a cue there is no position to predict from; count and skip.
        if cue_pos is None:
            skipped_counts[cond] += 1
            continue

        context = get_context_at_cue(text, cue_pos)

        # Next-token distribution at the last position of the context.
        inputs = tokenizer(context, return_tensors='pt').to(device)
        with torch.no_grad():
            outputs = model(**inputs)
        logits = outputs.logits[0, -1, :]
        probs = torch.softmax(logits, dim=-1).cpu()

        mass = compute_mass_decomposition(probs, tokenizer, top_k=1000)

        decomposition_results.append({
            'item_id': item_id,
            'condition': cond.upper(),
            'cue_word': cue_word,
            'cue_position': cue_pos,
            'mass_VERB': mass['VERB'],
            'mass_BEHAVE': mass['BEHAVE'],
            'mass_NEG': mass['NEG'],
            'mass_ADV': mass['ADV'],
            'mass_OTHER': mass['OTHER'],
            # Alternate target definitions compared in Step 3:
            'mass_VerbOnly': mass['VERB'],
            'mass_VPStart': mass['VERB'] + mass['BEHAVE'] + mass['NEG'],
        })

# Report skipped items
print("\nSkipped items (missing cue):")
for cond in CONDITIONS:
    if skipped_counts[cond] > 0:
        print(f"  {cond}: {skipped_counts[cond]} items skipped")

decomp_df = pd.DataFrame(decomposition_results, columns=decomp_columns)
decomp_df.to_csv(f'{OUTPUT_DIR}/modal_mass_decomposition.csv', index=False)
print(f"\nSaved: {OUTPUT_DIR}/modal_mass_decomposition.csv")
print(f"Total records: {len(decomp_df)}")

## Step 3: Summary and Statistical Contrasts

In [None]:
print("=" * 70)
print("STEP 3: Alternate Target Definitions")
print("=" * 70)

# Per-condition mean / std of both target definitions, plus the record count
# (taken from the VerbOnly column), using named aggregation for flat columns.
summary_alt = decomp_df.groupby('condition').agg(
    VerbOnly_mean=('mass_VerbOnly', 'mean'),
    VerbOnly_std=('mass_VerbOnly', 'std'),
    n=('mass_VerbOnly', 'count'),
    VPStart_mean=('mass_VPStart', 'mean'),
    VPStart_std=('mass_VPStart', 'std'),
).reset_index()

# Standard error of the mean = std / sqrt(n), for each target definition.
root_n = np.sqrt(summary_alt['n'])
for target in ('VerbOnly', 'VPStart'):
    summary_alt[f'{target}_se'] = summary_alt[f'{target}_std'] / root_n

summary_path = f'{OUTPUT_DIR}/modal_summary_altTargets.csv'
summary_alt.to_csv(summary_path, index=False)
print(f"\nSaved: {summary_path}")
print("\nSummary:")
shown_columns = ['condition', 'VerbOnly_mean', 'VerbOnly_se', 'VPStart_mean', 'VPStart_se', 'n']
print(summary_alt[shown_columns].round(4).to_string(index=False))

In [None]:
# Statistical contrasts: paired comparisons between conditions, matched on item_id.
contrast_columns = [
    'target_def', 'contrast', 'mean_a', 'mean_b', 'diff',
    't_stat', 'p_value', 'cohens_d', 'n',
]
contrasts_results = []
contrast_pairs = [
    ('SENTENCE', 'JABBERWOCKY'),
    ('JABBERWOCKY', 'FULL_SCRAMBLED'),
    ('JABBERWOCKY', 'CONTENT_SCRAMBLED'),
    ('JABBERWOCKY', 'FUNCTION_SCRAMBLED'),
]

for target_col in ['mass_VerbOnly', 'mass_VPStart']:
    target_name = 'VerbOnly' if 'VerbOnly' in target_col else 'VPStart'

    for cond_a, cond_b in contrast_pairs:
        df_a = decomp_df[decomp_df['condition'] == cond_a].set_index('item_id')[target_col]
        df_b = decomp_df[decomp_df['condition'] == cond_b].set_index('item_id')[target_col]

        # Only items present in both conditions can be paired.
        common = df_a.index.intersection(df_b.index)
        if len(common) == 0:
            continue

        x, y = df_a.loc[common].values, df_b.loc[common].values
        paired_diff = x - y
        diff = np.mean(x) - np.mean(y)
        t_stat, p_val = stats.ttest_rel(x, y)

        # Effect size on the paired differences (mean / sample SD of the
        # differences); reported as 0 when the SD is zero or undefined.
        sd_diff = np.std(paired_diff, ddof=1)
        d = np.mean(paired_diff) / sd_diff if sd_diff > 0 else 0

        contrasts_results.append({
            'target_def': target_name,
            'contrast': f"{cond_a} - {cond_b}",
            'mean_a': np.mean(x),
            'mean_b': np.mean(y),
            'diff': diff,
            't_stat': t_stat,
            'p_value': p_val,
            'cohens_d': d,
            'n': len(common),
        })

# Explicit columns keep the frame (and CSV header) well-formed even when every
# contrast was skipped above, so the column selections below cannot KeyError.
contrasts_df = pd.DataFrame(contrasts_results, columns=contrast_columns)
contrasts_df.to_csv(f'{OUTPUT_DIR}/modal_contrasts_altTargets.csv', index=False)
print(f"\nSaved: {OUTPUT_DIR}/modal_contrasts_altTargets.csv")
print("\nContrasts:")
print(contrasts_df[['target_def', 'contrast', 'diff', 'p_value', 'cohens_d', 'n']].round(4).to_string(index=False))

## Step 4: Generate Figure

In [None]:
print("=" * 70)
print("STEP 4: Generate Figure")
print("=" * 70)

fig, axes = plt.subplots(1, 2, figsize=(12, 5))

conditions_order = ['SENTENCE', 'JABBERWOCKY', 'FULL_SCRAMBLED', 'CONTENT_SCRAMBLED', 'FUNCTION_SCRAMBLED']
condition_labels = ['Sent', 'Jab', 'Full\nScram', 'Cont\nScram', 'Func\nScram']
colors = ['#2ecc71', '#3498db', '#e74c3c', '#f39c12', '#9b59b6']

# One panel per target definition: (column in decomp_df, panel title).
panels = [
    ('mass_VerbOnly', 'VerbOnly (VERB only)'),
    ('mass_VPStart', 'VPStart (VERB+BE/HAVE+NEG)'),
]
x = np.arange(len(conditions_order))

for ax, (target_col, title) in zip(axes, panels):
    # Mean and standard error per condition; conditions with no records plot as 0.
    means, ses = [], []
    for cond in conditions_order:
        cond_data = decomp_df.loc[decomp_df['condition'] == cond, target_col]
        n_obs = len(cond_data)
        if n_obs > 0:
            means.append(cond_data.mean())
            ses.append(cond_data.std() / np.sqrt(n_obs))
        else:
            means.append(0)
            ses.append(0)

    ax.bar(x, means, yerr=ses, capsize=4, color=colors, alpha=0.8, edgecolor='white', linewidth=1.5)

    ax.set_title(f'Modal Target: {title}', fontweight='bold', fontsize=12)
    ax.set_xticks(x)
    ax.set_xticklabels(condition_labels, fontsize=9)
    ax.set_ylabel('Target Class Mass', fontsize=11)

    # Leave headroom above the tallest bar for the value labels.
    tallest = max(means)
    ax.set_ylim(0, tallest * 1.35 if tallest > 0 else 1)

    # Value label just above each error bar (only for non-zero bars).
    for i, (m, s) in enumerate(zip(means, ses)):
        if m > 0:
            ax.text(i, m + s + 0.01, f'{m:.3f}', ha='center', va='bottom', fontsize=9)

    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)

plt.suptitle(f'Modal Cue Family - Dynamic Cue Location ({MODEL_NAME})', fontweight='bold', fontsize=14, y=1.02)
plt.tight_layout()
figure_path = f'{OUTPUT_DIR}/figure_modals_altTargets.png'
plt.savefig(figure_path, dpi=300, bbox_inches='tight', facecolor='white')
plt.show()
print(f"\nSaved: {figure_path}")

## Summary and Interpretation

In [None]:
rule = "=" * 70
print(rule)
print("DIAGNOSTIC SUMMARY")
print(rule)

# Key comparison: mean target mass in SENTENCE vs JABBERWOCKY, per definition.
is_sentence = decomp_df['condition'] == 'SENTENCE'
is_jabberwocky = decomp_df['condition'] == 'JABBERWOCKY'
sent_vo = decomp_df.loc[is_sentence, 'mass_VerbOnly'].mean()
jab_vo = decomp_df.loc[is_jabberwocky, 'mass_VerbOnly'].mean()
sent_vp = decomp_df.loc[is_sentence, 'mass_VPStart'].mean()
jab_vp = decomp_df.loc[is_jabberwocky, 'mass_VPStart'].mean()

print(f"\nSENTENCE vs JABBERWOCKY comparison:")
print(f"  VerbOnly:  SENT={sent_vo:.4f}, JAB={jab_vo:.4f}, diff={sent_vo-jab_vo:+.4f}")
print(f"  VPStart:   SENT={sent_vp:.4f}, JAB={jab_vp:.4f}, diff={sent_vp-jab_vp:+.4f}")

# Matching rows of the contrasts table (empty if that contrast was not computed).
is_sent_vs_jab = contrasts_df['contrast'] == 'SENTENCE - JABBERWOCKY'
vo_contrast = contrasts_df[(contrasts_df['target_def'] == 'VerbOnly') & is_sent_vs_jab]
vp_contrast = contrasts_df[(contrasts_df['target_def'] == 'VPStart') & is_sent_vs_jab]

if len(vo_contrast) > 0:
    print(f"  VerbOnly p-value: {vo_contrast['p_value'].values[0]:.6f}, d={vo_contrast['cohens_d'].values[0]:.2f}")
if len(vp_contrast) > 0:
    print(f"  VPStart p-value:  {vp_contrast['p_value'].values[0]:.6f}, d={vp_contrast['cohens_d'].values[0]:.2f}")

# Scrambled baselines (NOW VALID with dynamic cue location)
print("\n" + rule)
print("SCRAMBLED BASELINES (Now Valid with Dynamic Cue Location)")
print(rule)

# Significance markers, checked from the strictest threshold down.
sig_levels = ((0.001, "***"), (0.01, "**"), (0.05, "*"))

for contrast_name in ['JABBERWOCKY - FULL_SCRAMBLED', 'JABBERWOCKY - FUNCTION_SCRAMBLED']:
    print(f"\n{contrast_name}:")
    for target in ['VerbOnly', 'VPStart']:
        row = contrasts_df[(contrasts_df['target_def'] == target) & (contrasts_df['contrast'] == contrast_name)]
        if len(row) == 0:
            continue
        r = row.iloc[0]
        sig = next((mark for cutoff, mark in sig_levels if r['p_value'] < cutoff), "n.s.")
        print(f"  {target}: diff={r['diff']:+.4f}, p={r['p_value']:.4f} {sig}, d={r['cohens_d']:.2f}")

In [None]:
banner = "=" * 70

print("\n" + banner)
print("INTERPRETATION (with Dynamic Cue Location)")
print(banner)
print("\nNote: This analysis uses DYNAMIC cue location for all conditions,")
print("so scrambled baselines now correctly measure 'prediction after the")
print("modal cue with disrupted structure' - not arbitrary position 2.")
print()

# SENTENCE-minus-JABBERWOCKY gap under each target definition
# (means computed in the diagnostic summary cell).
vo_gap = sent_vo - jab_vo
vp_gap = sent_vp - jab_vp

# "Artifact" reading: SENTENCE beats JABBERWOCKY on VPStart and the gap is
# larger under VPStart than under VerbOnly. Anything else is read as cue-driven.
if sent_vp > jab_vp and vo_gap < vp_gap:
    verdict_lines = [
        "-> The VerbOnly target definition appears to be an ARTIFACT.",
        "   When BE/HAVE/NEG are included (VPStart), SENTENCE > JABBERWOCKY as expected.",
        "   Real sentences use more auxiliary/negation continuations after modals.",
    ]
else:
    verdict_lines = [
        "-> Modals appear to be genuinely CUE-DRIVEN.",
        "   Even with expanded target definition, SENTENCE ~ JABBERWOCKY.",
        "   The modal cue alone saturates morphosyntactic constraint.",
    ]
for line in verdict_lines:
    print(line)

print("\n" + banner)
print("OUTPUT FILES")
print(banner)

# Numbered list of every artifact written by the notebook.
output_names = [
    'modal_cue_alignment_log.txt',
    'modal_next_token_diagnostics.md',
    'modal_mass_decomposition.csv',
    'modal_summary_altTargets.csv',
    'modal_contrasts_altTargets.csv',
    'figure_modals_altTargets.png',
]
for number, name in enumerate(output_names, start=1):
    print(f"{number}. {OUTPUT_DIR}/{name}")