# Baseline Experiments - All Perturbations

This notebook runs baseline rating experiments for all perturbations using Qwen3-8B.

**Perturbations:**
- `add_typos` (0.3, 0.5, 0.7 probability)
- `change_dosage`
- `remove_sentences` (30%, 50%, 70%)
- `add_confusion`

**Levels:**
- `coarse` (answer-level)
- `fine` (sentence-level)

## Setup

In [None]:
import sys
import os
import json
import random
from pathlib import Path

# Make the project's `code/` directory importable. The project root is taken
# to be the parent of the current working directory.
project_root = Path.cwd().parent
code_dir = project_root.joinpath('code')
sys.path.insert(0, os.fspath(code_dir))

from helpers.experiment_utils import (
    setup_paths,
    load_qa_data,
    get_processed_ids,
    clean_model_name,
    get_id_key,
    get_or_create_perturbations,
    get_or_create_original_ratings,
    save_result
)
from helpers.multi_llm_inference import get_provider_from_model
from perturbation_pipeline import load_prompt, get_rating_with_averaging

# Confirmation that the imports resolved. The check mark was mis-encoded
# (UTF-8 bytes decoded as MacRoman) and is restored here.
print("✓ Imports successful")

## Configuration

In [None]:
# ---- Experiment settings ----
MODEL = 'Qwen3-8B'
SEED = 42
NUM_RUNS = 5  # how many rating runs are averaged per answer

# Seed Python's `random` module up front for reproducibility
random.seed(SEED)

# Perturbations to execute; None selects every perturbation
PERTURBATIONS = None  # ['add_typos', 'change_dosage', 'remove_sentences', 'add_confusion']

# Granularity to evaluate
LEVEL = 'both'  # 'coarse', 'fine', or 'both'

# ---- Backup settings ----
# Directory that receives copies of existing result files before new
# experiments are run. Set to None to disable the backup step.
BACKUP_DIR = project_root.joinpath('output', 'cqa_eval', 'experiment_results', 'backup')
# Example: BACKUP_DIR = project_root / 'output' / 'cqa_eval' / 'experiment_results' / 'backup_2026_01_30'

# Echo the effective configuration
print(f"Model: {MODEL}")
print(f"Provider: {get_provider_from_model(MODEL)}")
print(f"Random seed: {SEED}")
print(f"Level: {LEVEL}")
if BACKUP_DIR:
    print(f"Backup directory: {BACKUP_DIR}")

## Load Paths and Data

In [None]:
# Resolve the project locations once; `paths`, `output_dir` and
# `model_name_clean` are reused by the cells below (the cleaned model name
# becomes part of every result filename).
paths = setup_paths()
model_name_clean = clean_model_name(MODEL)
output_dir = paths['output_dir']

# Show where data is read from and where results are written
print(f"Output directory: {output_dir}")
print(f"Coarse data: {paths['coarse_data_path']}")
print(f"Fine data: {paths['fine_data_path']}")
print(f"Prompts: {paths['prompts_dir']}")

## Define Perturbations

In [None]:
# Parameter grid for every supported perturbation.
# Each entry lists the `typo_prob` and `remove_pct` values to iterate over.
# Only `add_typos` varies `typo_prob` and only `remove_sentences` varies
# `remove_pct`; the single-element lists elsewhere appear to be placeholders
# so that every entry can be walked with the same nested loop.
all_perturbations = {
    'add_typos': {'typo_prob': [0.3, 0.5, 0.7], 'remove_pct': [0.3]},
    'change_dosage': {'typo_prob': [0.5], 'remove_pct': [0.3]},
    'remove_sentences': {'typo_prob': [0.5], 'remove_pct': [0.3, 0.5, 0.7]},
    'add_confusion': {'typo_prob': [0.5], 'remove_pct': [0.3]}
}

if PERTURBATIONS is None:
    # None means "run everything"
    perturbations_to_run = all_perturbations
else:
    # Accept a single name given as a plain string instead of iterating its characters
    _requested = [PERTURBATIONS] if isinstance(PERTURBATIONS, str) else list(PERTURBATIONS)

    # Fail early with the list of valid names instead of a bare KeyError
    _unknown = [name for name in _requested if name not in all_perturbations]
    if _unknown:
        raise KeyError(
            f"Unknown perturbation(s) {_unknown}; "
            f"valid names are {list(all_perturbations)}"
        )

    perturbations_to_run = {name: all_perturbations[name] for name in _requested}

print(f"Perturbations to run: {list(perturbations_to_run.keys())}")

## Backup Existing Results

Before running experiments, back up any existing result files to preserve previous runs.

In [None]:
import shutil
from datetime import datetime

def _rating_filename(perturbation_name, level, model_name_clean, typo_prob, remove_pct):
    """Return the rating-file name for one perturbation/parameter combination."""
    if perturbation_name == 'remove_sentences':
        # int() truncation is kept on purpose so names match the experiment loop
        pct_str = str(int(remove_pct * 100))
        return f"{perturbation_name}_{pct_str}pct_{level}_{model_name_clean}_rating.jsonl"
    if perturbation_name == 'add_typos':
        prob_str = str(typo_prob).replace('.', '')
        return f"{perturbation_name}_{prob_str}prob_{level}_{model_name_clean}_rating.jsonl"
    # Other perturbations do not encode a parameter in the filename
    return f"{perturbation_name}_{level}_{model_name_clean}_rating.jsonl"


def backup_existing_results(backup_dir, output_dir, model_name_clean, perturbations_to_run, levels):
    """
    Copy existing baseline rating files into a timestamped backup folder.

    Files are looked up under
    ``<output_dir>/experiment_results/baseline/<perturbation>/`` and copied to
    ``<backup_dir>/backup_<YYYYmmdd_HHMMSS>/<perturbation>/``. The timestamped
    folder is only created once there is a file to copy, so runs with nothing
    to back up leave no empty directories behind. Each source file is copied
    and counted at most once, even if several parameter combinations map to
    the same filename.

    Args:
        backup_dir: Path to backup directory, or None to skip the backup
        output_dir: Base output directory
        model_name_clean: Cleaned model name (part of every result filename)
        perturbations_to_run: Dict mapping perturbation name to a dict with
            'typo_prob' and 'remove_pct' lists
        levels: List of levels to process

    Returns:
        Number of files backed up
    """
    if backup_dir is None:
        print("⚠️  No backup directory configured. Skipping backup.")
        return 0

    # Name of this run's snapshot folder; it is created lazily below
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    backup_path = Path(backup_dir) / f"backup_{timestamp}"

    print(f"📦 Backing up existing results to: {backup_path}")

    baseline_dir = Path(output_dir) / 'experiment_results' / 'baseline'
    files_backed_up = 0
    seen_sources = set()  # guards against copying/counting one file twice

    for level in levels:
        for perturbation_name, params in perturbations_to_run.items():
            perturbation_dir = baseline_dir / perturbation_name

            if not perturbation_dir.exists():
                continue

            # Iterate over parameter combinations to find all possible files
            for remove_pct in params['remove_pct']:
                for typo_prob in params['typo_prob']:
                    output_filename = _rating_filename(
                        perturbation_name, level, model_name_clean, typo_prob, remove_pct
                    )
                    source_file = perturbation_dir / output_filename

                    if source_file in seen_sources:
                        continue
                    seen_sources.add(source_file)

                    if not source_file.exists():
                        continue

                    # Create the snapshot folder and perturbation subdirectory on demand
                    backup_subdir = backup_path / perturbation_name
                    backup_subdir.mkdir(parents=True, exist_ok=True)

                    # copy2 also preserves file metadata such as timestamps
                    shutil.copy2(source_file, backup_subdir / output_filename)
                    files_backed_up += 1
                    print(f"  ✓ Backed up: {perturbation_name}/{output_filename}")

    if files_backed_up > 0:
        print(f"\n✅ Backed up {files_backed_up} files to: {backup_path}")
    else:
        print("\nℹ️  No existing files to backup")

    return files_backed_up

# Snapshot whatever results already exist for the selected level(s).
# The call stays a bare expression so the notebook still shows the file count.
levels = [LEVEL] if LEVEL != 'both' else ['coarse', 'fine']
backup_existing_results(
    backup_dir=BACKUP_DIR,
    output_dir=output_dir,
    model_name_clean=model_name_clean,
    perturbations_to_run=perturbations_to_run,
    levels=levels,
)

## Experiment Loop

In [None]:
import time

# Determine which levels to process (reject typos early instead of silently
# treating an unknown value as the fine level further down)
if LEVEL not in ('coarse', 'fine', 'both'):
    raise ValueError(f"LEVEL must be 'coarse', 'fine', or 'both', got {LEVEL!r}")
levels = ['coarse', 'fine'] if LEVEL == 'both' else [LEVEL]

for level in levels:
    print(f"\n{'='*80}")
    print(f"LEVEL: {level.upper()}")
    print(f"{'='*80}")

    # Load data
    data_path = paths['coarse_data_path'] if level == 'coarse' else paths['fine_data_path']
    all_qa_pairs = load_qa_data(data_path)
    print(f"Loaded {len(all_qa_pairs)} QA pairs")

    # For fine level, filter to only IDs that exist in original ratings
    if level == 'fine':
        original_ratings_filename = f"original_{level}_{model_name_clean}_rating.jsonl"
        original_ratings_path = os.path.join(output_dir, 'original_ratings', original_ratings_filename)

        if os.path.exists(original_ratings_path):
            # Load IDs from original ratings file
            rated_ids = set()
            with open(original_ratings_path, 'r') as f:
                for line in f:
                    if not line.strip():
                        # Tolerate blank lines instead of crashing in json.loads
                        continue
                    entry = json.loads(line)
                    # Get ID using the appropriate key
                    id_key_temp = get_id_key([entry])
                    rated_ids.add(entry[id_key_temp])

            print(f"Found {len(rated_ids)} entries in original fine ratings file")
            print(f"Filtering fine data to only process these {len(rated_ids)} entries")

            # Filter qa_pairs to only include rated IDs
            id_key = get_id_key(all_qa_pairs)
            qa_pairs = [qa for qa in all_qa_pairs if qa[id_key] in rated_ids]
            print(f"After filtering: {len(qa_pairs)} examples to process")
        else:
            print(f"Warning: Original ratings file not found at {original_ratings_path}")
            print(f"Processing all {len(all_qa_pairs)} fine examples")
            qa_pairs = all_qa_pairs
    else:
        qa_pairs = all_qa_pairs

    id_key = get_id_key(qa_pairs)

    # Select prompt
    prompt_path = os.path.join(paths['prompts_dir'], f'{level}prompt_system.txt')

    # Step 1: Get/compute original ratings
    print(f"\n{'-'*80}")
    print("STEP 1: ORIGINAL RATINGS")
    print(f"{'-'*80}")

    # For fine level, skip computing missing ratings (use only existing ones)
    skip_missing = (level == 'fine')

    original_ratings_dict = get_or_create_original_ratings(
        qa_pairs=qa_pairs,
        level=level,
        prompt_path=prompt_path,
        model=MODEL,
        output_dir=output_dir,
        model_name_clean=model_name_clean,
        num_runs=NUM_RUNS,
        skip_missing=skip_missing
    )

    # Filter qa_pairs to only include IDs that have original ratings
    # This is especially important for fine level where we skip computing missing ratings
    qa_pairs = [qa for qa in qa_pairs if qa[id_key] in original_ratings_dict]
    print(f"✓ Processing {len(qa_pairs)} examples with original ratings")

    # Step 2: Process each perturbation
    print(f"\n{'-'*80}")
    print("STEP 2: PERTURBATIONS")
    print(f"{'-'*80}")

    # Create baseline experiment directory
    baseline_dir = os.path.join(output_dir, 'experiment_results', 'baseline')
    os.makedirs(baseline_dir, exist_ok=True)

    for perturbation_name, params in perturbations_to_run.items():
        print(f"\n[{perturbation_name.upper()}]")

        # Create perturbation-specific subdirectory
        perturbation_dir = os.path.join(baseline_dir, perturbation_name)
        os.makedirs(perturbation_dir, exist_ok=True)

        # Iterate over parameter combinations
        for remove_pct in params['remove_pct']:
            for typo_prob in params['typo_prob']:
                # Determine output filename (only the parameter that matters
                # for a perturbation is encoded in its name)
                if perturbation_name == 'remove_sentences':
                    pct_str = str(int(remove_pct * 100))
                    output_filename = f"{perturbation_name}_{pct_str}pct_{level}_{model_name_clean}_rating.jsonl"
                elif perturbation_name == 'add_typos':
                    prob_str = str(typo_prob).replace('.', '')
                    output_filename = f"{perturbation_name}_{prob_str}prob_{level}_{model_name_clean}_rating.jsonl"
                else:
                    output_filename = f"{perturbation_name}_{level}_{model_name_clean}_rating.jsonl"

                output_path = os.path.join(perturbation_dir, output_filename)

                # Check which entries have already been processed, so an
                # interrupted run resumes where it stopped
                processed_ids = get_processed_ids(output_path)
                remaining_qa_pairs = [qa for qa in qa_pairs if qa[id_key] not in processed_ids]

                if len(remaining_qa_pairs) == 0:
                    print(f"  ✓ {output_filename}: All {len(qa_pairs)} entries complete")
                    continue

                print(f"  Processing: {output_filename}")
                print(f"    {len(remaining_qa_pairs)} remaining (out of {len(qa_pairs)})")

                # Load or generate perturbations
                perturbations_dict = get_or_create_perturbations(
                    perturbation_name=perturbation_name,
                    level=level,
                    qa_pairs=qa_pairs,
                    typo_prob=typo_prob,
                    remove_pct=remove_pct,
                    seed=SEED,
                    output_dir=output_dir
                )

                # The prompt path does not change inside this loop, so load the
                # prompt once per output file instead of once per example.
                # tuple() keeps it reusable even if load_prompt returns a
                # one-shot iterable. NOTE(review): assumes load_prompt has no
                # per-call side effects — confirm.
                prompt_parts = tuple(load_prompt(prompt_path))

                # Process each QA pair
                for idx, qa_pair in enumerate(remaining_qa_pairs, 1):
                    question = qa_pair['question']

                    # Get pre-generated perturbation
                    perturbation_entry = perturbations_dict.get(qa_pair[id_key])

                    if perturbation_entry is None:
                        print(f"    Skipping {qa_pair[id_key]} - no perturbation found")
                        continue

                    # Look up the original rating BEFORE the expensive rating
                    # call, so no model calls are wasted on a result that
                    # would be discarded anyway
                    original_rating = original_ratings_dict.get(qa_pair[id_key])

                    if original_rating is None:
                        print(f"    WARNING: No original rating found for {qa_pair[id_key]}, skipping...")
                        continue

                    perturbed_answer = perturbation_entry['perturbed_answer']

                    # Get perturbed rating; flush so the progress line is
                    # visible while the rating call is still running
                    print(f"    [{idx}/{len(remaining_qa_pairs)}] {qa_pair[id_key]}...", end=" ", flush=True)
                    start_time = time.time()
                    perturbed_rating = get_rating_with_averaging(
                        question, perturbed_answer, *prompt_parts,
                        MODEL, num_runs=NUM_RUNS
                    )
                    elapsed_time = time.time() - start_time
                    print(f"{elapsed_time:.1f}s")

                    # Build result (shallow copy so the source record is not modified)
                    result = qa_pair.copy()
                    result['perturbation'] = perturbation_name
                    result['perturbed_answer'] = perturbed_answer
                    result['original_rating'] = original_rating
                    result['perturbed_rating'] = perturbed_rating
                    result['random_seed'] = SEED

                    # Add perturbation metadata
                    for key in ['typo_probability', 'removal_percentage', 'change_counts', 'skip_reason']:
                        if key in perturbation_entry:
                            result[key] = perturbation_entry[key]

                    # Save to file immediately so progress survives an interruption
                    save_result(output_path, result)

print(f"\n{'='*80}")
print("BASELINE EXPERIMENTS COMPLETED")
print(f"{'='*80}")

## Summary

Results saved to: `output/cqa_eval/experiment_results/baseline/`