In [None]:
# Mock Setup - Hidden in rendered documentation
# This cell is tagged with "hide-cell" in notebook metadata

import tempfile
import sys
import os
import hashlib
import json
from pathlib import Path
from unittest.mock import Mock, MagicMock, patch, PropertyMock
from typing import Any, Dict, List
from datetime import datetime
import pandas as pd

# Location of the karenina sources. Kept in a single constant so the sys.path
# entry and the status message printed below cannot drift apart.
# NOTE(review): this is a machine-specific absolute path; adjust it when the
# notebook is executed on another machine.
KARENINA_SRC = "/Users/carli/Projects/karenina-monorepo/karenina/src"

# Add karenina to path (front of the list, so this checkout is searched first)
sys.path.insert(0, KARENINA_SRC)

# Temporary directory for file operations.
# mkdtemp() does not remove the directory automatically; it stays on disk
# until it is deleted explicitly.
TEMP_DIR = Path(tempfile.mkdtemp(prefix="karenina_docs_"))

# Column-oriented sample data for the extraction examples: one list per
# spreadsheet column, with row i made of the i-th entry of every list.
SAMPLE_EXCEL_DATA = {
    "Question": [
        "How many chromosomes are in a human somatic cell?",
        "What is the approved drug target of Venetoclax?",
        "How many protein subunits does hemoglobin A have?",
    ],
    "Answer": ["46", "BCL2", "4"],
    "Author": ["Dr. Smith", "Dr. Chen", "Dr. Smith"],
    "Keywords": [
        "genetics, chromosomes",
        "pharmacology, cancer",
        "proteins, hemoglobin",
    ],
}

# Create sample Excel file (index=False keeps the row index out of the sheet)
SAMPLE_DF = pd.DataFrame(SAMPLE_EXCEL_DATA)
SAMPLE_EXCEL_PATH = TEMP_DIR / "sample_questions.xlsx"
SAMPLE_DF.to_excel(SAMPLE_EXCEL_PATH, index=False)

# Create sample CSV file with the same content
SAMPLE_CSV_PATH = TEMP_DIR / "sample_questions.csv"
SAMPLE_DF.to_csv(SAMPLE_CSV_PATH, index=False)

# Status report; only lines that interpolate a value are f-strings
print("✓ Mock setup complete")
print(f"✓ Temp directory: {TEMP_DIR}")
print("✓ Sample files created for extraction examples")
print(f"✓ Karenina package loaded from: {KARENINA_SRC}")

# Adding Questions to a Benchmark

This guide covers different methods for adding questions to your benchmark, including manual creation and automatic extraction from files.

**Quick Navigation:**

- [Manual Question Creation](#manual-question-creation) - Add questions directly with code
- [Automatic Question Extraction](#automatic-question-extraction-from-files) - Extract from Excel, CSV, TSV files
- [Working with Questions](#working-with-questions) - Access, list, and inspect questions
- [Question Organization](#question-organization) - Keywords, tags, and batch addition
- [Complete Example](#complete-example) - End-to-end workflow with manual and file-based addition

## Manual Question Creation

### Basic Question Addition

Add questions directly using the `add_question` method:

In [None]:
from karenina import Benchmark

# Start a fresh benchmark that will hold the example questions
benchmark = Benchmark.create(name="Genomics Knowledge Benchmark")

# Register a single question/answer pair and keep the ID that comes back
question_id = benchmark.add_question(
    question="How many chromosomes are in a human somatic cell?",
    raw_answer="46",
)

print(f"Question added with ID: {question_id}")

The `add_question` method returns a unique question ID that you can use to reference the question later.

### Questions with Author Information

Add author metadata to track question provenance:

In [None]:
# Same call as before, this time with an author dict describing who wrote
# the question
question_id = benchmark.add_question(
    question="What is the approved drug target of Venetoclax?",
    raw_answer="BCL2",
    author={"name": "Dr. Sarah Chen", "email": "schen@research.edu"},
)

print(f"Question added with ID: {question_id}")

## Automatic Question Extraction from Files

Karenina provides utilities to extract questions from various file formats automatically. This is useful when you have existing question sets in spreadsheets or structured files.

### Supported File Types

- **Excel** (.xlsx, .xls)
- **CSV** (Comma-separated values)
- **TSV** (Tab-separated values)

### Basic File Extraction

Extract questions from a file and add them to your benchmark:

In [None]:
from karenina.domain.questions.extractor import extract_questions_from_file

# Read the sample Excel file; the column names say where the question text
# and the expected answer live.
# The result is a list of (Question object, metadata dict) tuples.
questions = extract_questions_from_file(
    file_path=str(SAMPLE_EXCEL_PATH),
    question_column="Question",
    answer_column="Answer",
)

# Register every extracted Question; the metadata part is not needed here
for extracted in questions:
    question_obj, metadata = extracted
    benchmark.add_question(question_obj)

print(f"Added {len(questions)} questions from file")

### Example Excel/CSV Format

Here's a sample spreadsheet structure that works well with the extraction utility:

| Question | Answer | Author | Keywords |
|----------|--------|--------|----------|
| How many chromosomes are in a human somatic cell? | 46 | Dr. Smith | genetics, chromosomes |
| What is the approved drug target of Venetoclax? | BCL2 | Dr. Chen | pharmacology, cancer |
| How many protein subunits does hemoglobin A have? | 4 | Dr. Smith | proteins, hemoglobin |

### Automatic Data Cleaning

The extraction process automatically performs several data cleaning steps:

1. **Whitespace normalization** - Removes leading/trailing spaces and normalizes internal spacing
2. **Empty row filtering** - Skips rows where essential fields (question content) are empty
3. **Encoding detection** - Automatically detects and handles different text encodings
4. **Type coercion** - Converts string representations to appropriate data types
5. **Null value handling** - Replaces various null indicators (`null`, `None`, `N/A`, empty strings) with proper null values
6. **Column name normalization** - Standardizes column headers to consistent naming conventions

### Advanced Extraction with Optional Columns

You can extract additional metadata by specifying optional column names:

In [None]:
# Extract again, additionally mapping the optional author and keyword columns
questions = extract_questions_from_file(
    file_path=str(SAMPLE_EXCEL_PATH),
    question_column="Question",
    answer_column="Answer",
    author_name_column="Author",  # optional: column holding the author name
    keywords_column="Keywords",  # optional: column of comma-separated keywords
)

# Walk the (Question object, metadata dict) tuples and show what was captured
for question_obj, metadata in questions:
    print(f"Question: {question_obj.question[:50]}...")
    # Fall back to an empty dict so a missing author still prints "N/A"
    author = metadata.get("author", {})
    print(f"  Author: {author.get('name', 'N/A')}")
    print(f"  Keywords: {metadata.get('keywords', [])}")
    print()

## Working with Questions

### Accessing Questions

Once you've added questions, you can access them using their question IDs:

In [None]:
# Look the question up by the ID returned from add_question
question = benchmark.get_question(question_id)

# The result is read like a dictionary: required fields by key, optional
# ones via .get() with a default
print(f"Question text: {question['question']}")
print(f"Expected answer: {question['raw_answer']}")
print(f"Author: {question.get('author', {})}")

### Listing All Questions

In [None]:
# Materialize the IDs of every question currently in the benchmark
question_ids = [*benchmark.get_question_ids()]

# Print a short preview (ID prefix + start of the text) for each question
for qid in question_ids:
    question = benchmark.get_question(qid)
    print(f"{qid[:8]}...: {question['question'][:50]}...")

### Question Attributes

Each question in Karenina has the following key attributes:

| Attribute | Type | Description |
|-----------|------|-------------|
| `question` | `str` | The question text or prompt |
| `raw_answer` | `str` | The expected answer |
| `author` | `dict` | Author information (name, email) |
| `keywords` | `list[str]` | Searchable keywords or tags |
| `question_id` | `str` | Unique identifier (MD5 hash) |

## Question Organization

### Using Keywords (Tags)

Keywords (also called tags) help organize and filter questions. To attach them, create a `Question` object and set its `tags` field:

In [None]:
from karenina.schemas.domain import Question

# Build Question objects up front; tags are supplied through the `tags` field
q1 = Question(
    question="What is the role of telomerase in cell division?",
    raw_answer="Telomerase adds telomeric sequences to chromosome ends",
    tags=["cell-biology", "telomeres", "aging"],
)

q2 = Question(
    question="Describe the structure of a nucleosome",
    raw_answer="DNA wrapped around histone octamer",
    tags=["chromatin", "epigenetics", "dna-structure"],
)

# add_question also accepts a ready-made Question object
qid1 = benchmark.add_question(q1)
qid2 = benchmark.add_question(q2)

print(f"Added questions with tags: {qid1[:8]}..., {qid2[:8]}...")

### Batch Addition

Add multiple questions efficiently:

In [None]:
# Question/answer pairs to register in one go
qa_pairs = [
    ("How many chromosomes are in a human somatic cell?", "46"),
    ("What is the approved drug target of Venetoclax?", "BCL2"),
    ("How many protein subunits does hemoglobin A have?", "4"),
]

# Expand each pair into the keyword arguments add_question expects; every
# entry gets its own author dict
genomics_questions = [
    {
        "question": text,
        "raw_answer": answer,
        "author": {"name": "Bio Curator"},
    }
    for text, answer in qa_pairs
]

# Add all questions, unpacking each dict as keyword arguments
for q in genomics_questions:
    qid = benchmark.add_question(**q)
    print(f"Added: {q['question'][:40]}... (ID: {qid[:8]}...)")

## Complete Example

Here's a complete workflow showing both manual and file-based question addition:

In [None]:
from karenina import Benchmark
from karenina.domain.questions.extractor import extract_questions_from_file

# 1. Create benchmark
benchmark = Benchmark.create(
    name="Genomics Knowledge Benchmark",
    description="Testing LLM knowledge of genomics and molecular biology",
    version="1.0.0",
)

# 2. Add questions manually, collecting every returned ID for the check in
#    step 4
question_ids = []

qid1 = benchmark.add_question(
    question="How many chromosomes are in a human somatic cell?",
    raw_answer="46",
    author={"name": "Dr. Smith", "email": "smith@example.com"},
)
question_ids.append(qid1)

qid2 = benchmark.add_question(
    question="What is the approved drug target of Venetoclax?",
    raw_answer="BCL2",
    author={"name": "Dr. Chen", "email": "chen@example.com"},
)
question_ids.append(qid2)

# 3. Extract additional questions from file
file_questions = extract_questions_from_file(
    file_path=str(SAMPLE_EXCEL_PATH),
    question_column="Question",
    answer_column="Answer",
    author_name_column="Author",
    keywords_column="Keywords",
)

# Add extracted questions
# Each result is (Question object, metadata dict)
# Note: Keywords are stored in the Question object's tags, not passed separately
for question_obj, metadata in file_questions:
    # Extract clean author dict from metadata if present; stays None when the
    # row carried no author information
    author_data = None
    if "author" in metadata:
        auth = metadata["author"]
        # Remove the "@type" field if present so only plain author fields
        # (e.g. the name) are handed to add_question.
        # NOTE(review): assumes the extractor tags author dicts with an
        # "@type" key - confirm against the extractor's output.
        author_data = {k: v for k, v in auth.items() if k != "@type"}

    # Add question with clean author data
    qid = benchmark.add_question(question_obj, author=author_data)
    question_ids.append(qid)

print(f"Total questions: {len(question_ids)}")

# 4. Verify questions were added by reading each one back by ID
for qid in question_ids:
    question = benchmark.get_question(qid)
    print(f"✓ {question['question'][:50]}...")

## Next Steps

Once you have questions in your benchmark, you can:

- [Generate templates](templates.md) to define evaluation structure
- [Set up rubrics](rubrics.md) for qualitative assessment
- [Run verification](verification.md) to evaluate LLM responses
- [Save your benchmark](saving-loading.md) using checkpoints or a database

## Related Documentation

- [Defining Benchmarks](defining-benchmark.md) - Creating and configuring benchmarks
- [Templates](templates.md) - Structured answer evaluation
- [Rubrics](rubrics.md) - Qualitative assessment criteria
- [Quick Start](../quickstart.md) - End-to-end workflow example