In [1]:
import pandas as pd
from pathlib import Path

# Input is a plain-text file with one candidate question per line; the
# de-duplicated result is written next to it as a parquet file.
data_file = Path('../data/questions.txt')
output_file = Path('../data/baseQuestions.parquet')

# Keep every non-blank line, trimmed of surrounding whitespace.
sentences = []
with data_file.open('r', encoding='utf-8') as handle:
    for raw_line in handle:
        trimmed = raw_line.strip()
        if trimmed:
            sentences.append(trimmed)

# One column, one row per sentence (duplicates still included here).
df = pd.DataFrame({'question': sentences})

# Exact-match duplicates collapse onto their first occurrence.
df_dedup = df.drop_duplicates(subset=['question'], keep='first')

# Written with the fastparquet engine to avoid PyArrow conflicts.
df_dedup.to_parquet(output_file, engine='fastparquet', index=False)

print(f"Total sentences: {len(df)}")
print(f"Deduplicated sentences: {len(df_dedup)}")
print(f"Duplicates removed: {len(df) - len(df_dedup)}")
print(f"Saved to: {output_file}")

Total sentences: 1271
Deduplicated sentences: 255
Duplicates removed: 1016
Saved to: ../data/baseQuestions.parquet


In [None]:
import pandas as pd
from pathlib import Path
import ollama
import random

import re  # cell-local: only needed to strip list markers from model output

# Load base questions using fastparquet engine
base_questions_file = Path('../data/baseQuestions.parquet')
df_base = pd.read_parquet(base_questions_file, engine='fastparquet')
base_questions = df_base['question'].tolist()

print(f"Loaded {len(base_questions)} base questions")
if not base_questions:
    # Fail early with a clear message instead of writing an empty, column-less file.
    raise ValueError(f"No base questions found in {base_questions_file}")

# Generate hallucination-inducing questions using Ollama with Gemma 3
hallucination_questions = []
target_count = 1000

# Sample base questions to work with. A dedicated, seeded generator makes the
# selection (and its order) repeatable without touching the global RNG state.
sample_size = min(255, len(base_questions))
sampled_questions = random.Random(42).sample(base_questions, sample_size)

# Leading "1. ", "2) ", "- ", "* " or bullet markers that models add despite the
# "no numbering" instruction. The marker is stripped and the question kept;
# questions that merely start with a number (e.g. "2023 ...") are untouched.
LIST_MARKER = re.compile(r'^\s*(?:\d+[.)]|[-*\u2022])\s+')

for i, base_question in enumerate(sampled_questions):
    # Spread target_count across the sampled questions; the first
    # (target_count % sample_size) questions each get one extra.
    questions_per_base = (target_count // sample_size) + (1 if i < (target_count % sample_size) else 0)
    
    prompt = f"""Based on this question: "{base_question}"

Generate {questions_per_base} new questions designed to cause LLM hallucinations (not just incorrect answers, but actual hallucinations involving confabulation of plausible-sounding but false details).

Use these techniques:
- Replace entities with plausible but potentially non-existent names
- Add specific numerical details (dates, percentages, amounts, limits)
- Combine real concepts with fabricated specifics
- Include precise policy/feature details that require exact knowledge
- Use obscure or ambiguous entity names that sound legitimate

Return ONLY the questions, one per line, no numbering or extra text."""

    response = ollama.chat(
        model="gemma3",
        messages=[{"role": "user", "content": prompt}]
    )
    
    # Extract questions from response: one per non-blank line, list markers removed
    raw_lines = response['message']['content'].splitlines()
    generated = [LIST_MARKER.sub('', line).strip() for line in raw_lines]
    generated = [q for q in generated if q]
    
    kept = generated[:questions_per_base]
    hallucination_questions.extend(kept)
    print(f"Generated {len(kept)} questions from base question {i+1}/{len(sampled_questions)}")

# Cap at target_count (there may be fewer if the model returned short lists)
hallucination_questions = hallucination_questions[:target_count]

print(f"\nTotal hallucination questions generated: {len(hallucination_questions)}")
print("Now generating answers for each question...")

# Generate answers for each hallucination question
results = []
for i, question in enumerate(hallucination_questions):
    response = ollama.chat(
        model="gemma3",
        messages=[{"role": "user", "content": question}]
    )
    
    answer = response['message']['content'].strip()
    results.append({
        'question': question,
        'answer': answer
    })
    
    if (i + 1) % 10 == 0:
        print(f"Generated answers for {i + 1}/{len(hallucination_questions)} questions")

# Create DataFrame and save
df_hallucination = pd.DataFrame(results)
output_file = Path('../data/hallucinationQuestions_gemma3.parquet')
df_hallucination.to_parquet(output_file, engine='fastparquet', index=False)

print(f"\nTotal question-answer pairs generated: {len(results)}")
print(f"Saved to: {output_file}")
print(f"Columns: {df_hallucination.columns.tolist()}")

In [11]:
import pandas as pd
from pathlib import Path
import ollama
import json

# Hallucination detection with Patronus Lynx, served through Ollama.
# Ollama model tag: tensortemplar/patronus-lynx:8b-instruct-q4_k_m
# Model card: https://huggingface.co/PatronusAI/Llama-3-Patronus-Lynx-8B-Instruct

# Question-answer pairs to evaluate. The Anthropic-generated file is the active
# input; the Gemma 3 file is '../data/hallucinationQuestions_gemma3.parquet'.
input_file = Path('../data/hallucinationQuestionsAnth.parquet')

# Only the first 10 rows are kept for evaluation.
df = pd.read_parquet(input_file, engine='fastparquet').head(10)
print(f"Loaded {len(df)} question-answer pairs")

# Lynx prompt template for hallucination detection.
# Filled in with str.format(question=..., document=..., answer=...). The doubled
# braces on the final line are format escapes: they render as a literal {...}
# showing the model the JSON shape ("REASONING" and "SCORE" keys) to reply with.
LYNX_PROMPT_TEMPLATE = """Given the following QUESTION, DOCUMENT and ANSWER you must analyze the provided answer and determine whether it is faithful to the contents of the DOCUMENT.

The ANSWER must not offer new information beyond the context provided in the DOCUMENT.

The ANSWER also must not contradict information provided in the DOCUMENT.

Output your final verdict by strictly following this format: "PASS" if the answer is faithful to the DOCUMENT and "FAIL" if the answer is not faithful to the DOCUMENT.

Show your reasoning.

--
QUESTION (THIS DOES NOT COUNT AS BACKGROUND INFORMATION):
{question}

--
DOCUMENT:
{document}

--
ANSWER:
{answer}

--
Your output should be in JSON FORMAT with the keys "REASONING" and "SCORE":
{{"REASONING": <your reasoning as bullet points>, "SCORE": <your final score>}}
"""

def _parse_lynx_verdict(raw_response: str) -> dict:
    """
    Convert Lynx's raw text reply into a result dict.

    Args:
        raw_response: The model's reply text, already stripped.

    Returns:
        dict with 'reasoning', 'score' and 'raw_response'. 'score' is always
        one of 'PASS', 'FAIL' or 'UNKNOWN'.
    """
    import re  # function-scope: this cell's import list does not include re

    # 1) Preferred: the span from the first '{' to the last '}' is a JSON object
    #    (the reply may carry extra text around it).
    json_start = raw_response.find('{')
    json_end = raw_response.rfind('}') + 1
    if json_start != -1 and json_end > json_start:
        try:
            parsed = json.loads(raw_response[json_start:json_end])
        except json.JSONDecodeError:
            parsed = None  # malformed JSON: fall through to the text heuristics
        if isinstance(parsed, dict):
            # Normalise case/whitespace so "pass" or " Fail " land in the same
            # buckets the summary counts; anything else becomes 'UNKNOWN'.
            label = str(parsed.get('SCORE', 'UNKNOWN')).strip().upper()
            return {
                'reasoning': parsed.get('REASONING', ''),
                'score': label if label in ('PASS', 'FAIL') else 'UNKNOWN',
                'raw_response': raw_response
            }

    # 2) An explicit SCORE field is the most reliable signal when the JSON is
    #    broken; the reasoning text may itself mention "fail" or "pass".
    keyed = re.search(r'"?SCORE"?\s*:\s*"?\s*(PASS|FAIL)\b', raw_response, re.IGNORECASE)
    if keyed:
        score = keyed.group(1).upper()
    # 3) Last resort: a whole-word verdict anywhere in the text. Word boundaries
    #    stop "passage" or "failure" from being read as a verdict. FAIL is
    #    checked first, as before.
    elif re.search(r'\bFAIL\b', raw_response, re.IGNORECASE):
        score = 'FAIL'
    elif re.search(r'\bPASS\b', raw_response, re.IGNORECASE):
        score = 'PASS'
    else:
        score = 'UNKNOWN'

    return {
        'reasoning': raw_response,
        'score': score,
        'raw_response': raw_response
    }


def detect_hallucination(
    question: str,
    answer: str,
    document: str = "",
    model: str = "tensortemplar/patronus-lynx:8b-instruct-q4_k_m",
) -> dict:
    """
    Use Patronus Lynx to detect if an answer contains hallucinations.
    
    Args:
        question: The question that was asked
        answer: The answer to evaluate
        document: Reference document/context (if empty, a placeholder asking the
            model to look for unverifiable claims is used instead)
        model: Ollama model tag to query; defaults to the quantised Lynx 8B build
    
    Returns:
        dict with 'reasoning', 'score', and 'raw_response'. 'score' is one of
        'PASS', 'FAIL' or 'UNKNOWN'. 'reasoning' is the parsed "REASONING" value
        when the reply was valid JSON, otherwise the raw reply text.
    """
    # If no document provided, use a minimal context indicating no ground truth available
    if not document:
        document = "No reference document provided. Evaluate if the answer makes specific claims that cannot be verified."
    
    prompt = LYNX_PROMPT_TEMPLATE.format(
        question=question,
        document=document,
        answer=answer
    )
    
    response = ollama.chat(
        model=model,
        messages=[{"role": "user", "content": prompt}]
    )
    
    raw_response = response['message']['content'].strip()
    return _parse_lynx_verdict(raw_response)

# Run hallucination detection on all Q&A pairs
results = []
total_pairs = len(df)
# enumerate() supplies the 1-based position for progress reporting. The label
# that iterrows() yields is the DataFrame index, which is only 0..n-1 when the
# frame still has its default index.
for position, (_, row) in enumerate(df.iterrows(), start=1):
    result = detect_hallucination(
        question=row['question'],
        answer=row['answer']
    )
    
    results.append({
        'question': row['question'],
        'answer': row['answer'],
        'hallucination_score': result['score'],
        'reasoning': result['reasoning'],
        'raw_lynx_response': result['raw_response']
    })
    
    if position % 10 == 0:
        print(f"Processed {position}/{total_pairs} pairs")

# Create results DataFrame. Naming the columns keeps the schema intact (and the
# summary below working) even when there were no rows to evaluate.
df_results = pd.DataFrame(
    results,
    columns=['question', 'answer', 'hallucination_score', 'reasoning', 'raw_lynx_response']
)

# Save results
output_file = Path('../data/hallucinationDetection_lynx.parquet')
df_results.to_parquet(output_file, engine='fastparquet', index=False)

# Summary statistics
evaluated = len(df_results)
pass_count = (df_results['hallucination_score'] == 'PASS').sum()
fail_count = (df_results['hallucination_score'] == 'FAIL').sum()
unknown_count = (df_results['hallucination_score'] == 'UNKNOWN').sum()


def _share(count):
    """Percentage of evaluated pairs, 0.0 when nothing was evaluated."""
    return 100 * count / evaluated if evaluated else 0.0


print(f"\n=== Hallucination Detection Results ===")
print(f"Total pairs evaluated: {evaluated}")
print(f"PASS (faithful): {pass_count} ({_share(pass_count):.1f}%)")
print(f"FAIL (hallucination detected): {fail_count} ({_share(fail_count):.1f}%)")
print(f"UNKNOWN: {unknown_count} ({_share(unknown_count):.1f}%)")
print(f"\nSaved to: {output_file}")

Loaded 10 question-answer pairs
Processed 10/10 pairs

=== Hallucination Detection Results ===
Total pairs evaluated: 10
PASS (faithful): 5 (50.0%)
FAIL (hallucination detected): 4 (40.0%)
UNKNOWN: 1 (10.0%)

Saved to: ../data/hallucinationDetection_lynx.parquet


In [None]:
import pandas as pd
from pathlib import Path
from anthropic import Anthropic
import httpx
import random

# Initialize the Anthropic client.
import os
import ssl

# SECURITY: the API key must never be stored in the notebook. It is read from
# the environment instead. The key that was previously embedded in this cell
# should be treated as compromised and revoked in the Anthropic console.
api_key = os.environ.get("ANTHROPIC_API_KEY")
if not api_key:
    raise RuntimeError(
        "ANTHROPIC_API_KEY is not set; export it in the environment before running this cell."
    )

# TLS certificate verification stays enabled. If a proxy re-signs traffic with
# a private CA (the usual cause of "SSL issues"), point SSL_CERT_FILE at that
# CA bundle rather than turning verification off.
ca_bundle = os.environ.get("SSL_CERT_FILE")
if ca_bundle:
    http_client = httpx.Client(verify=ssl.create_default_context(cafile=ca_bundle))
else:
    http_client = httpx.Client()  # httpx default: verification on

client = Anthropic(
    api_key=api_key,
    http_client=http_client
)

import re  # cell-local: only needed to strip list markers from model output

# Load base questions using fastparquet engine
base_questions_file = Path('../data/baseQuestions.parquet')
df_base = pd.read_parquet(base_questions_file, engine='fastparquet')
base_questions = df_base['question'].tolist()

print(f"Loaded {len(base_questions)} base questions")
if not base_questions:
    # Fail early with a clear message instead of writing an empty, column-less file.
    raise ValueError(f"No base questions found in {base_questions_file}")

# Generate hallucination-inducing questions
hallucination_questions = []
target_count = 50

# Sample base questions to work with. A dedicated, seeded generator makes the
# selection repeatable without touching the global RNG state.
sample_size = min(10, len(base_questions))
sampled_questions = random.Random(42).sample(base_questions, sample_size)

# Leading "1. ", "2) ", "- ", "* " or bullet markers that models add despite the
# "no numbering" instruction. The marker is stripped and the question kept;
# questions that merely start with a number (e.g. "2023 ...") are untouched.
LIST_MARKER = re.compile(r'^\s*(?:\d+[.)]|[-*\u2022])\s+')

for i, base_question in enumerate(sampled_questions):
    # Spread target_count across the sampled questions; the first
    # (target_count % sample_size) questions each get one extra.
    questions_per_base = (target_count // sample_size) + (1 if i < (target_count % sample_size) else 0)
    
    prompt = f"""Based on this question: "{base_question}"

Generate {questions_per_base} new questions designed to cause LLM hallucinations (not just incorrect answers, but actual hallucinations involving confabulation of plausible-sounding but false details).

Use these techniques:
- Replace entities with plausible but potentially non-existent names
- Add specific numerical details (dates, percentages, amounts, limits)
- Combine real concepts with fabricated specifics
- Include precise policy/feature details that require exact knowledge
- Use obscure or ambiguous entity names that sound legitimate

Return ONLY the questions, one per line, no numbering or extra text."""

    response = client.messages.create(
        model="claude-sonnet-4-5-20250929",
        max_tokens=2000,
        messages=[{"role": "user", "content": prompt}]
    )
    
    # Extract questions from response: one per non-blank line, list markers
    # removed. An empty content list is treated as "no questions" instead of
    # raising IndexError.
    response_text = response.content[0].text if response.content else ""
    generated = [LIST_MARKER.sub('', line).strip() for line in response_text.splitlines()]
    generated = [q for q in generated if q]
    
    kept = generated[:questions_per_base]
    hallucination_questions.extend(kept)
    print(f"Generated {len(kept)} questions from base question {i+1}/{len(sampled_questions)}")

# Cap at target_count (there may be fewer if the model returned short lists)
hallucination_questions = hallucination_questions[:target_count]

print(f"\nTotal hallucination questions generated: {len(hallucination_questions)}")
print("Now generating answers for each question...")

# Generate answers for each hallucination question
results = []
for i, question in enumerate(hallucination_questions):
    response = client.messages.create(
        model="claude-sonnet-4-5-20250929",
        max_tokens=2000,
        messages=[{"role": "user", "content": question}]
    )
    
    # Same empty-content guard as above.
    answer = response.content[0].text.strip() if response.content else ""
    results.append({
        'question': question,
        'answer': answer
    })
    
    if (i + 1) % 10 == 0:
        print(f"Generated answers for {i + 1}/{len(hallucination_questions)} questions")

# Create DataFrame and save
df_hallucination = pd.DataFrame(results)
output_file = Path('../data/hallucinationQuestionsAnth.parquet')
df_hallucination.to_parquet(output_file, engine='fastparquet', index=False)

print(f"\nTotal question-answer pairs generated: {len(results)}")
print(f"Saved to: {output_file}")
print(f"Columns: {df_hallucination.columns.tolist()}")