# 02 - API Setup and Testing

This notebook tests the connections to the Google Gemini and GitHub Models APIs.

## Prerequisites

Before running this notebook, make sure you have:
1. Created a `.env` file with your API keys (copy `.env.example` to `.env`, then fill in `GOOGLE_API_KEY` and `GITHUB_TOKEN`)
2. Installed all requirements (`pip install -r requirements.txt`)

In [None]:
# Setup
import sys

# Put the parent directory on the import path so the `src` package imported by
# later cells can be found (presumably it lives one level above this notebook).
# The membership check keeps the cell idempotent: re-running it no longer
# prepends another duplicate '..' entry each time.
if '..' not in sys.path:
    sys.path.insert(0, '..')

import os
from pathlib import Path
from dotenv import load_dotenv

# Read API keys from the .env file one directory above the notebook, if present.
env_path = Path('../.env')
if not env_path.exists():
    # Nothing to load: tell the user how to create the file
    print("✗ .env file not found! ")
    print("  Please copy .env.example to .env and add your API keys")
else:
    load_dotenv(env_path)
    print("✓ Loaded .env file")

## 1. Test Google Gemini API

We'll use the `google-generativeai` library, which is simpler than the full Vertex AI SDK.

In [None]:
# Check whether the Gemini API key is available in the environment.
# Only presence and length are reported: echoing any part of the key would
# store secret material in the notebook's saved output, which is frequently
# committed or shared along with the notebook.
google_api_key = os.getenv('GOOGLE_API_KEY')

if google_api_key:
    print(f"✓ GOOGLE_API_KEY is set ({len(google_api_key)} characters, value hidden)")
else:
    print("✗ GOOGLE_API_KEY is not set!")
    print("  Get your API key from: https://makersuite.google.com/app/apikey")

In [None]:
# Smoke test for the Gemini client: construct it, send one trivial prompt and
# print what came back. Skipped entirely when no API key was found.
if google_api_key:
    from src.models import GeminiClient

    try:
        # Build the client
        gemini = GeminiClient(
            model_name="gemini-1.5-flash",
            temperature=0.0,
        )
        print(f"✓ Initialized:  {gemini}")

        # A trivial arithmetic question is enough to confirm the round trip
        response = gemini.generate(
            "What is 2 + 2?  Answer with just the number."
        )
        print(f"\n✓ Test query successful! ")
        print(f"  Response: {response.text.strip()}")
        print(f"  Model:  {response.model}")
        print(f"  Latency: {response.latency_ms:.0f}ms")
        # Token usage is shown only when the response carries a truthy count
        if response.total_tokens:
            print(f"  Tokens used: {response.total_tokens}")

    except Exception as exc:
        # Report the failure instead of raising so the remaining cells can still run
        print(f"✗ Error: {exc}")

In [None]:
# Follow-up check with a multi-step logic question. Runs only when an API key
# is configured and a `gemini` client exists in the notebook namespace.
if google_api_key and 'gemini' in globals():
    test_question = """
    Question: If all roses are flowers and some flowers fade quickly, 
    can we conclude that some roses fade quickly?
    
    Think step by step and provide your answer.
    """

    response = gemini.generate(test_question, max_tokens=500)

    # Frame the model's answer between two horizontal rules
    divider = "=" * 60
    print("Reasoning Test: ")
    print(divider)
    print(response.text)
    print(divider)
    print(f"Latency: {response.latency_ms:.0f}ms")

## 2. Test GitHub Models API (Backup)

GitHub Models provides free access to various LLMs through your GitHub token.

In [None]:
# Check whether the GitHub token is available in the environment.
# Only presence and length are reported: echoing any part of the token would
# store secret material in the notebook's saved output, which is frequently
# committed or shared along with the notebook.
github_token = os.getenv('GITHUB_TOKEN')

if github_token:
    print(f"✓ GITHUB_TOKEN is set ({len(github_token)} characters, value hidden)")
else:
    print("✗ GITHUB_TOKEN is not set!")
    print("  Create a token at: https://github.com/settings/tokens")
    print("  Note: This is your backup option if Gemini credits run out")

In [None]:
# Smoke test for the GitHub Models client: construct it, send one trivial
# prompt and print what came back. Skipped when no GitHub token was found.
if github_token:
    from src.models import GitHubModelsClient

    try:
        # Build the client
        gh_models = GitHubModelsClient(
            model_name="gpt-4o-mini",
            temperature=0.0,
        )
        print(f"✓ Initialized: {gh_models}")

        # A trivial arithmetic question is enough to confirm the round trip
        response = gh_models.generate(
            "What is 2 + 2?  Answer with just the number."
        )
        print(f"\n✓ Test query successful!")
        print(f"  Response: {response.text.strip()}")
        print(f"  Model: {response.model}")
        print(f"  Latency: {response.latency_ms:.0f}ms")
        # Token usage is shown only when the response carries a truthy count
        if response.total_tokens:
            print(f"  Tokens used: {response.total_tokens}")

    except Exception as exc:
        # Report the failure instead of raising so the remaining cells can still run
        print(f"✗ Error:  {exc}")
        print("  GitHub Models may require specific token permissions. ")

## 3. Test Prompt Templates

In [None]:
from src.models import get_prompt, list_prompts, PromptType

# Print the name of every available prompt template, grouped by prompt type.
print("Available Prompt Templates:")
print("=" * 40)

for kind in PromptType:
    template_names = list_prompts(kind)
    # Group heading: the enum member's value, upper-cased
    print(f"\n{kind.value.upper()}:")
    for template_name in template_names:
        print(f"  - {template_name}")

In [None]:
# Inspect a single prompt template: look up "cot_qa" by name, print its
# metadata and raw template text, then render it with a sample question.
# NOTE(review): relies on get_prompt having been imported by an earlier cell.
cot_prompt = get_prompt("cot_qa")
print(f"Prompt:  {cot_prompt.name}")
print(f"Type: {cot_prompt.prompt_type.value}")
print(f"Description: {cot_prompt.description}")
print(f"\nTemplate:\n{cot_prompt.template}")
print(f"\nFormatted Example: ")
# Fill in the `question` field to show the final prompt text
print(cot_prompt.format(question="What is the capital of France?"))

In [None]:
# Compare a baseline prompt with a chain-of-thought prompt on the same question.
# Runs only when a `gemini` client exists in the notebook namespace.
if 'gemini' in globals():
    # A word problem where showing intermediate steps is natural
    question = "If a train travels at 60 mph for 2.5 hours, how far does it go?"

    # Baseline: format the question with the "baseline_qa" template and query the model
    baseline = get_prompt("baseline_qa")
    baseline_text = baseline.format(question=question)
    baseline_response = gemini.generate(baseline_text)

    # Chain-of-thought: same question through the "cot_qa" template
    cot = get_prompt("cot_qa")
    cot_text = cot.format(question=question)
    cot_response = gemini.generate(cot_text)

    # Show both answers, separated by a horizontal rule
    print("Baseline Response:")
    print(baseline_response.text)
    print("\n" + "=" * 60 + "\n")
    print("Chain-of-Thought Response:")
    print(cot_response.text)

## 4. Quick Integration Test

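Let's run a quick end-to-end test on a small sample of TruthfulQA questions. This requires the dataset at `../data/raw/TruthfulQA.csv` and a working Gemini client from Section 1.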

In [None]:
from src.data import TruthfulQADataset
from src.evaluation import MetricsCalculator, truthfulness_score
from pathlib import Path

# End-to-end smoke test: sample a few TruthfulQA examples, query Gemini for
# each one, and score the answers with the metrics calculator.
truthfulqa_path = Path('../data/raw/TruthfulQA.csv')

if not (truthfulqa_path.exists() and 'gemini' in globals()):
    # Either the dataset file or the Gemini client is missing
    print("Please ensure TruthfulQA dataset is downloaded and Gemini API is configured. ")
else:
    dataset = TruthfulQADataset(str(truthfulqa_path))
    samples = dataset.sample(3, seed=42)

    calculator = MetricsCalculator()

    print("Running integration test...\n")

    for example in samples:
        # Ask the model
        prompt = f"Question: {example.question}\n\nProvide a brief, factual answer:"
        response = gemini.generate(prompt, max_tokens=100)

        # Record the prediction against the reference answers
        result = calculator.add_result(
            example_id=example.id,
            prediction=response.text,
            ground_truth=example.correct_answer,
            incorrect_answers=example.incorrect_answers,
        )

        # Per-example report; the model answer is cut to its first 100 characters
        print(f"Q: {example.question}")
        print(f"Model: {response.text.strip()[:100]}...")
        print(f"Correct: {example.correct_answer}")
        print(f"F1 Score: {result['f1_correct']:.2f}")
        print("-" * 60)

    # Summary over all recorded examples
    print("\nAggregate Metrics: ")
    print(calculator.get_aggregate_metrics())

## 5. Summary

### API Status

In [None]:
# Summarize which credentials were found and print usage recommendations.
# Status reflects only whether each credential is set, not whether the test
# calls in the earlier cells succeeded.
print("API Setup Summary:")
print("=" * 40)

if google_api_key:
    gemini_status = '✓ Ready'
else:
    gemini_status = '✗ Not configured'
print(f"Google Gemini:  {gemini_status}")

if github_token:
    github_status = '✓ Ready'
else:
    github_status = '✗ Not configured'
print(f"GitHub Models:  {github_status}")

print("\nRecommendation:")
if google_api_key:
    print("  Use Gemini 1.5 Flash for development and bulk experiments")
    print("  Use Gemini 1.5 Pro for final evaluation runs")
if github_token:
    print("  GitHub Models available as backup if Gemini credits run low")

In [None]:
# Closing pointer to the follow-up notebooks, emitted as one newline-separated print.
print(
    "\nNext Steps:",
    "1. If APIs are working, proceed to 03_baseline_experiments.ipynb",
    "2. Run baseline experiments on TruthfulQA and HotpotQA",
    "3. Then move to perturbation experiments",
    sep="\n",
)