<a href="https://colab.research.google.com/github/armelida/MELIDA/blob/main/notebooks/prompting_strategy_evaluator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import torch
import os
import subprocess

# Function to check if Colab is using a GPU or CPU
def check_runtime():
    """Print which accelerator (GPU, TPU, or none) this runtime exposes.

    CUDA availability is checked through torch first. If CUDA is not
    available, the presence of the ``COLAB_TPU_ADDR`` environment variable is
    treated as a TPU runtime. Otherwise a CPU-only warning is printed together
    with a hint on how to switch the runtime type.
    """
    # GPU takes precedence: report the name of device 0 and stop.
    if torch.cuda.is_available():
        print(f"✅ GPU is enabled! Using: {torch.cuda.get_device_name(0)}")
        return

    # No CUDA device; fall back to the TPU environment marker.
    if "COLAB_TPU_ADDR" in os.environ:
        print("✅ TPU is enabled!")
        return

    # Neither accelerator was detected.
    for message in (
        "⚠️ WARNING: No GPU or TPU detected. Running on CPU.",
        "👉 Go to Runtime > Change runtime type > Select GPU/TPU",
    ):
        print(message)

# Function to check GPU details (if available)
def check_gpu():
    """Print GPU details by running ``nvidia-smi``.

    Three outcomes are distinguished:

    * the executable is missing -> report that ``nvidia-smi`` was not found;
    * it runs but exits non-zero -> report the failure, its exit code and
      whatever diagnostic text it produced;
    * it succeeds -> print its standard output.

    Nothing is returned and no exception is propagated for these cases.
    """
    try:
        gpu_info = subprocess.run(
            ["nvidia-smi"],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )
    except FileNotFoundError:
        # Only this branch means the executable itself is absent.
        print("⚠️ `nvidia-smi` not found. No GPU detected.")
        return

    if gpu_info.returncode == 0:
        print(gpu_info.stdout)
        return

    # The tool exists but failed; surface its exit code and diagnostics
    # instead of claiming it was not found.
    print(f"⚠️ `nvidia-smi` failed (exit code {gpu_info.returncode}). No usable GPU detected.")
    details = (gpu_info.stderr or gpu_info.stdout or "").strip()
    if details:
        print(details)

# Run both checks: report the accelerator type first, then print `nvidia-smi` details
check_runtime()
check_gpu()


👉 Go to Runtime > Change runtime type > Select GPU/TPU
⚠️ No GPU found.


In [3]:
# MELIDA: Model Evaluation for Life-sciences Intelligence and Decision Assistance
# Production Evaluation Runner
!rm -rf MELIDA
# 1. Clone the repository and set up the environment
!git clone https://github.com/armelida/MELIDA.git
%cd MELIDA
# Install required packages
!pip install -r requirements.txt

Cloning into 'MELIDA'...
remote: Enumerating objects: 128, done.
remote: Counting objects: 100% (128/128), done.
remote: Compressing objects: 100% (111/111), done.
remote: Total 128 (delta 52), reused 50 (delta 11), pack-reused 0 (from 0)
Receiving objects: 100% (128/128), 125.78 KiB | 3.14 MiB/s, done.
Resolving deltas: 100% (52/52), done.
/content/MELIDA
Collecting anthropic>=0.7.0 (from -r requirements.txt (line 2))
  Downloading anthropic-0.49.0-py3-none-any.whl.metadata (24 kB)
Collecting jupyter>=1.0.0 (from -r requirements.txt (line 7))
  Downloading jupyter-1.1.1-py2.py3-none-any.whl.metadata (2.0 kB)
Collecting jupyterlab (from jupyter>=1.0.0->-r requirements.txt (line 7))
  Downloading jupyterlab-4.3.6-py3-none-any.whl.metadata (16 kB)
Collecting async-lru>=1.0.0 (from jupyterlab->jupyter>=1.0.0->-r requirements.txt (line 7))
  Downloading async_lru-2.0.4-py3-none-any.whl.metadata (4.5 kB)
Collecting jupyter-lsp>=2.0.0 (from jupyterlab->jupyter>=1.0.0->-r requirem

In [5]:
# 2. Set up configuration with support for multiple environments
import os
import json
import re

# Make sure the config directory is present before anything is written to it
os.makedirs('config', exist_ok=True)

# Prompt strategies, keyed by strategy ID. Each entry carries a description,
# the prompt language, the template text, the expected answer format and tags.
prompt_strategies = {
    "Prompt-001": {
        "description": "Spanish prompt for AI model taking standardized test",
        "language": "Spanish",
        "template": "Estás tomando un examen estandarizado MIR. Las respuestas correctas suman +3 puntos y las incorrectas restan -1 punto. Debes responder solo con la letra de la opción que consideres correcta (A, B, C, D) o 'NO' si prefieres no responder. No incluyas texto adicional en tu respuesta. Tu objetivo es maximizar tu puntuación.\n\nPregunta: {question_text}\n\nA) {option_a}\nB) {option_b}\nC) {option_c}\nD) {option_d}\n\nTu respuesta (solo A, B, C, D o NO):",
        "expected_format": "single_letter",
        "tags": ["spanish", "direct", "no_reasoning"]
    },
    "Prompt-002": {
        "description": "English prompt, questions and options in Spanish",
        "language": "English",
        "template": "You are taking a standardized MIR medical exam. Correct answers are worth +3 points and incorrect answers are -1 point. You must respond only with the letter of the option you consider correct (A, B, C, D) or 'NO' if you prefer not to answer. Do not include any additional text in your response. Your goal is to maximize your score.\n\nQuestion: {question_text}\n\nA) {option_a}\nB) {option_b}\nC) {option_c}\nD) {option_d}\n\nYour answer (only A, B, C, D or NO):",
        "expected_format": "single_letter",
        "tags": ["english", "direct", "no_reasoning"]
    },
    # Keep other strategies with the new format
    # ...
}

# Every template must contain all of these placeholders; a strategy that
# lacks any of them only triggers a warning, it does not abort the cell.
required_placeholders = [
    "{question_text}",
    "{option_a}",
    "{option_b}",
    "{option_c}",
    "{option_d}",
]
for strategy_id in prompt_strategies:
    template_text = prompt_strategies[strategy_id]["template"]
    absent = list(filter(lambda token: token not in template_text, required_placeholders))
    if not absent:
        continue
    print(f"Warning: Strategy {strategy_id} is missing placeholders: {', '.join(absent)}")

# Persist the strategies as indented JSON
with open('config/prompt_strategies.json', 'w') as strategies_file:
    strategies_file.write(json.dumps(prompt_strategies, indent=2))

# Try to get API keys from different sources, in priority order:
# Colab secrets -> environment variables -> local .env file.
# Maps each provider to the secret / environment variable name holding its key.
API_KEY_SECRET_NAMES = {"openai": "OPENAI_API_KEY", "anthropic": "ANTHROPIC_API_KEY"}
api_keys = {"openai": None, "anthropic": None}


def load_missing_api_keys(keys, lookup, source_name):
    """Fill in providers that still lack a key, using a single key source.

    Args:
        keys: Dict mapping provider name ("openai", "anthropic") to its key,
            or a falsy value if none was found yet. Updated in place;
            providers that already have a key are left untouched.
        lookup: Callable taking a secret name (e.g. "OPENAI_API_KEY") and
            returning the key, or a falsy value when the source lacks it.
        source_name: Human-readable label of the source, used in messages.

    Returns:
        List of provider names whose key was supplied by this source.
    """
    loaded = []
    for provider, secret_name in API_KEY_SECRET_NAMES.items():
        if keys.get(provider):
            continue
        try:
            value = lookup(secret_name)
        except Exception as e:
            # Look each key up independently so that a failure on one secret
            # does not prevent the other from being loaded.
            print(f"Note: Couldn't load {secret_name} from {source_name} - {str(e)}")
            continue
        if value:
            keys[provider] = value
            loaded.append(provider)
    # Only credit this source when it actually supplied at least one key.
    if loaded:
        print(f"✓ API keys loaded from {source_name}")
    return loaded


# Method 1: Try Colab secrets
try:
    from google.colab import userdata
except Exception as e:
    # Not running in Colab (or the module is unusable); fall through.
    print(f"Note: Couldn't load from Colab secrets - {str(e)}")
else:
    load_missing_api_keys(api_keys, userdata.get, "Colab secrets")

# Method 2: Try environment variables if any keys are still missing
if not all(api_keys.values()):
    load_missing_api_keys(api_keys, os.environ.get, "environment variables")

# Method 3: Try loading from a local .env file
if not all(api_keys.values()):
    try:
        from dotenv import load_dotenv
        load_dotenv()
    except Exception as e:
        print(f"Note: Couldn't load from .env file - {str(e)}")
    else:
        # load_dotenv() exports the file's entries into os.environ.
        load_missing_api_keys(api_keys, os.environ.get, ".env file")

# Build the API configuration; a provider without a real key gets a
# placeholder string so the file always has the same structure.
api_config = {
    provider: {"api_key": api_keys[provider] or placeholder}
    for provider, placeholder in (
        ("openai", "YOUR_OPENAI_API_KEY_HERE"),
        ("anthropic", "YOUR_ANTHROPIC_API_KEY_HERE"),
    )
}

# Write the configuration to disk as indented JSON.
# NOTE: the keys are stored in plain text in config/api_config.json.
with open('config/api_config.json', 'w') as config_file:
    config_file.write(json.dumps(api_config, indent=2))

# Report whether real keys were found for both providers
if api_keys["openai"] and api_keys["anthropic"]:
    print("✓ Complete API configuration saved")
else:
    missing_keys = [
        label
        for provider, label in (("openai", "OpenAI"), ("anthropic", "Anthropic"))
        if not api_keys[provider]
    ]

    print(f"⚠ Missing API keys: {', '.join(missing_keys)}")
    for help_line in (
        "Please provide API keys using one of these methods:",
        "  - Colab: Click on the 🔑 icon and add OPENAI_API_KEY and ANTHROPIC_API_KEY",
        "  - Environment variables: Set OPENAI_API_KEY and ANTHROPIC_API_KEY",
        "  - .env file: Create a .env file with OPENAI_API_KEY and ANTHROPIC_API_KEY",
        "Placeholder values have been saved to config/api_config.json",
    ):
        print(help_line)

✓ API keys loaded from Colab secrets
✓ Complete API configuration saved
