In [None]:
# Environment smoke test: import every core OAT dependency and print its version.
# An ImportError from any of the imports is reported instead of propagating, so the
# cell always finishes with a readable verdict.
try:
    import torch
    import transformers
    import deepspeed
    import vllm
    import launchpad as lp

    print("--- OAT Core Dependencies Check ---")
    print(f"PyTorch version: {torch.__version__}")
    print(f"Transformers version: {transformers.__version__}")
    print(f"DeepSpeed version: {deepspeed.__version__}")
    print(f"vLLM version: {vllm.__version__}")

    # Launchpad may not expose `__version__`; a missing attribute is not a failure.
    launchpad_version = getattr(lp, "__version__", "Not found (this is expected and okay).")
    print(f"Launchpad version: {launchpad_version}")

    # Same marker style as the failure branch so the verdict is easy to spot in the log noise.
    print("\n✅ SUCCESS: Your environment is correctly set up!")

except ImportError as e:
    print("\n❌ FAILED: A library is missing or could not be loaded.")
    print(f"Error details: {e}")
    print("\nThere may be an issue with the installation or system dependencies.")

  from .autonotebook import tqdm as notebook_tqdm


[2025-07-10 00:40:54,322] [INFO] [real_accelerator.py:239:get_accelerator] Setting ds_accelerator to cuda (auto detect)


/usr/bin/ld: cannot find -laio: No such file or directory
collect2: error: ld returned 1 exit status
/usr/bin/ld: cannot find -laio: No such file or directory
collect2: error: ld returned 1 exit status
2025-07-10 00:40:58.955896: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-12.4/lib64:
2025-07-10 00:40:58.955926: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


INFO 07-10 00:41:00 [__init__.py:239] Automatically detected platform cuda.


2025-07-10 00:41:02,037	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


--- OAT Core Dependencies Check ---
PyTorch version: 2.6.0+cu124
Transformers version: 4.51.3
DeepSpeed version: 0.16.8
vLLM version: 0.8.4
Launchpad version: Not found (this is expected and okay).

✅ SUCCESS: Your environment is correctly set up!


In [25]:
import os
import json
import dotenv
import logging
import google.generativeai as genai
from oat.oracles.base import RewardOracleBase

In [28]:
# Pull variables from a local .env file (if present) so the key never lives in the notebook.
dotenv.load_dotenv()

# Fail fast with an explicit message instead of handing `None` to the client;
# this mirrors the check CALOracle performs in its constructor.
if not os.getenv("GEMINI_API_KEY"):
    raise ValueError("'GEMINI_API_KEY' not set. Add it to your .env file or export it before running this cell.")
genai.configure(api_key=os.environ["GEMINI_API_KEY"])

model = genai.GenerativeModel("gemini-1.5-flash-latest")

# Quick connectivity check: any short prompt works here.
# response = model.generate_content("Find the derivative of f(x) = x^2 * sin(x). Think step by step.")
response = model.generate_content("tell me a jokes for a farmer")
print(response.text)

Why did the farmer bring a ladder to the party?  Because he heard the drinks were on the house!



In [24]:
# Show only WHETHER the key is configured. Evaluating the key itself as the last
# expression of a cell would write the secret into the saved notebook output.
gemini_key_configured = bool(os.getenv("GEMINI_API_KEY"))
gemini_key_configured



In [22]:
class CALOracle:
    """Finds the first erroneous sentence of a flawed solution via a few-shot Gemini prompt.

    The oracle builds a prompt from a JSON file of worked examples plus the problem at
    hand, sends it to a Gemini model, and returns the model's answer as plain text.

    Attributes:
        cal_model_name: Name of the Gemini model passed to `genai.GenerativeModel`.
        few_shot_path: Path of the JSON file the few-shot examples were loaded from.
        safety_settings: Per-category safety thresholds sent with the model and each request.
        model: The configured `genai.GenerativeModel` instance.
        few_shot_examples: Parsed content of `few_shot_path`. Each entry is indexed with
            the keys 'question', 'correct_solution', 'incorrect_solution' and
            'divergent_sentence' when the prompt is built.
    """

    # Harm categories for which blocking is switched off on every request.
    _UNBLOCKED_HARM_CATEGORIES = (
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
    )

    def __init__(self, cal_model_name: str, few_shot_path: str, api_key_env: str = "GEMINI_API_KEY"):
        """Configure the Gemini client, create the model and load the few-shot examples.

        Args:
            cal_model_name: Gemini model name to use.
            few_shot_path: Path to a JSON file containing the few-shot examples.
            api_key_env: Name of the environment variable that holds the API key.

        Raises:
            ValueError: If the environment variable named by `api_key_env` is unset or empty.
            OSError: If `few_shot_path` cannot be opened.
            json.JSONDecodeError: If the few-shot file is not valid JSON.
        """
        self.cal_model_name = cal_model_name
        self.few_shot_path = few_shot_path

        api_key = os.getenv(api_key_env)
        if not api_key:
            # Name the variable that was actually looked up, not a hard-coded one.
            raise ValueError(f"'{api_key_env}' not set. Please export your {api_key_env}.")
        genai.configure(api_key=api_key)

        generation_config = {"temperature": 0.0, "max_output_tokens": 150}
        # Built once and shared by the model constructor and every request.
        self.safety_settings = [
            {"category": category, "threshold": "BLOCK_NONE"}
            for category in self._UNBLOCKED_HARM_CATEGORIES
        ]
        self.model = genai.GenerativeModel(
            model_name=cal_model_name,
            generation_config=generation_config,
            safety_settings=self.safety_settings,
        )

        # Explicit encoding: the platform default is not guaranteed to be UTF-8.
        with open(few_shot_path, "r", encoding="utf-8") as f:
            self.few_shot_examples = json.load(f)
        logging.info(f"CAL Oracle: Initialized with Gemini model '{cal_model_name}'.")

    def _build_full_prompt(self, question: str, correct_solution: str, incorrect_solution: str) -> str:
        """Builds the full few-shot prompt for the Gemini API call.

        The prompt is the instruction text, then one section per few-shot example, then
        the new problem ending in an open 'Divergent Sentence:' line for the model to
        complete. Sections are separated by a '---' line.

        Raises:
            KeyError: If a few-shot example lacks one of the expected keys.
        """
        prompt_parts = [
            "You are a meticulous logic checker and math tutor. Your task is to compare a flawed student solution to a correct expert solution and identify the single, complete sentence from the 'Incorrect Solution' that contains the first critical logical or mathematical error. You must only output the exact divergent sentence and nothing else."
        ]
        # Add the few-shot examples
        for example in self.few_shot_examples:
            prompt_parts.append("\n---\n")
            prompt_parts.append(f"Question: {example['question']}")
            prompt_parts.append(f"Correct Solution: {example['correct_solution']}")
            prompt_parts.append(f"Incorrect Solution: {example['incorrect_solution']}")
            prompt_parts.append(f"Divergent Sentence: {example['divergent_sentence']}")

        # Add the final, new problem to be solved
        prompt_parts.append("\n---\n")
        prompt_parts.append(f"Question: {question}")
        prompt_parts.append(f"Correct Solution: {correct_solution}")
        prompt_parts.append(f"Incorrect Solution: {incorrect_solution}")
        prompt_parts.append("Divergent Sentence:")

        return "\n".join(prompt_parts)

    def get_error_segment(self, question: str, correct_solution: str, incorrect_solution: str) -> str:
        """Calls the Gemini model to get the divergent sentence.

        Returns:
            The model's response text with surrounding whitespace stripped, or an empty
            string if the request or reading the response raised any exception.
        """
        full_prompt = self._build_full_prompt(question, correct_solution, incorrect_solution)

        print("Sending request to Gemini API...")
        # Bound before the try so the failure path can inspect it even when the
        # exception came from reading the response rather than from the request.
        response = None
        try:
            response = self.model.generate_content(
                full_prompt,
                # We configure the model to be deterministic
                generation_config=genai.types.GenerationConfig(
                    candidate_count=1,
                    temperature=0.0,
                    top_p=1.0,
                ),
                safety_settings=self.safety_settings
            )
            # The response text is in the .text attribute
            error_segment = response.text.strip()
            print(f"Gemini CAL Response: '{error_segment}'")
            return error_segment
        except Exception as e:
            # Deliberately best-effort: callers get "" rather than an exception.
            print(f"CRITICAL: Gemini API call failed: {e}")
            prompt_feedback = getattr(response, "prompt_feedback", None)
            if prompt_feedback is not None:
                # Helps diagnose prompts that were rejected before producing text.
                print(f"Gemini prompt feedback: {prompt_feedback}")
            return "" # Return empty string on failure

In [None]:
# Cell 2: Test the Gemini-powered CALOracle

# Check the key up front rather than catching KeyError around the whole cell: a
# KeyError raised later (e.g. by a few-shot example missing a field) must not be
# misreported as a missing API key. CALOracle reads GEMINI_API_KEY by default and
# configures the genai client itself, so no separate configure call is needed here.
if not os.getenv("GEMINI_API_KEY"):
    print("\n❌ FAILED: GEMINI_API_KEY environment variable not set.")
    print("Please set your API key using: export GEMINI_API_KEY='your_key_here'")
else:
    try:
        # --- 1. Instantiate the Oracle ---
        # We use a modern Gemini model. "gemini-1.5-flash" is fast and effective.
        # Note: Ensure your API key has access to this model.
        cal_oracle = CALOracle(
            cal_model_name="gemini-1.5-flash-latest",
            few_shot_path="cal_examples.json"
        )

        # --- 2. Define a New Test Case ---
        test_question = "What is the integral of 2x + 3 with respect to x?"

        test_correct_solution = "To find the integral of 2x + 3, we integrate term by term. The integral of 2x is x^2. The integral of 3 is 3x. We also add the constant of integration, C. So the integral is x^2 + 3x + C."

        test_incorrect_solution = "To find the integral of 2x + 3, we find the derivative. The derivative of 2x is 2. The derivative of 3 is 0. So the integral is 2."

        # --- 3. Run the Test ---
        print("\n--- RUNNING GEMINI TEST CASE ---")
        identified_error = cal_oracle.get_error_segment(
            test_question,
            test_correct_solution,
            test_incorrect_solution
        )

        # --- 4. Verify the Result ---
        # Exact string match: the oracle is instructed to echo the sentence verbatim.
        expected_error = "To find the integral of 2x + 3, we find the derivative."
        print("\n--- VERIFICATION ---")
        print(f"Expected Error: '{expected_error}'")

        if identified_error == expected_error:
            print("\n✅ SUCCESS: The Gemini CAL correctly identified the divergent sentence!")
        else:
            print("\n❌ FAILED: The Gemini CAL returned an incorrect segment.")

    except Exception as e:
        print(f"\n❌ FAILED: An unexpected error occurred: {e}")


--- RUNNING GEMINI TEST CASE ---
Sending request to Gemini API...


Gemini CAL Response: 'To find the integral of 2x + 3, we find the derivative.'

--- VERIFICATION ---
Expected Error: 'To find the integral of 2x + 3, we find the derivative.'

✅ SUCCESS: The Gemini CAL correctly identified the divergent sentence!
