# Gemini 1.5 Pro Multiple Choice

In [1]:
import os
import sys
from pathlib import Path

# Location of the repo's `src` directory. The default is the original
# devcontainer checkout; set the HARP_BASEDIR environment variable to the repo
# root to use another checkout without editing this cell.
BASEDIR = Path(os.environ.get("HARP_BASEDIR", "/workspaces/HARP/")) / "src"

# Make the project packages importable. The membership test keeps this cell
# idempotent: re-running it does not stack duplicate entries on sys.path.
if str(BASEDIR) not in sys.path:
    sys.path.insert(0, str(BASEDIR))

In [2]:
from __future__ import annotations

import copy
import itertools
import json
import math
import os
import pickle
import pprint
import re
import textwrap
import time
import traceback
from collections import Counter, defaultdict
from typing import Any

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tiktoken
from IPython.display import Markdown, clear_output, display
from tqdm.auto import tqdm

import vertexai
from vertexai.batch_prediction._batch_prediction import BatchPredictionJob

In [3]:
from eval.api import safe_unified_api_call
from eval.costs import count_tokens, get_pricing
from eval.eval import run_one, create_batch, make_answer_check_dict_from_jsonl
from eval.parsing_lib import *
from eval.latex_answer_check import *
from eval.prompt import create_prompt
from eval.prompts import *
from eval.utils import read_jsonl, write_jsonl, get_uid, upload_blob, download_blob

# Data

In [4]:
# HARP_mcq.jsonl is intentionally not loaded here: this run used the original
# ordering of answer choices, so the raw file is filtered instead.
# Kept: records that have answer choices and whose subject is not calculus.
dataset = [
    problem
    for problem in read_jsonl(BASEDIR / "data/processed/HARP_raw.jsonl")
    if problem["choices"] is not None
    if problem["subject"] != "calculus"
]
# Lookup table from each problem's uid to the problem record.
dataset_map = dict((get_uid(problem), problem) for problem in dataset)
len(dataset)

4110

In [5]:
# Sanity check: every retained problem carries an answer letter.
all(problem["answer_choice"] is not None for problem in dataset)

True

In [6]:
# Tally of problems per "level" value.
Counter(problem["level"] for problem in dataset)

Counter({2: 1612, 3: 1136, 1: 858, 4: 504})

In [7]:
# Tally of problems per correct answer letter.
Counter(problem["answer_choice"] for problem in dataset)

Counter({'E': 903, 'A': 862, 'C': 833, 'B': 758, 'D': 754})

In [8]:
# Fraction of the dataset whose correct answer is each letter.
{
    letter: count / len(dataset)
    for letter, count in Counter(problem["answer_choice"] for problem in dataset).items()
}

{'D': 0.1834549878345499,
 'C': 0.202676399026764,
 'E': 0.2197080291970803,
 'B': 0.18442822384428223,
 'A': 0.2097323600973236}

In [9]:
# Sum of squared answer-letter frequencies, i.e. the probability that two
# independent draws from the empirical answer distribution pick the same letter.
sum(
    (count / len(dataset)) ** 2
    for count in Counter(problem["answer_choice"] for problem in dataset).values()
)

0.2010065059998461

# Run eval

In [10]:
# Initialise the Vertex AI SDK for us-central1. The project id is read from the
# VERTEXAI_PROJECT_ID environment variable; None is passed through when it is unset.
vertexai.init(
    project=os.getenv("VERTEXAI_PROJECT_ID"),
    location="us-central1",
)

In [11]:
# Cloud Storage bucket used for batch inputs and outputs, taken from the
# GCLOUD_BUCKET_NAME environment variable (None when it is unset).
# Should have the form "cloud-ai-platform-<YOUR_BUCKET>"
BUCKET_NAME = os.getenv("GCLOUD_BUCKET_NAME")

## Create batch

In [15]:
# Build the zero-shot Gemini batch requests for the "from_text" answer-choice
# format. Keyword order is kept as-is in case create_batch uses it to lay out
# the request.
batch = create_batch(
    dataset,
    api="google",
    model="gemini-1.5-pro-002",
    fewshot_messages=[],
    system_prompt=gemini_multiple_choice_0shot_sysprompt,
    prompt_choices="from_text",
    max_tokens=2048,
    temperature=0,
    seed=0,
    stop_sequences=["I hope it is correct."],
    # just to remove irrelevant params
    logprobs=None,
    top_p=None,
)

# Persist the requests for upload. The target directory is created first so the
# cell also works on a checkout where inputs/mcq/gemini-1.5-pro-002/ does not
# exist yet.
batch_file = BASEDIR / "inputs/mcq/gemini-1.5-pro-002/batch_choices_from-text.jsonl"
batch_file.parent.mkdir(parents=True, exist_ok=True)
write_jsonl(batch, batch_file)

In [18]:
# Display the first request of the most recently built batch.
batch[0]

{'request': {'contents': [{'role': 'user',
    'parts': [{'text': 'Problem:\nIf $64$ is divided into three parts proportional to $2$, $4$, and $6$, the smallest part is:\n$\\textbf{(A)}\\ 5\\frac{1}{3}\\qquad\\textbf{(B)}\\ 11\\qquad\\textbf{(C)}\\ 10\\frac{2}{3}\\qquad\\textbf{(D)}\\ 5\\qquad\\textbf{(E)}\\ \\text{None of these answers}$'}]},
   {'role': 'model', 'parts': [{'text': 'Solution:'}]}],
  'generation_config': {'temperature': 0,
   'candidate_count': 1,
   'max_output_tokens': 2048,
   'stop_sequences': ['I hope it is correct.'],
   'seed': 0},
  'system_instruction': {'parts': [{'text': 'You are a math expert. Solve the following math Problem, thinking step by step. End the Solution with the final answer in the form "Final Answer: The final answer is ?. I hope it is correct.", where ? is replaced by one of the letters A, B, C, D or E.'}]}},
 'model': 'gemini-1.5-pro-002'}

In [19]:
# Build the zero-shot Gemini batch requests for the "newline_dot" answer-choice
# format (one "A. ..." line per choice, as the sample request below shows).
# Keyword order is kept as-is in case create_batch uses it to lay out the request.
batch = create_batch(
    dataset,
    api="google",
    model="gemini-1.5-pro-002",
    fewshot_messages=[],
    system_prompt=gemini_multiple_choice_0shot_sysprompt,
    prompt_choices="newline_dot",
    max_tokens=2048,
    temperature=0,
    seed=0,
    stop_sequences=["I hope it is correct."],
    # just to remove irrelevant params
    logprobs=None,
    top_p=None,
)

# Persist the requests for upload. The target directory is created first so the
# cell also works on a checkout where inputs/mcq/gemini-1.5-pro-002/ does not
# exist yet.
batch_file = BASEDIR / "inputs/mcq/gemini-1.5-pro-002/batch_choices_newline-dot.jsonl"
batch_file.parent.mkdir(parents=True, exist_ok=True)
write_jsonl(batch, batch_file)

# Display the first request as a spot check.
batch[0]

{'request': {'contents': [{'role': 'user',
    'parts': [{'text': 'Problem:\nIf $64$ is divided into three parts proportional to $2$, $4$, and $6$, the smallest part is:\nA. $5\\frac{1}{3}$\nB. $11$\nC. $10\\frac{2}{3}$\nD. $5$\nE. $\\text{None of these answers}$'}]},
   {'role': 'model', 'parts': [{'text': 'Solution:'}]}],
  'generation_config': {'temperature': 0,
   'candidate_count': 1,
   'max_output_tokens': 2048,
   'stop_sequences': ['I hope it is correct.'],
   'seed': 0},
  'system_instruction': {'parts': [{'text': 'You are a math expert. Solve the following math Problem, thinking step by step. End the Solution with the final answer in the form "Final Answer: The final answer is ?. I hope it is correct.", where ? is replaced by one of the letters A, B, C, D or E.'}]}},
 'model': 'gemini-1.5-pro-002'}

In [21]:
# Build the zero-shot Gemini batch requests for the "newline_paren" answer-choice
# format (one "(A) ..." line per choice, as the sample request below shows).
# Keyword order is kept as-is in case create_batch uses it to lay out the request.
batch = create_batch(
    dataset,
    api="google",
    model="gemini-1.5-pro-002",
    fewshot_messages=[],
    system_prompt=gemini_multiple_choice_0shot_sysprompt,
    prompt_choices="newline_paren",
    max_tokens=2048,
    temperature=0,
    seed=0,
    stop_sequences=["I hope it is correct."],
    # just to remove irrelevant params
    logprobs=None,
    top_p=None,
)

# Persist the requests for upload. The target directory is created first so the
# cell also works on a checkout where inputs/mcq/gemini-1.5-pro-002/ does not
# exist yet.
batch_file = BASEDIR / "inputs/mcq/gemini-1.5-pro-002/batch_choices_newline-paren.jsonl"
batch_file.parent.mkdir(parents=True, exist_ok=True)
write_jsonl(batch, batch_file)

# Display the first request as a spot check.
batch[0]

{'request': {'contents': [{'role': 'user',
    'parts': [{'text': 'Problem:\nIf $64$ is divided into three parts proportional to $2$, $4$, and $6$, the smallest part is:\n(A) $5\\frac{1}{3}$\n(B) $11$\n(C) $10\\frac{2}{3}$\n(D) $5$\n(E) $\\text{None of these answers}$'}]},
   {'role': 'model', 'parts': [{'text': 'Solution:'}]}],
  'generation_config': {'temperature': 0,
   'candidate_count': 1,
   'max_output_tokens': 2048,
   'stop_sequences': ['I hope it is correct.'],
   'seed': 0},
  'system_instruction': {'parts': [{'text': 'You are a math expert. Solve the following math Problem, thinking step by step. End the Solution with the final answer in the form "Final Answer: The final answer is ?. I hope it is correct.", where ? is replaced by one of the letters A, B, C, D or E.'}]}},
 'model': 'gemini-1.5-pro-002'}

In [23]:
# Build the zero-shot Gemini batch requests with prompt_choices=None (the sample
# request below shows the problem statement without any answer choices).
# Keyword order is kept as-is in case create_batch uses it to lay out the request.
batch = create_batch(
    dataset,
    api="google",
    model="gemini-1.5-pro-002",
    fewshot_messages=[],
    system_prompt=gemini_multiple_choice_0shot_sysprompt,
    prompt_choices=None,
    max_tokens=2048,
    temperature=0,
    seed=0,
    stop_sequences=["I hope it is correct."],
    # just to remove irrelevant params
    logprobs=None,
    top_p=None,
)

# Persist the requests for upload. The target directory is created first so the
# cell also works on a checkout where inputs/mcq/gemini-1.5-pro-002/ does not
# exist yet.
batch_file = BASEDIR / "inputs/mcq/gemini-1.5-pro-002/batch_choices_none.jsonl"
batch_file.parent.mkdir(parents=True, exist_ok=True)
write_jsonl(batch, batch_file)

# Display the first request as a spot check.
batch[0]

{'request': {'contents': [{'role': 'user',
    'parts': [{'text': 'Problem:\nIf $64$ is divided into three parts proportional to $2$, $4$, and $6$, the smallest part is:'}]},
   {'role': 'model', 'parts': [{'text': 'Solution:'}]}],
  'generation_config': {'temperature': 0,
   'candidate_count': 1,
   'max_output_tokens': 2048,
   'stop_sequences': ['I hope it is correct.'],
   'seed': 0},
  'system_instruction': {'parts': [{'text': 'You are a math expert. Solve the following math Problem, thinking step by step. End the Solution with the final answer in the form "Final Answer: The final answer is ?. I hope it is correct.", where ? is replaced by one of the letters A, B, C, D or E.'}]}},
 'model': 'gemini-1.5-pro-002'}

## Upload to cloud

In [None]:
# Upload every prompt-format variant's request file to the bucket. The uploads
# differ only in the variant suffix, so they are driven by one loop instead of
# four copy-pasted calls; the local file and the destination blob share the
# same file name.
for choice_format in ("from-text", "newline-dot", "newline-paren", "none"):
    batch_filename = f"batch_choices_{choice_format}.jsonl"
    upload_blob(
        BUCKET_NAME,
        BASEDIR / "inputs/mcq/gemini-1.5-pro-002" / batch_filename,
        f"prompt_data/mcq/gemini-1.5-pro-002/{batch_filename}",
    )

## Run batch job

In [None]:
# Submit a Vertex AI batch prediction job for gemini-1.5-pro-002 that reads the
# "from-text" request file from the bucket and writes under the matching
# outputs/ prefix.
# NOTE(review): every submit cell rebinds `batch_prediction_job`, so only the
# most recently submitted job stays reachable through this name.
batch_prediction_job = BatchPredictionJob.submit(
    source_model="gemini-1.5-pro-002",
    input_dataset=f"gs://{BUCKET_NAME}/prompt_data/mcq/gemini-1.5-pro-002/batch_choices_from-text.jsonl",
    output_uri_prefix=f"gs://{BUCKET_NAME}/outputs/mcq/gemini-1.5-pro-002/batch_choices_from-text",
)

In [None]:
# Submit a Vertex AI batch prediction job for gemini-1.5-pro-002 that reads the
# "newline-dot" request file from the bucket and writes under the matching
# outputs/ prefix.
# NOTE(review): every submit cell rebinds `batch_prediction_job`, so only the
# most recently submitted job stays reachable through this name.
batch_prediction_job = BatchPredictionJob.submit(
    source_model="gemini-1.5-pro-002",
    input_dataset=f"gs://{BUCKET_NAME}/prompt_data/mcq/gemini-1.5-pro-002/batch_choices_newline-dot.jsonl",
    output_uri_prefix=f"gs://{BUCKET_NAME}/outputs/mcq/gemini-1.5-pro-002/batch_choices_newline-dot",
)

In [None]:
# Submit a Vertex AI batch prediction job for gemini-1.5-pro-002 that reads the
# "newline-paren" request file from the bucket and writes under the matching
# outputs/ prefix.
# NOTE(review): every submit cell rebinds `batch_prediction_job`, so only the
# most recently submitted job stays reachable through this name.
batch_prediction_job = BatchPredictionJob.submit(
    source_model="gemini-1.5-pro-002",
    input_dataset=f"gs://{BUCKET_NAME}/prompt_data/mcq/gemini-1.5-pro-002/batch_choices_newline-paren.jsonl",
    output_uri_prefix=f"gs://{BUCKET_NAME}/outputs/mcq/gemini-1.5-pro-002/batch_choices_newline-paren",
)

In [None]:
# Submit a Vertex AI batch prediction job for gemini-1.5-pro-002 that reads the
# "none" (no answer choices) request file from the bucket and writes under the
# matching outputs/ prefix.
# NOTE(review): every submit cell rebinds `batch_prediction_job`, so only the
# most recently submitted job stays reachable through this name.
batch_prediction_job = BatchPredictionJob.submit(
    source_model="gemini-1.5-pro-002",
    input_dataset=f"gs://{BUCKET_NAME}/prompt_data/mcq/gemini-1.5-pro-002/batch_choices_none.jsonl",
    output_uri_prefix=f"gs://{BUCKET_NAME}/outputs/mcq/gemini-1.5-pro-002/batch_choices_none",
)

In [None]:
# Each job's predictions are expected under a "prediction-model-<TIMESTAMP>"
# folder below its output prefix. Replace every "<TIMESTAMP>" placeholder with
# the folder timestamp of the corresponding finished job before running this cell.
PREDICTION_TIMESTAMPS = {
    "from-text": "<TIMESTAMP>",
    "newline-dot": "<TIMESTAMP>",
    "newline-paren": "<TIMESTAMP>",
    "none": "<TIMESTAMP>",
}

for choice_format, timestamp in PREDICTION_TIMESTAMPS.items():
    # Fail with a readable message instead of asking the bucket for a blob whose
    # name still contains the literal placeholder.
    if timestamp == "<TIMESTAMP>":
        raise ValueError(
            f"Set PREDICTION_TIMESTAMPS[{choice_format!r}] to the timestamp of the "
            "job's prediction-model-<TIMESTAMP> output folder before downloading."
        )

    local_file = BASEDIR / "outputs/mcq/gemini-1.5-pro-002" / f"outputs_{choice_format}.jsonl"
    # Create the local output directory so the download also works on a
    # checkout where it does not exist yet.
    local_file.parent.mkdir(parents=True, exist_ok=True)
    download_blob(
        BUCKET_NAME,
        f"outputs/mcq/gemini-1.5-pro-002/batch_choices_{choice_format}/prediction-model-{timestamp}/predictions.jsonl",
        local_file,
    )