In [1]:
import sys
from pathlib import Path

# Location of the repo's `src` directory.
# Replace the path below with your own basedir path for the repo.
BASEDIR = Path("/workspaces/HARP/").joinpath("src")

# Put BASEDIR at the very front of the import search path so that modules
# found under it take precedence when imported.
sys.path[:0] = [str(BASEDIR)]

In [2]:
from __future__ import annotations

import copy
import itertools
import json
import math
import os
import pickle
import pprint
import re
import textwrap
import time
import traceback
from collections import Counter, defaultdict
from typing import Any

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tiktoken
from IPython.display import Markdown, clear_output, display
from tqdm.auto import tqdm

import vertexai
from vertexai.batch_prediction._batch_prediction import BatchPredictionJob

In [3]:
from eval.api import safe_unified_api_call
from eval.costs import count_tokens, get_pricing
from eval.eval import run_one, create_batch, make_answer_check_dict_from_jsonl
from eval.parsing_lib import *
from eval.latex_answer_check import *
from eval.prompt import create_prompt
from eval.prompts import *
from eval.utils import read_jsonl, write_jsonl, get_uid, upload_blob, download_blob

# Data

In [4]:
# Read the processed HARP records and build a lookup table keyed by each record's uid.
dataset = read_jsonl(BASEDIR.joinpath("data/processed/HARP.jsonl"))
dataset_map = dict((get_uid(record), record) for record in dataset)
len(dataset)  # shown as the cell output: how many records were loaded

4780

# Run eval

In [5]:
# Initialise the Vertex AI SDK. The project id is read from the VERTEXAI_PROJECT_ID
# environment variable (None when unset); the location is fixed to us-central1.
vertexai.init(
    project=os.getenv("VERTEXAI_PROJECT_ID"),
    location="us-central1",
)

In [6]:
# Name of the Cloud Storage bucket used for batch inputs and outputs, read from the
# GCLOUD_BUCKET_NAME environment variable (None when unset).
# Should have the form "cloud-ai-platform-<YOUR_BUCKET>"
BUCKET_NAME = os.getenv("GCLOUD_BUCKET_NAME")

## Create batch

In [7]:
# Request batch with no few-shot messages, using the `gemini_0shot_sysprompt` system prompt.
zeroshot_batch_kwargs = dict(
    api="google",
    model="gemini-1.5-flash-002",
    fewshot_messages=[],
    system_prompt=gemini_0shot_sysprompt,
    max_tokens=2048,
    temperature=0,
    seed=0,
    stop_sequences=["I hope it is correct."],
    # explicit None just to remove irrelevant params
    logprobs=None,
    top_p=None,
)
batch = create_batch(dataset, **zeroshot_batch_kwargs)

# Save the requests as JSONL; the same path is uploaded to the bucket later in the notebook.
write_jsonl(batch, BASEDIR / "inputs/short_answer/gemini-1.5-flash-002/batch.jsonl")

In [8]:
# Request batch that passes `minerva_4shot_prompt` as the few-shot messages,
# using the `gemini_sysprompt` system prompt.
minerva_batch_kwargs = dict(
    api="google",
    model="gemini-1.5-flash-002",
    fewshot_messages=minerva_4shot_prompt,
    system_prompt=gemini_sysprompt,
    max_tokens=2048,
    temperature=0,
    seed=0,
    stop_sequences=["I hope it is correct."],
    # explicit None just to remove irrelevant params
    logprobs=None,
    top_p=None,
)
batch = create_batch(dataset, **minerva_batch_kwargs)

# Save the requests as JSONL; the same path is uploaded to the bucket later in the notebook.
write_jsonl(batch, BASEDIR / "inputs/short_answer/gemini-1.5-flash-002/batch_minerva.jsonl")

## Upload to cloud

In [None]:
# Upload the locally written zero-shot request file (batch.jsonl) to the bucket,
# under the prompt_data/ prefix.
zeroshot_local_file = BASEDIR / "inputs/short_answer/gemini-1.5-flash-002/batch.jsonl"
zeroshot_blob_path = "prompt_data/short_answer/gemini-1.5-flash-002/batch.jsonl"
upload_blob(BUCKET_NAME, zeroshot_local_file, zeroshot_blob_path)

In [None]:
# Upload the locally written Minerva few-shot request file (batch_minerva.jsonl)
# to the bucket, under the prompt_data/ prefix.
minerva_local_file = BASEDIR / "inputs/short_answer/gemini-1.5-flash-002/batch_minerva.jsonl"
minerva_blob_path = "prompt_data/short_answer/gemini-1.5-flash-002/batch_minerva.jsonl"
upload_blob(BUCKET_NAME, minerva_local_file, minerva_blob_path)

## Run batch job

In [None]:
# Submit a batch prediction job over the uploaded batch.jsonl requests; the job's
# output location is the outputs/.../batch prefix in the same bucket.
zeroshot_input_uri = f"gs://{BUCKET_NAME}/prompt_data/short_answer/gemini-1.5-flash-002/batch.jsonl"
zeroshot_output_prefix = f"gs://{BUCKET_NAME}/outputs/short_answer/gemini-1.5-flash-002/batch"
batch_prediction_job = BatchPredictionJob.submit(
    source_model="gemini-1.5-flash-002",
    input_dataset=zeroshot_input_uri,
    output_uri_prefix=zeroshot_output_prefix,
)

In [None]:
# Submit a batch prediction job over the uploaded Minerva few-shot requests
# (batch_minerva.jsonl); the job's output location is the outputs/.../batch_minerva
# prefix in the same bucket.
batch_minerva_prediction_job = BatchPredictionJob.submit(
    source_model="gemini-1.5-flash-002",
    input_dataset=f"gs://{BUCKET_NAME}/prompt_data/short_answer/gemini-1.5-flash-002/batch_minerva.jsonl",
    output_uri_prefix=f"gs://{BUCKET_NAME}/outputs/short_answer/gemini-1.5-flash-002/batch_minerva",
)
# Backward-compatible alias. This job used to be bound only to a name calling it
# "zeroshot", although it runs the Minerva few-shot batch (the zero-shot job is
# `batch_prediction_job`). Prefer `batch_minerva_prediction_job` in new code.
batch_zeroshot_prediction_job = batch_minerva_prediction_job

## Get results

In [None]:
# Fetch the predictions of the batch.jsonl job from the bucket into the local
# outputs directory.
# NOTE: `<TIMESTAMP>` is a literal placeholder in the blob path — presumably it has to be
# replaced with the actual prediction folder name created by the batch job before running.
zeroshot_predictions_blob = "outputs/short_answer/gemini-1.5-flash-002/batch/prediction-model-<TIMESTAMP>/predictions.jsonl"
zeroshot_predictions_file = BASEDIR / "outputs/short_answer/gemini-1.5-flash-002/outputs.jsonl"
download_blob(BUCKET_NAME, zeroshot_predictions_blob, zeroshot_predictions_file)

In [None]:
# Fetch the predictions of the batch_minerva.jsonl job from the bucket into the local
# outputs directory.
# NOTE: `<TIMESTAMP>` is a literal placeholder in the blob path — presumably it has to be
# replaced with the actual prediction folder name created by the batch job before running.
minerva_predictions_blob = "outputs/short_answer/gemini-1.5-flash-002/batch_minerva/prediction-model-<TIMESTAMP>/predictions.jsonl"
minerva_predictions_file = BASEDIR / "outputs/short_answer/gemini-1.5-flash-002/outputs_minerva.jsonl"
download_blob(BUCKET_NAME, minerva_predictions_blob, minerva_predictions_file)