<a href="https://colab.research.google.com/github/henrykmichalewski/math-evals/blob/master/colabs/gemini_pro_vertex_evals_gsm8k_python.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Fetch the math-evals repository and install it from the local checkout
# in editable mode (-e), with pip output reduced (-q).
!git clone https://github.com/henrykmichalewski/math-evals
!pip install -e math-evals -q

## The above cell clones the unlocked variant of the OpenAI HumanEval repo.

"Unlocked" means that the correctness-checking code actually executes the generated completions. The repo also contains helper code that simplifies gsm8k_python evaluations. You need to restart the runtime before moving forward.

## Loading gsm8k

In [None]:
!pip install datasets -q

In [None]:
from datasets import load_dataset

In [None]:
# Load the "main" configuration of the gsm8k dataset.
gsm8k = load_dataset("gsm8k", "main")

In [None]:
# Keep handles to both splits; the evaluation loop further down iterates over gsm8k_test.
gsm8k_train, gsm8k_test = gsm8k['train'], gsm8k['test']

In [None]:
from human_eval.execution import check_gsm8k_correctness

In [None]:
# Sanity check: score a hand-written completion that returns 63 against test problem 5.
# NOTE(review): presumably this is a wrong answer for that problem - confirm from the output.
check_gsm8k_correctness(gsm8k_test[5], " return 63", task_id=5)

In [None]:
# Same check with a completion that returns 64.
# NOTE(review): presumably this is the expected answer for that problem - confirm from the output.
check_gsm8k_correctness(gsm8k_test[5], " return 64", task_id=5)

In [None]:
# Same check with a value that differs from 64 by 1e-6; presumably probes how
# tolerant the answer comparison is to small floating-point differences.
check_gsm8k_correctness(gsm8k_test[5], " return 64.000001", task_id=5)

## Vertex API

In [None]:
# I am running the following command from the Colab terminal rather than from a notebook cell:
# !gcloud auth application-default login

In [None]:
!pip3 install --upgrade --user google-cloud-aiplatform

In [None]:
# Set the quota project used with the application-default credentials.
# NOTE(review): the project name is hard-coded here while `project_id` below is
# left blank - presumably both should name your own GCP project; confirm.
!gcloud auth application-default set-quota-project cloud-llm-preview2

In [None]:
import re
import time
from typing import Callable, Iterable, Match, Optional, Pattern, Protocol, Sequence, Union

import vertexai

In [None]:
# NOTE(review): gapic_content_types is not referenced anywhere else in the
# visible notebook.
from google.cloud.aiplatform_v1beta1.types import (
    content as gapic_content_types,
)
## Add your GCP project and location data
# Both values are blank placeholders and must be filled in before running.
project_id = ""
location = ""

# Initialise the Vertex AI SDK with the project and location set above.
vertexai.init(project=project_id, location=location)

from vertexai.preview.generative_models import GenerativeModel

In [None]:
# Name of the Vertex model to evaluate; also used below as the name of the
# Drive folder that stores this model's samples.
model_name = 'gemini-pro'

In [None]:
# Notebook-level model handle; generate_one_completion calls model.generate_content.
model = GenerativeModel(model_name)

## Loading/Saving samples into gdrive

In [None]:
from google.colab import drive
import os

# Mount Google Drive under /content/gdrive/ so samples persist outside the Colab VM.
# force_remount=True asks for a fresh mount even if the path is already mounted.
drive.mount('/content/gdrive/', force_remount=True)

In [None]:
# Per-model output directory on the mounted Drive; created if it does not exist yet.
path = f'/content/gdrive/My Drive/vertex/{model_name}'
os.makedirs(path, exist_ok=True)
# JSONL helpers used to read and write the saved samples.
from human_eval.data import stream_jsonl, write_jsonl

### Read jsonl

In [None]:
# Resume from previously saved samples when the samples file exists; a first
# run starts from an empty list.
try:
  samples = list(stream_jsonl(os.path.join(path, "samples_gsm8k.jsonl")))
except FileNotFoundError:
  # Only a missing file means "nothing saved yet". Any other failure (for
  # example an unreadable or malformed file) is left to propagate, so that the
  # later write_jsonl calls cannot overwrite existing results with an empty list.
  samples = []

In [None]:
len(samples)

## Helper function to generate vertex completions

In [None]:
def generate_one_completion(prompt, client=None, max_retries=None, wait_seconds=30):
  """Request a completion for `prompt`, retrying after every failed call.

  Args:
    prompt: Content passed as the first argument to `generate_content`.
    client: Object exposing `generate_content(prompt, generation_config=...)`.
      Defaults to the notebook-level `model`.
    max_retries: Maximum number of retries after the first failed attempt.
      `None` (the default) retries forever, which is the original behaviour.
    wait_seconds: Pause between attempts, in seconds (30 by default).

  Returns:
    Whatever `generate_content` returns on the first successful call.

  Raises:
    Exception: the last error raised by `generate_content`, once `max_retries`
      retries have been used up. Never raised when `max_retries` is None.
  """
  if client is None:
    # Fall back to the model handle created earlier in the notebook.
    client = model
  failures = 0
  while True:
    try:
      return client.generate_content(
        prompt,
        generation_config={
            # Temperature is fixed at 0 for every request.
            'temperature' : 0.,
            },
      )
    except Exception as e:
      failures += 1
      print(e)
      # Give up only when the caller asked for a bounded number of retries.
      if max_retries is not None and failures > max_retries:
        raise
      print("Waiting for vertex...")
      time.sleep(wait_seconds)

In [None]:
generate_one_completion("Hi there, Gemini!")

## Full loop

In [None]:
from data import gsm8k_python_prompt

In [None]:
%%time
all_correct = 0
for task_id, problem in enumerate(gsm8k_test):

    # Check if task_id is less than samples length
    if task_id < len(samples):
        correctness = check_gsm8k_correctness(problem, samples[task_id]['completion'], task_id=task_id)
        correct = correctness['passed']
        all_correct += correct
        continue

    # Print Task ID
    print(f"task_id {task_id}")

    # Formulate and print the full prompt
    full_prompt = (gsm8k_python_prompt.PYTHON_PROMPT + '\n' +
                  gsm8k_python_prompt.PYTHON_TEMPLATE.format(question=problem['question']))

    text_empty = False
    while True:
      try:
        response = generate_one_completion(full_prompt)
        try:
          completion = response.text
        except:
          text_empty = True
        break
      except Exception as e:
        print(e)
        print("Waiting for vertex...")
        time.sleep(30)

    if text_empty: continue

    # Works for Pro, a model-dependent processing of outputs:
    try:
      completion = '\n'.join(completion.split('```')[1].splitlines()[2:])
    except:
      print("Failed to extract program.")

    print(completion)

    # New lines for visibility
    print("\n")
    print(problem['answer'])

    # Check for correctness of the problem solved
    correctness = check_gsm8k_correctness(problem, completion, task_id=task_id)
    correct = correctness['passed']
    all_correct += correct

    # Print out the correctness
    print(f"Correctnes: {correct}\nAll correct: {all_correct}")
    print("="*40)

    # Append the task results to the list and check if need to write to JSON file
    samples.append(dict(task_id=task_id, completion=completion))
    if task_id % 100 == 0 and task_id:
        write_jsonl(os.path.join(path, "samples_gsm8k.jsonl"), samples)

# Write all samples to JSONL file at the end
write_jsonl(os.path.join(path, "samples_gsm8k.jsonl"), samples)


In [None]:
# Accuracy over the whole test split, computed from the evaluation loop's
# running total instead of hand-typed numbers. Problems without a recorded
# sample count as incorrect. (This cell previously held the literal 1030 / 1319.)
all_correct / len(gsm8k_test)

### Write jsonl

In [None]:
# Save the current in-memory samples to the samples file. This is the same
# call the evaluation loop makes when it finishes, kept as a separate cell.
write_jsonl(os.path.join(path, "samples_gsm8k.jsonl"), samples)