In [62]:
import google.generativeai as genai
import os
import json
import time

%load_ext dotenv
%dotenv

The dotenv extension is already loaded. To reload it, use:
  %reload_ext dotenv


In [77]:
# Authenticate the google.generativeai client with the key held in the
# GOOGLE_API_KEY_2 environment variable; the lookup raises KeyError if the
# variable is not set.
genai.configure(api_key=os.environ["GOOGLE_API_KEY_2"])

In [78]:
# Print the name of every available model that supports `generateContent`.
for model_info in genai.list_models():
    if 'generateContent' not in model_info.supported_generation_methods:
        continue
    print(model_info.name)

models/gemini-1.0-pro
models/gemini-1.0-pro-001
models/gemini-1.0-pro-latest
models/gemini-1.0-pro-vision-latest
models/gemini-1.5-flash
models/gemini-1.5-flash-001
models/gemini-1.5-flash-latest
models/gemini-1.5-pro
models/gemini-1.5-pro-001
models/gemini-1.5-pro-latest
models/gemini-pro
models/gemini-pro-vision


In [79]:
# Seconds to sleep after each request in the evaluation loops.
# NOTE(review): presumably sized to stay within the API's per-minute rate
# limit for this model -- confirm against the current quota.
delay_seconds = 31

# Model under evaluation. The same string is used as the key under which the
# answers are stored in each entry's "responses" dict.
model_name = "gemini-1.5-pro"
model = genai.GenerativeModel(model_name=model_name)

In [80]:
def get_chat_response(model, prompt: str) -> str:
    """Send ``prompt`` to ``model`` and return the whole reply as one string.

    The request is issued with ``stream=True``; the ``text`` of every streamed
    chunk is concatenated in the order the chunks arrive.

    Args:
        model: Object exposing ``generate_content(prompt, stream=True)`` that
            yields chunks carrying a ``text`` attribute.
        prompt: Prompt text forwarded unchanged to the model.

    Returns:
        The concatenation of all chunk texts (empty string if no chunks).
    """
    stream = model.generate_content(prompt, stream=True)
    return "".join(piece.text for piece in stream)

## Physical Temporal

In [67]:
# Load the physical-temporal control prompts. A later cell rewrites this file
# as UTF-8 with ensure_ascii=False, so read it with the same explicit encoding
# rather than the platform default (which is locale-dependent).
with open("../physical-temporal/physical-temporal-control.json", encoding="utf-8") as f:
    physical_temporal_prompts = json.load(f)

# Query the model after the file has been closed so the handle is not held
# open across the slow, rate-limited API calls.
for entry in physical_temporal_prompts:
    answer = get_chat_response(model, entry["prompt"]).strip()
    # setdefault keeps any answers already stored for other models.
    entry.setdefault("responses", {})[model_name] = answer
    print("Gemini:", answer, "Human:", entry["human_annotation"])
    # Pause between requests.
    time.sleep(delay_seconds)

Gemini: 2 Human: 3
Gemini: 4 Human: 1
Gemini: 2 Human: 3
Gemini: 3 Human: 1
Gemini: 1 Human: 1
Gemini: 3 Human: 3
Gemini: 4 Human: 4
Gemini: 4 Human: 2
Gemini: 1 Human: 1
Gemini: 3 Human: 3
Gemini: 2 Human: 1
Gemini: 2 Human: 2
Gemini: 2 Human: 4


In [68]:
# Count the prompts where the model's answer, parsed as an integer, equals the
# human annotation, then report the fraction of matches.
num_correct = sum(
    item["human_annotation"] == int(item["responses"][model_name])
    for item in physical_temporal_prompts
)
print("Accuracy:", num_correct/len(physical_temporal_prompts))

Accuracy: 0.46153846153846156


In [69]:
# Write the prompts, now carrying this model's answers, back to the file they
# were loaded from (UTF-8, non-ASCII characters kept as-is, 2-space indent).
with open("../physical-temporal/physical-temporal-control.json", mode="w", encoding="utf-8") as sink:
    json.dump(obj=physical_temporal_prompts, fp=sink, ensure_ascii=False, indent=2)

## Spatial Temporal

In [70]:
# Load the spatial-temporal control prompts. A later cell rewrites this file
# as UTF-8 with ensure_ascii=False, so read it with the same explicit encoding
# rather than the platform default (which is locale-dependent).
with open("../spatial-temporal/spatial-temporal-control.json", encoding="utf-8") as f:
    spatial_temporal_prompts = json.load(f)

# Query the model after the file has been closed so the handle is not held
# open across the slow, rate-limited API calls.
for entry in spatial_temporal_prompts:
    response = model.generate_content(entry["prompt"])
    answer = response.text.strip()
    # setdefault keeps any answers already stored for other models.
    entry.setdefault("responses", {})[model_name] = answer
    print("Gemini:", answer, "Human:", entry["human_annotation"])
    # Pause between requests.
    time.sleep(delay_seconds)

Gemini: 1 Human: 1
Gemini: 1 Human: 3
Gemini: 3 Human: 3
Gemini: 4 Human: 4
Gemini: 1 Human: 2
Gemini: 1 Human: 1
Gemini: 1 Human: 1
Gemini: 1 Human: 3
Gemini: 1 Human: 1
Gemini: 1 Human: 3
Gemini: 3 Human: 4
Gemini: 3 Human: 2
Gemini: 1 Human: 4
Gemini: 3 Human: 3


In [71]:
# Count the prompts where the model's answer, parsed as an integer, equals the
# human annotation, then report the fraction of matches.
num_correct = sum(
    item["human_annotation"] == int(item["responses"][model_name])
    for item in spatial_temporal_prompts
)
print("Accuracy:", num_correct/len(spatial_temporal_prompts))

Accuracy: 0.5


In [72]:
# Write the prompts, now carrying this model's answers, back to the file they
# were loaded from (UTF-8, non-ASCII characters kept as-is, 2-space indent).
with open("../spatial-temporal/spatial-temporal-control.json", mode="w", encoding="utf-8") as sink:
    json.dump(obj=spatial_temporal_prompts, fp=sink, ensure_ascii=False, indent=2)

## Spatial Physical

In [73]:
# Load the spatial-physical control prompts. A later cell rewrites this file
# as UTF-8 with ensure_ascii=False, so read it with the same explicit encoding
# rather than the platform default (which is locale-dependent).
with open("../spatial-physical/spatial-physical-control.json", encoding="utf-8") as f:
    spatial_physical_prompts = json.load(f)

# Query the model after the file has been closed so the handle is not held
# open across the slow, rate-limited API calls.
for entry in spatial_physical_prompts:
    response = model.generate_content(entry["prompt"])
    answer = response.text.strip()
    # setdefault keeps any answers already stored for other models.
    entry.setdefault("responses", {})[model_name] = answer
    print("Gemini:", answer, "Human:", entry["human_annotation"])
    # Pause between requests.
    time.sleep(delay_seconds)

Gemini: 3 Human: 4
Gemini: 1 Human: 1
Gemini: 4 Human: 1
Gemini: 4 Human: 1
Gemini: 3 Human: 4
Gemini: 3 Human: 3
Gemini: 3 Human: 2
Gemini: 1 Human: 1
Gemini: 3 Human: 2
Gemini: 3 Human: 4
Gemini: 1 Human: 1
Gemini: 1 Human: 1
Gemini: 1 Human: 2
Gemini: 1 Human: 1


In [74]:
# Count the prompts where the model's answer, parsed as an integer, equals the
# human annotation, then report the fraction of matches.
num_correct = sum(
    item["human_annotation"] == int(item["responses"][model_name])
    for item in spatial_physical_prompts
)
print("Accuracy:", num_correct/len(spatial_physical_prompts))

Accuracy: 0.42857142857142855


In [75]:
# Write the prompts, now carrying this model's answers, back to the file they
# were loaded from (UTF-8, non-ASCII characters kept as-is, 2-space indent).
with open("../spatial-physical/spatial-physical-control.json", mode="w", encoding="utf-8") as sink:
    json.dump(obj=spatial_physical_prompts, fp=sink, ensure_ascii=False, indent=2)

## Spatial Temporal Physical

In [81]:
# Load the spatial-temporal-physical control prompts. A later cell rewrites
# this file as UTF-8 with ensure_ascii=False, so read it with the same explicit
# encoding rather than the platform default (which is locale-dependent).
with open("../spatial-temporal-physical/spatial-temporal-physical-control.json", encoding="utf-8") as f:
    spatial_temporal_physical_prompts = json.load(f)

# Query the model after the file has been closed so the handle is not held
# open across the slow, rate-limited API calls.
for entry in spatial_temporal_physical_prompts:
    answer = get_chat_response(model, entry["prompt"]).strip()
    # setdefault keeps any answers already stored for other models.
    entry.setdefault("responses", {})[model_name] = answer
    print("Gemini:", answer, "Human:", entry["human_annotation"])
    # Pause between requests.
    time.sleep(delay_seconds)

Gemini: 1 Human: 4
Gemini: 3 Human: 4
Gemini: 2 Human: 3
Gemini: 1 Human: 2
Gemini: 1 Human: 2
Gemini: 1 Human: 2
Gemini: 4 Human: 2
Gemini: 2 Human: 4
Gemini: 1 Human: 1
Gemini: 2 Human: 2
Gemini: 1 Human: 4
Gemini: 3 Human: 2
Gemini: 2 Human: 2
Gemini: 1 Human: 3
Gemini: 3 Human: 2


In [82]:
# Count the prompts where the model's answer, parsed as an integer, equals the
# human annotation, then report the fraction of matches.
num_correct = sum(
    item["human_annotation"] == int(item["responses"][model_name])
    for item in spatial_temporal_physical_prompts
)
print("Accuracy:", num_correct/len(spatial_temporal_physical_prompts))

Accuracy: 0.2


In [83]:
# Write the prompts, now carrying this model's answers, back to the file they
# were loaded from (UTF-8, non-ASCII characters kept as-is, 2-space indent).
with open("../spatial-temporal-physical/spatial-temporal-physical-control.json", mode="w", encoding="utf-8") as sink:
    json.dump(obj=spatial_temporal_physical_prompts, fp=sink, ensure_ascii=False, indent=2)