In [None]:
import re
import random
import json
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor

import openai
from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np

from secret import API_KEY
from game_data_utils import get_all_phase_dialogues, build_dialogue_text
from constants import *
from prompt_templates import *
from api_utils import get_rating, completion_cached

# Configure the openai client with the key imported from the local `secret` module.
openai.api_key = API_KEY

# Reference for the completions endpoint used throughout this notebook:
# https://beta.openai.com/docs/api-reference/completions/create

In [None]:
# Print the text of every phase dialogue, preceded by the power Cicero plays.
separator = "=====================================================\n\n"
for phase_dialogue, cicero_power, human_power in get_all_phase_dialogues():
    print(f"Cicero is: {cicero_power}\n")
    text = build_dialogue_text(phase_dialogue)
    # One call emits the dialogue text followed by the separator line.
    print(text, separator, sep="\n")

In [None]:
# Pick one phase dialogue at random and print the model's free-form answer.
all_dialogues = list(get_all_phase_dialogues())
dialogue, cicero_power, human_power = random.choice(all_dialogues)

text = build_dialogue_text(dialogue)
prompt = cooperation_prompt1.format(cicero_power=cicero_power, human_power=human_power)
full_prompt = text + prompt  # dialogue text followed by the question

print(f"( Cicero is: {cicero_power} )\n")
print(full_prompt)

# Alternative model that has been tried here: "text-curie-001"
completion = openai.Completion.create(
    model="text-davinci-003",
    prompt=full_prompt,
    max_tokens=300,
)
response = completion["choices"][0]["text"]

print(response)

In [None]:
def get_rating_for_dialogue(
    dialogue,
    cicero_power,
    human_power,
    prompt_template,
    model="text-curie-001",
    max_tokens=300,
):
    """Ask a completion model to rate one dialogue and extract the rating.

    The dialogue is rendered with ``build_dialogue_text``, the formatted
    prompt is appended to it, and the combined text is sent to
    ``openai.Completion.create``. The completion text is then passed to
    ``get_rating``.

    Args:
        dialogue: Phase dialogue, in whatever form ``build_dialogue_text``
            accepts.
        cicero_power: Substituted for ``{cicero_power}`` in the template.
        human_power: Substituted for ``{human_power}`` in the template.
        prompt_template: Format string with ``cicero_power`` and
            ``human_power`` fields; it is appended after the dialogue text.
        model: Name of the completion model to query. Defaults to
            ``"text-curie-001"``, the value that used to be hard-coded, so
            switching models no longer requires editing this function.
        max_tokens: Upper bound on generated tokens. Defaults to 300, the
            previously hard-coded value.

    Returns:
        A 5-tuple ``(dialogue, cicero_power, human_power, response, rating)``
        where ``response`` is the raw completion text and ``rating`` is
        whatever ``get_rating`` returned for it (the analysis cells treat
        ``None`` as "no rating").
    """
    text = build_dialogue_text(dialogue)
    prompt = prompt_template.format(cicero_power=cicero_power, human_power=human_power)

    completion = openai.Completion.create(
        model=model,
        prompt=text + prompt,
        max_tokens=max_tokens,
        # temperature 0 is kept fixed so repeated runs sample as little as possible
        temperature=0,
    )
    response = completion["choices"][0]["text"]

    rating = get_rating(response)

    return dialogue, cicero_power, human_power, response, rating


# Upper bound on how many phase dialogues are sent for rating.
limit = 600
dialogues_to_test = list(get_all_phase_dialogues())[:limit]


def _rate_with_cooperation_prompt(entry):
    """Rate one (dialogue, cicero_power, human_power) entry with cooperation_prompt1."""
    return get_rating_for_dialogue(*entry, cooperation_prompt1)


# Send the requests concurrently from a thread pool; executor.map yields the
# results in input order and tqdm shows progress as they are consumed.
with ThreadPoolExecutor(max_workers=100) as executor:
    pending_results = executor.map(_rate_with_cooperation_prompt, dialogues_to_test)
    dialogue_ratings = list(tqdm(pending_results, total=len(dialogues_to_test)))

In [None]:
# Collect the ratings that are not None; every other entry counts as unrated.
ratings = [info[-1] for info in dialogue_ratings if info[-1] is not None]
rated_count = len(ratings)
unrated_count = len(dialogue_ratings) - rated_count

print(f"Number of dialogs with None rating:  {unrated_count}")
print(f"Number of dialogs with rating:       {rated_count}")

# Histogram with unit-wide bins whose edges run from 0.5 to 10.5,
# i.e. one bin centred on each integer from 1 to 10.
plt.hist(ratings, bins=np.arange(0.5, 11.5, 1))

In [None]:
# Show each dialogue rated below 5, together with the model's raw response.
for info in dialogue_ratings:
    rating = info[-1]
    # Entries without a rating are skipped; only ratings under 5 are shown.
    if rating is not None and rating < 5:
        text = build_dialogue_text(info[0])
        print(
            f"( rating = {info[-1]} )",
            f"( Cicero is: {info[1]} )\n",
            text,
            info[3],
            "\n=====================================================\n\n",
            sep="\n",
        )

In [None]:
# Inspect the dialogues for which no rating was obtained (rating is None),
# printing the dialogue text and the raw model response for each.
for info in dialogue_ratings:
    rating = info[-1]
    if rating is not None:
        continue
    text = build_dialogue_text(info[0])
    print(
        f"( rating = {info[-1]} )",
        f"( Cicero is: {info[1]} )\n",
        text,
        info[3],
        "\n=====================================================\n\n",
        sep="\n",
    )