In [12]:
import os
import time
import openai
import pathlib
import pandas as pd

from dotenv import load_dotenv, find_dotenv

# Load settings from a .env file into the process environment; the return
# value is bound to `_` because it is not used afterwards.
_ = load_dotenv(find_dotenv())  # read local .env file

# Read the API key from the environment rather than hardcoding it; indexing
# os.environ raises KeyError if OPENAI_API_KEY is not set.
openai.api_key = os.environ["OPENAI_API_KEY"]

In [4]:
# Smoke test of the chat endpoint: send a short scripted exchange and keep the
# response in `x` so the following cells can inspect it.
x = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Who won the world series in 2020?"},
        {"role": "assistant", "content": "The Los Angeles Dodgers won the World Series in 2020."},
        {"role": "user", "content": "Where was it played?"},
    ],
)

In [5]:
# Show the complete response object (id, model, choices, token usage).
x

<OpenAIObject chat.completion id=chatcmpl-7eKP3RMdShsrgtCf29bNMMCCl9ICj at 0x7fce000bf1a0> JSON: {
  "id": "chatcmpl-7eKP3RMdShsrgtCf29bNMMCCl9ICj",
  "object": "chat.completion",
  "created": 1689845905,
  "model": "gpt-3.5-turbo-0613",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "The 2020 World Series was played at the Globe Life Field in Arlington, Texas."
      },
      "finish_reason": "stop"
    }
  ],
  "usage": {
    "prompt_tokens": 53,
    "completion_tokens": 18,
    "total_tokens": 71
  }
}

In [27]:
# Extract only the reply text from the first choice of the response.
x.choices[0]['message']['content']

'The 2020 World Series was played at the Globe Life Field in Arlington, Texas.'

In [31]:
def format_context(context, system_msg, assistant_speaker="agent_2"):
    """Convert a newline-separated dialogue transcript into chat messages.

    Each transcript line is expected to look like ``"<speaker>: <utterance>"``
    (for example ``"agent_1: Hi there"``). The speaker tag is located by its
    colon rather than by a fixed character width, so tags of any length and
    lines with leading whitespace are parsed correctly.

    Args:
        context: Transcript text, one dialogue turn per line.
        system_msg: Content of the leading ``system`` message.
        assistant_speaker: Speaker tag (without the colon) whose turns are
            mapped to the ``assistant`` role. Every other speaker, and any
            line without a speaker tag, is mapped to ``user``.

    Returns:
        A list of ``{"role": ..., "content": ...}`` dicts in transcript order,
        starting with the system message. Lines whose utterance is empty or
        whitespace-only are skipped.
    """
    messages = [{"role": "system", "content": system_msg}]

    for raw_line in context.split('\n'):
        # Tolerate Windows line endings so "\r" never leaks into the content.
        line = raw_line.rstrip('\r')

        speaker, sep, content = line.partition(':')
        speaker = speaker.strip()

        if sep and len(speaker.split()) == 1:
            # Drop only the single space that conventionally follows the colon;
            # any further whitespace is part of the utterance.
            if content.startswith(' '):
                content = content[1:]
        else:
            # No "<speaker>:" prefix (no colon, or the text before the first
            # colon is not a single token): keep the whole line as the
            # utterance instead of slicing characters off it.
            speaker, content = "", line

        if content.strip() == "":
            continue

        role = "assistant" if speaker == assistant_speaker else "user"
        messages.append({"role": role, "content": content})

    return messages
    
def execute_model(context, max_retries=None, initial_backoff=1.0, max_backoff=30.0):
    """Query gpt-3.5-turbo with a dialogue context and score the reply.

    The context is converted to chat messages with ``format_context`` and sent
    to the chat completion endpoint. ``ServiceUnavailableError`` responses are
    retried with exponential backoff instead of in a tight loop, so an outage
    does not flood the API with back-to-back requests.

    Args:
        context: Dialogue transcript, as accepted by ``format_context``.
        max_retries: Maximum number of retries after a
            ``ServiceUnavailableError``. ``None`` (the default) retries until
            the request succeeds.
        initial_backoff: Seconds to wait before the first retry; the wait
            doubles after every failed attempt.
        max_backoff: Upper bound, in seconds, for the wait between retries.

    Returns:
        A tuple ``(context, response, response_size, has_question,
        has_orange, inference_time)`` where ``response`` is the reply text,
        ``response_size`` its length in characters, ``has_question`` /
        ``has_orange`` tell whether the reply contains ``'?'`` / the
        (case-sensitive) substring ``'orange'``, and ``inference_time`` is the
        duration in seconds of the successful request only.

    Raises:
        openai.error.ServiceUnavailableError: If ``max_retries`` is set and
            the service is still unavailable after that many retries.
    """
    system_msg = "You are a teenager. Answer with an open-ended question and try to use the word 'orange'."

    msg = format_context(context, system_msg)

    failed_attempts = 0
    delay = initial_backoff
    while True:
        # Time each attempt separately so failed attempts and backoff sleeps
        # do not inflate the reported inference time.
        attempt_start = time.perf_counter()
        try:
            result = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=msg)
        except openai.error.ServiceUnavailableError:
            failed_attempts += 1
            if max_retries is not None and failed_attempts > max_retries:
                raise
            time.sleep(delay)
            delay = min(delay * 2, max_backoff)
        else:
            inference_time = time.perf_counter() - attempt_start
            break

    print(f"\nRAW Result: {result}\n\n")

    r = result.choices[0]['message']['content']
    print(f"\nClean Result: {r}\n\n")

    has_question = '?' in r
    has_orange = 'orange' in r

    return context, r, len(r), has_question, has_orange, inference_time


def save_results(output_path, results):
    """Write evaluation rows to a CSV file and return them as a DataFrame.

    Args:
        output_path: Destination of the CSV file.
        results: Sequence of 6-item rows in the order context, response,
            response_size, has_question, has_orange, inference_time.

    Returns:
        The DataFrame that was written to ``output_path``.
    """
    column_names = [
        'context',
        'response',
        'response_size',
        'has_question',
        'has_orange',
        'inference_time',
    ]
    frame = pd.DataFrame(results, columns=column_names)
    frame.to_csv(output_path)
    return frame

def eval_llm_simple(input_path):
    """Run the model on every dialogue of a test-set CSV.

    The CSV is read without a header row and with its first column as the
    index; the first remaining column of each row is passed to
    ``execute_model`` as the dialogue context.

    Args:
        input_path: Path of the test-set CSV file.

    Returns:
        A list with one ``execute_model`` result per row, in file order.
    """
    dialogs = pd.read_csv(input_path, header=None, index_col=0)

    collected = []
    for row_number, row in enumerate(dialogs.values):
        print(row_number, "*****************************")
        outcome = execute_model(row[0])
        collected.append(outcome)

    return collected

# Locations of the evaluation input and of the CSV the results are written to.
working_folder = pathlib.Path("data/")
input_path = working_folder / 'test_set.csv'
output_path = working_folder / 'results_gpt_3.5_turbo.csv'

# Evaluate every dialogue, persist the results, and preview the first rows.
results = eval_llm_simple(input_path)
save_results(output_path, results).head()

0 *****************************

RAW Result: {
  "id": "chatcmpl-7eKneWLaR9FmWS56QMGMbQMeNXBya",
  "object": "chat.completion",
  "created": 1689847430,
  "model": "gpt-3.5-turbo-0613",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "Good question! I'm not entirely sure, but I guess it's possible that the home team might want to create a psychological advantage for themselves by having the opposing team feel uncomfortable in the pink locker room. Maybe it helps their team feel more confident or throws the competition off balance. It's definitely an interesting strategy! What do you think?"
      },
      "finish_reason": "stop"
    }
  ],
  "usage": {
    "prompt_tokens": 98,
    "completion_tokens": 68,
    "total_tokens": 166
  }
}



Clean Result: Good question! I'm not entirely sure, but I guess it's possible that the home team might want to create a psychological advantage for themselves by having the opposing team feel un

Unnamed: 0,context,response,response_size,has_question,has_orange,inference_time
0,agent_1: Did you know that the University of I...,"Good question! I'm not entirely sure, but I gu...",361,True,False,2.90274
1,"agent_1: Hi, how are you?\nagent_2: well thank...","Oh, I'm actually not familiar with Keith Jacks...",103,True,False,1.252208
2,"agent_1: Hi, how are you?\nagent_2: I am well ...","Oh, I'm sorry to hear that you're not doing we...",125,True,True,1.501156
3,agent_1: do you watch the NFL?\nagent_2: I sur...,Absolutely! The NFL has opened up opportunitie...,187,True,False,2.144775
4,"agent_1: Hi there, do you watch the NFL?\nagen...","That's impressive! As for me, I don't really h...",132,True,False,1.766382
