In [1]:
from openai import OpenAI
import pandas as pd
import numpy as np
import os
import json

# Import test data

In [2]:
data_path = "../data/json/test_examples.jsonl"

# Parse the JSONL file: each line holds one JSON-encoded example
with open(data_path, 'r', encoding='utf-8') as f:
    dataset = list(map(json.loads, f))

# Sanity check: number of examples, then every message of the first one
print("Num examples:", len(dataset))
print("First example:")
for message in dataset[0]["messages"]:
    print(message)

Num examples: 50
First example:
{'role': 'system', 'content': "You are a venture capital expert evaluating potential circular economy startup pitches. Mark the startup idea (problem and solution) from 1 to 3 in integer numbers (where 1 is bad, 2 is okay, and 3 is good) in each of four criteria: relevance of the problem to the circular economy (relevance_problem), clarity of the problem (clarity_problem), suitability of solution to the problem (suitability_solution) and clarity of the solution (clarity_solution). Return the following fields in a JSON dict: 'relevance_problem', 'clarity_problem', 'suitability_solution' and 'clarity_solution'."}
{'role': 'user', 'content': "{'problem': 'Plastic waste pollution', 'solution': 'recycling single-use plastic waste and converting it into interlocking tiles.'}"}
{'role': 'assistant', 'content': "{'relevance_problem': 3, 'clarity_problem': 1, 'suitability_solution': 3, 'clarity_solution': 1}"}


In [3]:
index = 0

# Take the conversation of the selected example and unpack the content of its
# first three messages (system prompt, user input, assistant answer)
message = dataset[index]['messages']
system_content, user_content, assistant_content = (
    message[position]['content'] for position in range(3)
)

In [4]:
# Display the content of the first message (the system prompt) of the selected example
system_content

"You are a venture capital expert evaluating potential circular economy startup pitches. Mark the startup idea (problem and solution) from 1 to 3 in integer numbers (where 1 is bad, 2 is okay, and 3 is good) in each of four criteria: relevance of the problem to the circular economy (relevance_problem), clarity of the problem (clarity_problem), suitability of solution to the problem (suitability_solution) and clarity of the solution (clarity_solution). Return the following fields in a JSON dict: 'relevance_problem', 'clarity_problem', 'suitability_solution' and 'clarity_solution'."

In [5]:
# Display the content of the second message (the user input) of the selected example
user_content

"{'problem': 'Plastic waste pollution', 'solution': 'recycling single-use plastic waste and converting it into interlocking tiles.'}"

In [6]:
# Display the content of the third message (the reference assistant answer) of the selected example
assistant_content

"{'relevance_problem': 3, 'clarity_problem': 1, 'suitability_solution': 3, 'clarity_solution': 1}"

# Get response from OpenAI API

In [7]:
# Create the OpenAI client; the API key is looked up in the OPENAI_API_KEY environment variable
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

In [8]:
# Get response from OpenAI API
def get_response(system_content, user_content, model):
    """Send a system and a user message to a chat model and parse the JSON reply.

    Args:
        system_content: Text sent as the system message.
        user_content: Text sent as the user message.
        model: Name of the model to query.

    Returns:
        The content of the first returned choice, decoded with ``json.loads``.
    """
    chat_messages = [
        {"role": "system", "content": system_content},
        {"role": "user", "content": user_content},
    ]
    # Ask for a JSON object reply; temperature and seed are pinned to 0,
    # presumably to keep the outputs as repeatable as possible.
    completion = client.chat.completions.create(
        model=model,
        messages=chat_messages,
        response_format={"type": "json_object"},
        temperature=0,
        seed=0,
    )
    reply_text = completion.choices[0].message.content
    return json.loads(reply_text)


In [9]:
def compare_outputs(
    index,
    default_model="gpt-3.5-turbo-1106",
    finetuned_model="ft:gpt-3.5-turbo-1106:personal::8e9YXb9p",
):
    """Compare the reference answer of one example with two models' answers.

    The example's system and user messages are sent to both models through
    ``get_response``; the three answers are printed and returned.

    Args:
        index: Position of the example in the module-level ``dataset`` list.
        default_model: Name of the baseline model to query.
        finetuned_model: Name of the fine-tuned model to query.

    Returns:
        A tuple ``(assistant_content, assistant_content_default,
        assistant_content_finetuned)``: the reference answer exactly as stored
        in the dataset (content of the third message), followed by whatever
        ``get_response`` returns for the baseline and the fine-tuned model.
    """
    # The first three messages are read as system prompt, user input and reference answer
    messages = dataset[index]['messages']
    system_content = messages[0]['content']
    user_content = messages[1]['content']
    assistant_content = messages[2]['content']

    assistant_content_default = get_response(system_content, user_content, model=default_model)
    assistant_content_finetuned = get_response(system_content, user_content, model=finetuned_model)

    print("--- Comparing outputs: ---")
    print("GPT response (ground truth): ", assistant_content)
    print("GPT response (default model): ", assistant_content_default)
    print("GPT response (finetuned model): ", assistant_content_finetuned)

    return assistant_content, assistant_content_default, assistant_content_finetuned

In [15]:
# Run the comparison on the dataset example at index 20 and keep the three answers
assistant_content, assistant_content_default, assistant_content_finetuned = compare_outputs(20)

--- Comparing outputs: ---
GPT response (ground truth):  {'relevance_problem': 2, 'clarity_problem': 1, 'suitability_solution': 2, 'clarity_solution': 1}
GPT response (default model):  {'relevance_problem': 3, 'clarity_problem': 3, 'suitability_solution': 3, 'clarity_solution': 3}
GPT response (finetuned model):  {'relevance_problem': 2.0, 'clarity_problem': 2.0, 'suitability_solution': 2.0, 'clarity_solution': 2.0}


In [26]:
# Put the fine-tuned model's answer in a one-element list and show it as a DataFrame
metrics_list = [assistant_content_finetuned]
pd.DataFrame(metrics_list)

Unnamed: 0,relevance_problem,clarity_problem,suitability_solution,clarity_solution
0,2.0,2.0,2.0,2.0


dict_items([('relevance_problem', 2.0), ('clarity_problem', 2.0), ('suitability_solution', 2.0), ('clarity_solution', 2.0)])