# GPT Function Calling Test

We will be testing out the intelligent JSON generator in the GPT API. You can find out more about it here: https://platform.openai.com/docs/guides/gpt/chat-completions-api


In [8]:
import json
import os
import re

import openai
import requests
import tiktoken
from dotenv import load_dotenv
from tenacity import retry, wait_random_exponential, stop_after_attempt
from termcolor import colored


In [9]:
# Load the OpenAI API key from the .env file and hand it to the client.
load_dotenv()
secret_key = os.getenv('OPENAI_API_KEY')

# `openai` is imported before load_dotenv() runs, so a key that only lives in
# .env is not visible to the library at import time. Register it explicitly so
# later cells that read `openai.api_key` (or call the SDK) are authenticated.
# Only overwrite when a key was actually found, to avoid clobbering a key that
# was configured some other way.
if secret_key:
    openai.api_key = secret_key

In [77]:
# Instruction text that is prepended to the raw transcript before it is sent
# to the completion model.
prompt = (
    "The following transcript of a lab experiement has text with start and end "
    "times of when they were said in a video. "
    "Edit the transcript into a clean and concise lab procedure that would "
    "appear in a lab report that contains the start and end times in each of "
    "the bullet points: "
)

In [74]:
# Obtain the raw transcript, including its time stamps.
# The relative path below is resolved against the current working directory.
cwd = os.getcwd()

# Read the transcript text file. The encoding is given explicitly so the
# result does not depend on the platform's default locale encoding.
with open("data/gpt_test_transcript_time.txt", "r", encoding="utf-8") as file:
    transcript = file.read()

print(transcript)

we're going to find [0.0-1.0]
 the weight of an empty 25 graduate 25 millimeter graduated cylinder so you want to turn the scale on [1.0-10.0]
 if it doesn't read zero [10.0-12.0]
 when you first turn it on tear or zero is a button that you want to push to make sure that it reads zero when nothing is on the scale [12.0-24.0]
 okay then you can put your graduated cylinder on and record the mass of your graduated cylinder [24.0-32.0]



## Function Calling Testing

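I attempted to use function calling, but it does not achieve what we want from the API: as the outputs below show, the model only passes the transcript back as a function argument instead of producing the cleaned-up procedure.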

In [64]:
GPT_MODEL = "gpt-3.5-turbo-0613"
# Utility functions from OpenAI Cookbook
@retry(wait=wait_random_exponential(min=1, max=40), stop=stop_after_attempt(3))
def chat_completion_request(messages, functions=None, function_call=None, model=GPT_MODEL, timeout=120):
    """POST a chat-completion request to the OpenAI REST endpoint.

    Args:
        messages: list of chat message dicts sent as the "messages" field.
        functions: optional list of function specifications; included in the
            payload only when not None.
        function_call: optional "function_call" value; included in the
            payload only when not None.
        model: model name sent as the "model" field.
        timeout: seconds passed to ``requests.post`` so a stalled connection
            cannot hang the notebook indefinitely.

    Returns:
        The ``requests.Response`` object. The HTTP status is not checked here;
        callers decide how to handle error responses.

    Raises:
        tenacity.RetryError: if every one of the three attempts raised.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": "Bearer " + openai.api_key,
    }
    json_data = {"model": model, "messages": messages}
    if functions is not None:
        json_data["functions"] = functions
    if function_call is not None:
        json_data["function_call"] = function_call
    try:
        return requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers=headers,
            json=json_data,
            timeout=timeout,
        )
    except Exception as e:
        print("Unable to generate ChatCompletion response")
        print(f"Exception: {e}")
        # Re-raise instead of returning the exception object: returning it
        # hid the failure from the @retry decorator (so nothing was ever
        # retried) and made callers crash later on `.json()`.
        raise
def pretty_print_conversation(messages):
    """Print every chat message, colored according to the sender's role.

    Args:
        messages: iterable of chat message dicts. Each needs a "role" key;
            system/user messages need "content", assistant messages need
            either a truthy "function_call" or "content", and function
            messages need "name" and "content".

    Messages whose role is not system, user, assistant or function are
    skipped silently.
    """
    role_to_color = {
        "system": "red",
        "user": "green",
        "assistant": "blue",
        "function": "magenta",
    }
    for message in messages:
        role = message["role"]
        if role == "system":
            formatted_message = f"system: {message['content']}\n"
        elif role == "user":
            formatted_message = f"user: {message['content']}\n"
        elif role == "assistant":
            if message.get("function_call"):
                formatted_message = f"assistant: {message['function_call']}\n"
            else:
                formatted_message = f"assistant: {message['content']}\n"
        elif role == "function":
            formatted_message = f"function ({message['name']}): {message['content']}\n"
        else:
            # Unknown roles are not printed.
            continue
        # Use the role of the message being printed. Looking the color up by
        # the position of the formatted text went out of step with `messages`
        # as soon as one message had been skipped.
        print(colored(formatted_message, role_to_color[role]))

In [76]:
# Define JSON file format
functions = [
    {
        "name": "get_single_lab_instructions",
        "description": "Converts transcript of a lab experiment into a clean, consice, and time stamped lab report",
        "parameters":{
            "type": "object",
            "properties":{
                "transcript":{
                    "type": "string",
                    "description": "Raw transcript with respective time stamps, e.g. I'll walk over here to grab this flask [12.0-13.5]",
                },
            },
            "required": ["transcript"]
        },
    },
    {
        "name": "get_lab_summary",
        "description": "Generate a short summary of what we are accomplishing in the lab described in the transcript",
        "parameters":{
            "type": "object",
            "properties":{
                "transcript":{
                    "type": "string",
                    "description": "Raw transcript with respective time stamps, e.g. I'll walk over here to grab this flask [12.0-13.5]",
                },
            },
            "required": ["transcript"]
        },
    },
    {
        "name": "get_current_weather",
        "description": "Get the current weather",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {
                    "type": "string",
                    "description": "The city and state, e.g. San Francisco, CA",
                },
                "format": {
                    "type": "string",
                    "enum": ["celsius", "fahrenheit"],
                    "description": "The temperature unit to use. Infer this from the users location.",
                },
            },
            "required": ["location", "format"],
        },
    },
]

In [73]:
# Seed the conversation with a system guard-rail and the user's request.
# The user message currently ends with the placeholder "x " instead of the
# real transcript text.
messages = [
    {
        "role": "system",
        "content": "Don't make assumptions about what values to plug into functions. Ask for clarification if a user request is ambiguous.",
    },
    {
        "role": "user",
        "content": "From the following lab transcript, generate a clean and concise lab procedure that would appear in a lab report: x ",
    },
]
# Alternative user prompts tried during experimentation:
# messages.append({"role": "user", "content": "Generate a short lab report summary from the following transcript:" + transcript})
# messages.append({"role": "user", "content": "What's the weather like today"})

chat_response = chat_completion_request(messages, functions=functions)

# Pull the assistant's reply out of the response, add it to the history, and
# display it as the cell's output.
assistant_message = chat_response.json()["choices"][0]["message"]
messages.append(assistant_message)
assistant_message

{'role': 'assistant',
 'content': None,
 'function_call': {'name': 'get_single_lab_instructions',
  'arguments': '{\n"transcript": "x"\n}'}}

In [68]:
# Display the complete decoded JSON body of the most recent chat completion response.
chat_response.json()

{'role': 'assistant',
 'content': None,
 'function_call': {'name': 'get_single_lab_instructions',
  'arguments': '{\n  "transcript": "we\'re going to find [0.0-1.0] the weight of an empty 25 graduate 25 millimeter graduated cylinder so you want to turn the scale on [1.0-10.0] if it doesn\'t read zero [10.0-12.0] when you first turn it on tear or zero is a button that you want to push to make sure that it reads zero when nothing is on the scale [12.0-24.0] okay then you can put your graduated cylinder on and record the mass of your graduated cylinder [24.0-32.0]"\n}'}}

## Custom JSON Generator

Well, let's give it a go: prompt a plain completion model for the procedure and parse its text output ourselves.

In [81]:
def properReformat(raw_response):
    """Tidy a raw completion string into clean, newline-separated lines.

    Each line is stripped of surrounding whitespace and lines that end up
    empty are dropped. The remaining text is left as-is (no numbering is
    added) and re-joined with single newlines.

    Args:
        raw_response: the text to clean up.

    Returns:
        The cleaned text as one string.
    """
    kept_lines = []
    for raw_line in raw_response.split('\n'):
        trimmed = raw_line.strip()
        if trimmed:
            kept_lines.append(trimmed)
    return '\n'.join(kept_lines)

def properReformatv2(raw_response):
    """Parse a bulleted procedure into a structured, JSON-ready dict.

    The text is expected to look like::

        Procedure:
        • Turn on the scale (0.0-1.0)
        • Record the mass (24.0-32.0)

    Every "•"-separated chunk that ends in "(<start>-<end>)" becomes one
    step; chunks that do not match that shape are ignored.

    Args:
        raw_response: raw completion text.

    Returns:
        ``{"procedure": [{"step": ..., "start_time": ..., "end_time": ...}, ...]}``
        where all three values are whitespace-stripped strings.
    """
    # Step text, then "(start-end)" anchored at the end of the chunk.
    step_pattern = re.compile(r"^(.*?)\((.*?)\-(.*?)\)$")

    # Drop surrounding whitespace and the "Procedure:" heading.
    body = raw_response.strip().replace("Procedure:", "")

    procedure = []
    for step in body.split("•"):
        match = step_pattern.match(step.strip())
        if not match:
            continue
        content, start_time, end_time = (part.strip() for part in match.groups())
        procedure.append(
            {
                "step": content,
                "start_time": start_time,
                "end_time": end_time,
            }
        )

    # Return the structure: previously it was built and then discarded, so
    # the function always returned None.
    return {"procedure": procedure}


In [78]:
model = "text-davinci-003"

# Count the transcript's tokens to pick a max_tokens value. The encoding is
# looked up from the model name; the earlier hard-coded
# get_encoding("cl100k_base") call was overwritten immediately and never used.
encoding = tiktoken.encoding_for_model(model)
num_tokens = len(encoding.encode(transcript))

# Ask the completion model to rewrite the transcript; the output budget is
# capped at the transcript's own token count.
response_preset_output = openai.Completion.create(
    model=model,
    prompt=prompt + transcript,
    temperature=0.2,  # in range (0,2), higher = more creative
    max_tokens=num_tokens,
)

response_preset_output

<OpenAIObject text_completion id=cmpl-7aseXAGfDJIQEY9u2NfrWIYlRDVsi at 0x2baa7bb31f0> JSON: {
  "id": "cmpl-7aseXAGfDJIQEY9u2NfrWIYlRDVsi",
  "object": "text_completion",
  "created": 1689024249,
  "model": "text-davinci-003",
  "choices": [
    {
      "text": "\nProcedure:\n\u2022 Turn on the scale (0.0-1.0)\n\u2022 Press the zero button if the scale does not read zero (10.0-12.0)\n\u2022 Place the empty 25 millimeter graduated cylinder on the scale (12.0-24.0)\n\u2022 Record the mass of the graduated cylinder (24.0-32.0)",
      "index": 0,
      "logprobs": null,
      "finish_reason": "stop"
    }
  ],
  "usage": {
    "prompt_tokens": 184,
    "completion_tokens": 82,
    "total_tokens": 266
  }
}

In [80]:
# Take the generated text of the first choice in the completion response.
raw_response_preset = (
    response_preset_output
    .get('choices')[0]
    .get('text')
)
raw_response_preset

'\nProcedure:\n• Turn on the scale (0.0-1.0)\n• Press the zero button if the scale does not read zero (10.0-12.0)\n• Place the empty 25 millimeter graduated cylinder on the scale (12.0-24.0)\n• Record the mass of the graduated cylinder (24.0-32.0)'

In [82]:
# Clean up the raw completion text with properReformat and print the result.
response_preset = properReformat(raw_response_preset)
print(response_preset)

Procedure:
• Turn on the scale (0.0-1.0)
• Press the zero button if the scale does not read zero (10.0-12.0)
• Place the empty 25 millimeter graduated cylinder on the scale (12.0-24.0)
• Record the mass of the graduated cylinder (24.0-32.0)
