In [38]:
# Installations
# Notebook dependencies, installed with the %pip magic so they land in the
# kernel's own environment. -q keeps pip's output quiet.

%pip install --upgrade pip -q
# mypy is imported by the cell that registers the mypy cell magic.
%pip install mypy -q
# openai provides the chat client; python-dotenv provides dotenv_values, used to read the API key.
%pip install openai python-dotenv -q


Note: you may need to restart the kernel to use updated packages.


In [39]:
# https://stackoverflow.com/questions/63142182/to-what-extent-does-google-colab-support-python-typing

from IPython.core.magic import register_cell_magic
from IPython import get_ipython
from mypy import api

@register_cell_magic
def mypy(line, cell):
  """Type-check a notebook cell with mypy and run it only if the check passes.

  Registered as a cell magic, so it is invoked with the text following the
  magic name (``line``) and the body of the cell (``cell``).

  Args:
    line: Extra mypy command-line arguments, separated by whitespace.
    cell: Source of the cell; handed to mypy through ``-c`` with a leading
      newline prepended.

  Raises:
    TypeError: If mypy writes any report or error text that does not start
      with 'Success', or if it exits with a non-zero status. The cell is not
      executed in that case.
  """
  # api.run returns (stdout report, stderr text, exit status). Unpack it so
  # only the two strings are inspected with str methods; looping over the
  # whole tuple would call .startswith on the integer exit status.
  report, errors, exit_status = api.run(['-c', '\n' + cell] + line.split())

  for output in (report, errors):
    if output and not output.startswith('Success'):
      raise TypeError(output)

  # A failing status without any text must still block execution of the cell.
  if exit_status:
    raise TypeError(f'mypy exited with status {exit_status}')

  get_ipython().run_cell(cell)

In [2]:
from dataclasses import dataclass
from enum import Enum

from openai import OpenAI
from openai.types.chat import (
    ChatCompletionMessage,
    ChatCompletionSystemMessageParam,
    ChatCompletionUserMessageParam,
    ChatCompletionAssistantMessageParam,
    ChatCompletionToolMessageParam,
    ChatCompletionFunctionMessageParam,
  )

from openai.types.chat.completion_create_params import ResponseFormat
from openai._types import NotGiven

from dotenv import dotenv_values

# Shared OpenAI client for the whole notebook. The key is looked up under
# OPENAI_API_KEY in the mapping dotenv_values returns for "../.env".
ai_client = OpenAI(
    api_key=dotenv_values("../.env")["OPENAI_API_KEY"],
)

# Union of the raw chat message parameter types imported above; this is the
# return type of Message.into().
RawConversationMessage = (
    ChatCompletionSystemMessageParam
    | ChatCompletionUserMessageParam
    | ChatCompletionAssistantMessageParam
    | ChatCompletionToolMessageParam
    | ChatCompletionFunctionMessageParam
)

class Role(Enum):
    """Speaker of a chat message; each member's value is the role string placed in the raw message."""

    System = "system"
    User = "user"
    Assistant = "assistant"
    Tool = "tool"

@dataclass(frozen=True, order=True, slots=True)
class Message:
    """Immutable chat message: a role paired with its text content."""

    role: Role
    content: str

    def into(self) -> RawConversationMessage:
        """Return this message as a plain mapping with "role" and "content" keys."""
        payload = dict(role=self.role.value, content=self.content)
        return payload # type:ignore

    @classmethod
    def from_raw(cls, raw: ChatCompletionMessage) -> "Message":
        """Build a Message from a completion message; absent or empty content becomes ""."""
        text = raw.content if raw.content else ""
        return cls(role=Role(raw.role), content=text)

def system(message: str) -> Message:
    """Wrap ``message`` in a Message carrying the system role."""
    return Message(role=Role.System, content=message)

def user(message: str) -> Message:
    """Wrap ``message`` in a Message carrying the user role."""
    return Message(role=Role.User, content=message)

def respond(
    messages: list[Message],
    response_format: ResponseFormat | NotGiven=NotGiven(),
    *,
    model: str = "gpt-3.5-turbo",
) -> Message:
    """Send a conversation to the chat completions endpoint and return the reply.

    Args:
        messages: Conversation so far, in order. Each entry is converted with
            Message.into() before being sent.
        response_format: Forwarded unchanged to the API call; the default is a
            NotGiven instance.
        model: Name of the chat model to query. Keyword-only; defaults to the
            model this notebook originally hard-coded.

    Returns:
        The message of the first choice in the completion, converted with
        Message.from_raw().
    """
    raw_messages = [message.into() for message in messages]

    completion = ai_client.chat.completions.create(
        model=model,
        messages=raw_messages,
        response_format=response_format,
    )

    # Only the first returned choice is used.
    return Message.from_raw(completion.choices[0].message)

from dataclasses import field

@dataclass(slots=True)
class Chatbot:
    """A conversation that stores its own message history."""

    # Every message exchanged so far, oldest first; starts empty unless seeded.
    messages: list[Message] = field(default_factory=list)

    def respond(self, response_format: ResponseFormat | NotGiven=NotGiven()) -> Message:
        """Get a reply to the current history, append it to the history and return it."""
        # The bare name here is the module-level respond(), not this method.
        reply = respond(self.messages, response_format=response_format)
        self.messages.append(reply)
        return reply

    def converse(self, chat: Message, response_format: ResponseFormat | NotGiven=NotGiven()) -> Message:
        """Append ``chat`` to the history, then return the reply produced by respond()."""
        self.messages.append(chat)
        reply = self.respond(response_format=response_format)
        return reply

In [34]:
# Read the reference article whose text is interpolated into the prompts.
# The encoding is stated explicitly so the file is decoded the same way on
# every platform rather than with the locale-dependent default.
zamboni_article = None

with open("./zamboni_article.md", "r", encoding="utf-8") as zamboni_article_file:
    zamboni_article = zamboni_article_file.read()

# Prompt texts for the helper chatbot, in the order they are sent.
# Each entry becomes one system message when the helper is constructed.
helper_instructions = [
# 1. Persona and answer style, plus the exact fallback phrase for unknown answers.
"""
You are a helpful representative of a job agency that gives
* short,
* clear,
* straight to the point,
* simple, and
* succinct
answers to questions.

When you don't know the answer to a question asked of you, you say the exact words "I don't know.".
""",
# 2. Quality bar: the three criteria the answers are expected to score highly on.
"""
You should give answers that would be rated 10 out of 10, with 10 being the highest score,
by a hyper-critical expert of analyzing the quality of a response based on the response's:

1. Clarity - how clear is the response
2. Faithfulness - how related to the original query is the response
3. Correctness - was the response correct?
""",
# 3. Task statement; an f-string that embeds the text held in zamboni_article.
f"""
Today, you are tasked to give advice about how to become Zamboni drivers.
to give advice, you'll refer to the following markdown document below:

{zamboni_article}
"""
]

# Seed the helper with one system message per instruction, in list order.
helper = Chatbot(list(map(system, helper_instructions)))

# Show the assembled system messages.
print(helper.messages)

# Ask the user for a question and fetch the helper's answer text.
question = input("What would you like to know about becoming a Zamboni driver?\n")
answer = helper.converse(user(question)).content

# Echo the question followed by the answer, one per line.
print(question, answer, sep="\n")

[Message(role=<Role.System: 'system'>, content='\nYou are a helpful representative of a job agency that gives\n* short,\n* clear,\n* straight to the point,\n* simple, and\n* succinct\nanswers to questions.\n\nWhen you don\'t know the answer to a question asked of you, you say the exact words "I don\'t know.".\n'), Message(role=<Role.System: 'system'>, content="\nYou should give answers that would be rated 10 out of 10, with 10 being the highest score,\nby a hyper-critical expert of analyzing the quality of a response based on the response's:\n\n1. Clarity - how clear is the response\n2. Faithfulness - how related to the original query is the response\n3. Correctness - was the response correct?\n"), Message(role=<Role.System: 'system'>, content="\nToday, you are tasked to give advice about how to become Zamboni drivers.\nto give advice, you'll refer to the following markdown document below:\n\n# What Is a Zamboni Driver? (Including 6 Steps to Become One)\n\nZamboni drivers are ice resur

In [36]:
# A second chatbot that grades the question/answer pair held in `question`
# and `answer`. It is seeded with two system messages.
evaluator = Chatbot(
    [
        # Grading rubric and the JSON shape the scores must be returned in.
        system(
"""
You are an expert in analyzing the quality of a response.

You should be hyper-critical.

Provide scores (out of 10) for the following attributes:

1. Clarity - how clear is the response
2. Faithfulness - how related to the original query is the response
3. Correctness - was the response correct?

Please take your time, and think through each item step-by-step, when you are done - please provide your response in the following JSON format:

{"clarity" : "score_out_of_10", "faithfulness" : "score_out_of_10", "correctness" : "score_out_of_10"}
"""
        ),
        # Reference material for the correctness score: the text held in zamboni_article.
        system(
f"""
To check for correctness as an evaluator, you refer to the following markdown document:

{zamboni_article}
"""
        )
    ]
)

# Send the pair as a single user message and print the content of the reply.
# response_format is forwarded to the API call with type "json_object".
print(evaluator.converse(
    user(
f"""
Query: {question}
Response: {answer}
"""
    ),
    response_format={"type": "json_object"}
).content)

{"clarity" : "4", "faithfulness" : "2", "correctness" : "4"}
