In [57]:
import os
import openai
import sys
from typing import Optional, Union, Sequence, Dict, Callable
sys.path.append("..")
import matplotlib.pyplot as plt

import numpy as np
import config
import json
from scipy import spatial
import tiktoken
import yaml

# OpenAI

In [58]:
def get_chatcompletion(
    prompt: str,
    model: str = "gpt-3.5-turbo",
    temperature: float = 0.0,
    max_tokens: int = 512,
    top_p: float = 1.0,
    n: int = 1,
):
    """Send ``prompt`` as a single user message to the OpenAI chat endpoint.

    Args:
        prompt: Text placed in the ``content`` of the one ``user`` message.
        model: Chat model name forwarded to the API.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Completion token limit forwarded to the API.
        top_p: Nucleus-sampling value forwarded to the API.
        n: Number of completions requested from the API.

    Returns:
        The object returned by ``openai.ChatCompletion.create``, unmodified.
    """
    request = dict(
        model=model,
        # The conversation consists of exactly one user turn; no system message.
        messages=[{"role": "user", "content": prompt}],
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        n=n,
    )
    return openai.ChatCompletion.create(**request)

def get_embeddings(
    input: Union[str, Sequence],
    model: str = "text-embedding-ada-002",
):
    """Request embeddings for one string or a sequence of inputs.

    Args:
        input: A single string, or a sequence passed to the API as-is.
        model: Embedding model name forwarded to the API.

    Returns:
        The object returned by ``openai.Embedding.create``, unmodified.
    """
    # A bare string is wrapped so the API is always handed a list-like input.
    texts = [input] if isinstance(input, str) else input
    return openai.Embedding.create(model=model, input=texts)

def extract(response_obj: Union[Dict, Callable], type: str):
    """Pull the payload list out of an OpenAI response object.

    Args:
        response_obj: Response supporting ``[]`` lookup. For
            ``"chat_completion"`` it must have a ``"choices"`` list whose items
            carry ``["message"]["content"]``; for ``"embedding"`` it must have a
            ``"data"`` list whose items carry ``["embedding"]``.
        type: Either ``"chat_completion"`` or ``"embedding"``.

    Returns:
        A list with one entry per choice (message content) or per data item
        (embedding), in the order they appear in the response.

    Raises:
        ValueError: If ``type`` is not one of the two supported values.
    """
    if type == "chat_completion":
        return [choice["message"]["content"] for choice in response_obj["choices"]]
    if type == "embedding":
        return [emb["embedding"] for emb in response_obj["data"]]
    # Fail loudly instead of implicitly returning None, which would only
    # surface later as a confusing error when the caller iterates the result.
    raise ValueError(
        f"Unsupported type {type!r}; expected 'chat_completion' or 'embedding'."
    )
    
def cosine_dist(vect1, vect2):
    """Return the cosine distance between ``vect1`` and ``vect2``.

    Thin wrapper that delegates to ``scipy.spatial.distance.cosine``.
    """
    distance = spatial.distance.cosine(vect1, vect2)
    return distance

def num_tokens_from_string(string: str, encoding_name: str = "cl100k_base") -> int:
    """Count the tokens ``string`` encodes to under a tiktoken encoding.

    Args:
        string: Text to tokenize.
        encoding_name: Name handed to ``tiktoken.get_encoding``.

    Returns:
        Length of the sequence produced by the encoder's ``encode`` call.
    """
    token_ids = tiktoken.get_encoding(encoding_name).encode(string)
    return len(token_ids)

def load_prompt_from_yaml(yaml_file: str):
    """Load and parse a YAML prompt file.

    Args:
        yaml_file: Path of the YAML file to read (opened as UTF-8 text).

    Returns:
        Whatever ``yaml.safe_load`` produces for the file's content.

    Raises:
        yaml.YAMLError: If the file cannot be parsed. The message names the
            offending file and the original error is chained as ``__cause__``.
        OSError: If the file cannot be opened.
    """
    # Explicit encoding so the result does not depend on the platform default.
    with open(yaml_file, "r", encoding="utf-8") as f:
        try:
            return yaml.safe_load(f)
        except yaml.YAMLError as exc:
            # Previously the error was only printed and execution fell through
            # to `return prompt` with `prompt` unbound, masking the real
            # problem behind an UnboundLocalError.
            raise yaml.YAMLError(
                f"Could not parse YAML prompt file {yaml_file!r}: {exc}"
            ) from exc

In [5]:
# Read the OpenAI key from auth/openai_api_key.json under config.MAIN_DIR
# so the secret never appears in the notebook itself.
key_path = os.path.join(config.MAIN_DIR, "auth", "openai_api_key.json")
with open(key_path, "r") as f:
    api_info = json.load(f)

openai.api_key = api_info["OPENAI_API_KEY"]

# Listing the models exercises the key against the API.
openai.Model.list()

<OpenAIObject list at 0x7f3340289b30> JSON: {
  "data": [
    {
      "created": 1649358449,
      "id": "babbage",
      "object": "model",
      "owned_by": "openai",
      "parent": null,
      "permission": [
        {
          "allow_create_engine": false,
          "allow_fine_tuning": false,
          "allow_logprobs": true,
          "allow_sampling": true,
          "allow_search_indices": false,
          "allow_view": true,
          "created": 1669085501,
          "group": null,
          "id": "modelperm-49FUp5v084tBB49tC4z8LPH5",
          "is_blocking": false,
          "object": "model_permission",
          "organization": "*"
        }
      ],
      "root": "babbage"
    },
    {
      "created": 1649359874,
      "id": "davinci",
      "object": "model",
      "owned_by": "openai",
      "parent": null,
      "permission": [
        {
          "allow_create_engine": false,
          "allow_fine_tuning": false,
          "allow_logprobs": true,
          "allow_sa

# Instruction Prompt

In [28]:
# Ask for two completions (n=2) of the same instruction at temperature 0.
prompt = "Generate two random adjectives. Return as two plain uncased words separated by newline"

output_response = get_chatcompletion(prompt=prompt, temperature=0, n=2)

print(output_response)

{
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "message": {
        "content": "fierce\nplayful",
        "role": "assistant"
      }
    },
    {
      "finish_reason": "stop",
      "index": 1,
      "message": {
        "content": "fierce\nplayful",
        "role": "assistant"
      }
    }
  ],
  "created": 1683099841,
  "id": "chatcmpl-7C1RZFT2MbAwhYT3U1URpsX4hd81W",
  "model": "gpt-3.5-turbo-0301",
  "object": "chat.completion",
  "usage": {
    "completion_tokens": 10,
    "prompt_tokens": 24,
    "total_tokens": 34
  }
}


In [34]:
# Print each returned completion under a 1-based heading.
responses = extract(output_response, type="chat_completion")
for idx, response in enumerate(responses):
    print(f"Response {idx + 1}:", response + "\n", sep="\n")

Response 1:
fierce
playful

Response 2:
fierce
playful



# Text Embedding Prompt

In [51]:
# Words to embed; all three are sent in a single embedding request.
input_text = ["dog", "cat", "aeroplane"]
emb_res = get_embeddings(input=input_text)

In [52]:
emb_outs = extract(emb_res, type="embedding")

# Length of each returned embedding vector.
for emb in emb_outs:
    print(len(emb))

# Cosine distance for every unordered pair of input texts.
n_texts = len(input_text)
for i in range(n_texts - 1):
    for j in range(i + 1, n_texts):
        dist = cosine_dist(emb_outs[i], emb_outs[j])
        print(f"Distance between {input_text[i]} and {input_text[j]} is {dist}.")

1536
1536
1536
Distance between dog and cat is 0.13633229576493555.
Distance between dog and aeroplane is 0.21854392797927857.
Distance between cat and aeroplane is 0.2379784452417718.


# NER Extraction

In [115]:
text = "John works for Facebook in California, United States of America. His NRIC number is G01234567."

# Labels the model may assign. The count stated in the prompt is derived from
# this list so the two cannot drift apart (the prompt used to say "5 labels"
# while listing six).
ner_labels = ["person", "city", "time", "country", "company", "NRIC"]
label_list = ", ".join(f'"{label}"' for label in ner_labels)

# NOTE(review): the second output requirement originally ended mid-sentence
# ("but only return content after "); it is completed here to ask for the
# tagged sentence only — confirm this matches the intended instruction.
prompt = f"""
Your task is to perform named entity recognition (NER) for each word \
inside the text delimited by triple backticks into the following {len(ner_labels)} labels: {label_list}.

Output requirements:
- The answer should return the sentence and the tags next to its corresponding words inside <>.
- Follow the format of the example provided, but only return the tagged sentence.

Example:
Text: Maria is living in New York.
Maria <person> is living in New <city> York <city>.

Text: ```{text}```
"""

In [116]:
# Single completion at temperature 0 for the NER prompt.
output_response = get_chatcompletion(prompt=prompt, temperature=0)

In [117]:
# Only one completion was requested, so show the first extracted reply.
ner_replies = extract(output_response, type="chat_completion")
print(ner_replies[0])

John <person> works for Facebook <company> in California <city>, United States of America <country>. His NRIC number is G01234567 <NRIC>.
