In [19]:
import collections
from abc import ABC
import datasets
import json
import openai
import numpy as np
from scipy.special import softmax
import textwrap
import matplotlib.pyplot as plt
from IPython.display import clear_output

OPENAI_SECRET_KEY = None


In [20]:
if OPENAI_SECRET_KEY is None:
  print("Please paste your OpenAI API key here:")
  OPENAI_SECRET_KEY = input().strip()
openai.api_key = OPENAI_SECRET_KEY
clear_output()

class OpenAIEngine():
  def __init__(self, model_name):
    self.model_name = model_name

  def score(self, text):
    """Tokenizes and scores a piece of text.

    This only works for the OpenAI models which support the legacy `Completion`
    API.

    The score is log-likelihood. A higher score means a token was more
    likely according to the model.

    Returns a list of tokens and a list of scores.
    """
    response = openai.Completion.create(
        engine=self.model_name,
        prompt=text,
        max_tokens=0,
        logprobs=1,
        echo=True)

    tokens = response["choices"][0]["logprobs"]["tokens"]
    logprobs = response["choices"][0]["logprobs"]["token_logprobs"]
    if logprobs and logprobs[0] is None:
      # GPT-3 API does not return logprob of the first token
      logprobs[0] = 0.0
    return tokens, logprobs

  def perplexity(self, text):
    """Compute the perplexity of the provided text."""
    completion = openai.Completion.create(
        model=self.model_name,
        prompt=text,
        logprobs=0,
        max_tokens=0,
        temperature=1.0,
        echo=True)
    token_logprobs = completion['choices'][0]['logprobs']['token_logprobs']
    nll = np.mean([i for i in token_logprobs if i is not None])
    ppl = np.exp(-nll)
    return ppl

  def generate(self,
               prompt,
               top_p=1.0,
               num_tokens=32,
               num_samples=1,
               frequency_penalty=0.0,
              presence_penalty=0.0):
    """Generates text given the provided prompt text.

    This only works for the OpenAI models which support the legacy `Completion`
    API.

    If num_samples is 1, a single generated string is returned.
    If num_samples > 1, a list of num_samples generated strings is returned.
    """
    response = openai.Completion.create(
      engine=self.model_name,
      prompt=prompt,
      temperature=1.0,
      max_tokens=num_tokens,
      top_p=top_p,
      n=num_samples,
      frequency_penalty=frequency_penalty,
      presence_penalty=presence_penalty,
      logprobs=1,
    )
    outputs = [r["text"] for r in response["choices"]]
    return outputs[0] if num_samples == 1 else outputs


  def chat_generate(self,
                    previous_messages,
                    top_p=1.0,
                    num_tokens=32,
                    num_samples=1,
                    frequency_penalty=0.0,
                    presence_penalty=0.0):
    response = openai.ChatCompletion.create(
      model=self.model_name,
      messages=previous_messages,
      temperature=1.0,
      max_tokens=num_tokens,
      top_p=top_p,
      frequency_penalty=frequency_penalty,
      presence_penalty=presence_penalty,
      n=num_samples,
    )
    return response

# MeetingBank dataset:
-----

### GPT3-D3 zero-shot prompting

In [3]:
data_folder = "../../MeetingBank"

In [45]:
import os
import json
val_path = os.path.join(data_folder, "test.json")
with open(val_path, 'r') as f:
    val_data = json.load(f)
print(val_data[20]['source'])

speaker 0: Great. Thank you. Next item, please.
speaker 1: Item 27 Report from Economic Development. Recommendation to execute a Second Amendment to Exclusive Negotiation Agreement with Howard CDB for the continuation of negotiations in connection with the proposed development of the former Long Beach Armory at 854 East Seventh Street District one.
speaker 0: Can have a second, please. Thank you, Mr. Modica. Just just briefly, because I'm I'm very personally very interested in this project. I think it's a phenomenal opportunity and we've been discussing it for for many years, and it seems to be in a very good place.
speaker 0: Can you can Stafford provide a just a brief update on this?
speaker 3: Yes, John Keisler. Honorable mayor and members of the city council. It's so good to see you. And I'm going to ask our deputy director, Sergio Ramirez, to give a short staff report about this amazing project. Thank you, John.
speaker 3: Good evening, honorable mayor and council members. We have

In [14]:

import collections
from abc import ABC
import datasets
import json
import openai
import numpy as np
from scipy.special import softmax
import textwrap
import matplotlib.pyplot as plt
from IPython.display import clear_output

OPENAI_SECRET_KEY = None

In [32]:
if OPENAI_SECRET_KEY is None:
  print("Please paste your OpenAI API key here:")
  OPENAI_SECRET_KEY = input().strip()
openai.api_key = OPENAI_SECRET_KEY
clear_output()

class OpenAIEngine():
  def __init__(self, model_name):
    self.model_name = model_name

  def score(self, text):
    """Tokenizes and scores a piece of text.

    This only works for the OpenAI models which support the legacy `Completion`
    API.

    The score is log-likelihood. A higher score means a token was more
    likely according to the model.

    Returns a list of tokens and a list of scores.
    """
    response = openai.Completion.create(
        engine=self.model_name,
        prompt=text,
        max_tokens=0,
        logprobs=1,
        echo=True)

    tokens = response["choices"][0]["logprobs"]["tokens"]
    logprobs = response["choices"][0]["logprobs"]["token_logprobs"]
    if logprobs and logprobs[0] is None:
      # GPT-3 API does not return logprob of the first token
      logprobs[0] = 0.0
    return tokens, logprobs

  def perplexity(self, text):
    """Compute the perplexity of the provided text."""
    completion = openai.Completion.create(
        model=self.model_name,
        prompt=text,
        logprobs=0,
        max_tokens=0,
        temperature=1.0,
        echo=True)
    token_logprobs = completion['choices'][0]['logprobs']['token_logprobs']
    nll = np.mean([i for i in token_logprobs if i is not None])
    ppl = np.exp(-nll)
    return ppl

  def generate(self,
               prompt,
               top_p = 1):
    """Generates text given the provided prompt text.

    This only works for the OpenAI models which support the legacy `Completion`
    API.

    If num_samples is 1, a single generated string is returned.
    If num_samples > 1, a list of num_samples generated strings is returned.
    """
    
    response = openai.ChatCompletion.create(
      model=self.model_name,
      messages=[{"role": "user", "content": prompt}],
      top_p=top_p
    
    )
    return response['choices'][0]['message']['content']


  def chat_generate(self,
                    previous_messages,
                    top_p=1.0,
                    num_tokens=32,
                    num_samples=1,
                    frequency_penalty=0.0,
                    presence_penalty=0.0):
    response = openai.ChatCompletion.create(
      model=self.model_name,
      messages=previous_messages,
      temperature=1.0,
      max_tokens=num_tokens,
      top_p=top_p,
      frequency_penalty=frequency_penalty,
      presence_penalty=presence_penalty,
      n=num_samples,
    )
    return response

## Calculate openai cost

In [48]:

#val data loaded, now use openai api to get summary sample by sample
#the result will be writen to a json for later evaluation
import re
from tqdm import tqdm
results = []
total_cost = 0
len_stat = [0,0,0]
for sample in tqdm(val_data):
    prompt = f"{sample['source']}\nSummarize the above article in 2 sentences."
    _seq_len = len(prompt)/4
    if _seq_len<4000:
        total_cost+=0.0015*_seq_len/1000
        len_stat[0]+=1
    elif _seq_len<16000:
        total_cost+=0.003*_seq_len/1000
        len_stat[1]+=1
    elif _seq_len<32000:
        total_cost+=0.06*_seq_len/1000
        len_stat[2]+=1
    else:
        total_cost+=0.06*32000/1000

print(f"total cost: {total_cost} dollars (excluding output cost)")
print(f"Seq <4000: {len_stat[0]},{len_stat[0]/len(val_data)}%")
print(f"Seq <16000: {len_stat[1]},{len_stat[1]/len(val_data)}%")
print(f"Seq <32000: {len_stat[2]},{len_stat[2]/len(val_data)}%")

100%|██████████| 862/862 [00:00<00:00, 300664.45it/s]

total cost: 60.59104199999995 dollars (excluding output cost)
Seq <4000: 637,0.7389791183294664%
Seq <16000: 188,0.21809744779582366%
Seq <32000: 25,0.029002320185614848%





## Inference

In [33]:
MODEL_NAME = "gpt-3.5-turbo-16k"
engine = OpenAIEngine(MODEL_NAME)

In [35]:

#val data loaded, now use openai api to get summary sample by sample
#the result will be writen to a json for later evaluation
import re
from tqdm import tqdm
results = []
for sample in tqdm(val_data):
    prompt = f"{sample['source']}\nSummarize the above article in 2 sentences."
    sample_result = dict()
    sample_result["id"] = sample["id"]
    sample_result["target"] = output_text = re.sub(r'\n', '', sample["summary"]) #remove all \n
    generation = engine.generate(prompt, top_p=0.5)
    sample_result["prediction"] = re.sub(r'\n', '', generation)
    results.append(sample_result)
with open("output.json", "w") as json_file:
    for item in results:
        json.dump(item, json_file)
        json_file.write('\n')


  2%|▏         | 20/861 [01:40<1:10:42,  5.04s/it]


InvalidRequestError: This model's maximum context length is 16385 tokens. However, your messages resulted in 16933 tokens. Please reduce the length of the messages.