In [10]:
import os
import openai
import pandas as pd
# Load your API key from an environment variable or secret management service
openai.api_key = os.getenv("OPENAI_API_KEY")



In [7]:
engine_names = """
davinci-instruct-beta-v3
curie-instruct-beta-v2
babbage-instruct-beta
ada-instruct-beta
text-davinci-001
text-curie-001
text-babbage-001
text-ada-001
davinci-instruct-beta
curie-instruct-beta
davinci
curie
babbage
ada
"""
engine_names = [y for y in [x.strip() for x in engine_names.split("\n")] if len(y) > 0]
engine_names

['davinci-instruct-beta-v3',
 'curie-instruct-beta-v2',
 'babbage-instruct-beta',
 'ada-instruct-beta',
 'text-davinci-001',
 'text-curie-001',
 'text-babbage-001',
 'text-ada-001',
 'davinci-instruct-beta',
 'curie-instruct-beta',
 'davinci',
 'curie',
 'babbage',
 'ada']

In [14]:
response_infos = []
for engine_name in engine_names:
    print(f"calling {engine_name}")
    response_info = {"engine_name": engine_name}
    response = openai.Completion.create(engine=engine_name, 
                                        prompt="Test api", max_tokens=0, logprobs=1, echo=True)
    response_info["model"] = response["model"]
    response_info["response"] = response
    response_infos.append(response_info)


calling davinci-instruct-beta-v3
calling curie-instruct-beta-v2
calling babbage-instruct-beta
calling ada-instruct-beta
calling text-davinci-001
calling text-curie-001
calling text-babbage-001
calling text-ada-001
calling davinci-instruct-beta
calling curie-instruct-beta
calling davinci
calling curie
calling babbage
calling ada


In [37]:
df = pd.DataFrame(response_infos)
df = df.sort_values(by='model')
df.set_index("model")

Unnamed: 0_level_0,engine_name,response
model,Unnamed: 1_level_1,Unnamed: 2_level_1
ada:2020-05-03,ada,"{'id': 'cmpl-4XG6pqmi9fArwLH8FHSkxDix3C1Mf', '..."
babbage:2020-05-03,babbage,"{'id': 'cmpl-4XG6pa5aploLby4BG6K8WNQFRNeVt', '..."
curie:2020-05-03,curie,"{'id': 'cmpl-4XG6nDpsG75luBvxmZPoub1HkeHoo', '..."
davinci:2020-05-03,davinci,"{'id': 'cmpl-4XG6nD4O7sNV6fjvJP3ctgpxuQA3P', '..."
if-curie-v2,curie-instruct-beta,"{'id': 'cmpl-4XG6mLSXgdzIl83WarXza4WZwJHYK', '..."
if-davinci-v2,davinci-instruct-beta,"{'id': 'cmpl-4XG6mKvuP99aX7GdlHJXbceK0NKsE', '..."
text-ada:001,ada-instruct-beta,"{'id': 'cmpl-4XG6hjayAH1E8SeJijqp75HkDrMxz', '..."
text-ada:001,text-ada-001,"{'id': 'cmpl-4XG6mHB4vlVxzAua9gBrirntIpEGN', '..."
text-babbage:001,babbage-instruct-beta,"{'id': 'cmpl-4XG6hPMokqOUgscCKrLjN8CORPZUo', '..."
text-babbage:001,text-babbage-001,"{'id': 'cmpl-4XG6kzzstakc8k9BP3eNzksS99oBX', '..."


In [38]:
print(response)

{
  "choices": [
    {
      "finish_reason": "length",
      "index": 0,
      "logprobs": {
        "text_offset": [
          0,
          4
        ],
        "token_logprobs": [
          null,
          -10.3049135
        ],
        "tokens": [
          "Test",
          " api"
        ],
        "top_logprobs": [
          null,
          {
            "_": -2.4764898
          }
        ]
      },
      "text": "Test api"
    }
  ],
  "created": 1643832095,
  "id": "cmpl-4XG6pqmi9fArwLH8FHSkxDix3C1Mf",
  "model": "ada:2020-05-03",
  "object": "text_completion"
}


In [35]:
import sys, time
def call_gpt3(prompt, engine, max_tokens_to_generate=0, temperature=0, logprobs_per_token=1, echo=True, stop_token=None, n=None, max_tries=10):
    response = None
    success = False
    
    tries_cnt = 0
    while not success:
        try:
            if tries_cnt >= max_tries:
                print(f"Max tries {max_tries} reached!")
                break
                
            tries_cnt += 1
            response = openai.Completion.create(engine=engine, 
                                                prompt=prompt,
                                                max_tokens=max_tokens_to_generate,
                                                temperature=temperature,
                                                logprobs=logprobs_per_token,
                                                echo=echo,
                                                stop=stop_token,
                                                n=n)
            
            
            success = True
        except openai.error.InvalidRequestError as error: 
            print(f"InvalidRequestError:{error}\nPrompt sent:\n{prompt}\n")
            raise error
        except Exception as error:
            print(f"API error:{error}")
            time.sleep(1)
            
    return response

In [36]:
result = call_gpt3([23,56,78], "ada", max_tries=2)
print(result)

{
  "choices": [
    {
      "finish_reason": "length",
      "index": 0,
      "logprobs": {
        "text_offset": [
          0,
          1,
          2
        ],
        "token_logprobs": [
          null,
          -7.3669047,
          -6.4439845
        ],
        "tokens": [
          "8",
          "Y",
          "o"
        ],
        "top_logprobs": [
          null,
          {
            ".": -2.4928317
          },
          {
            "5": -3.7978659
          }
        ]
      },
      "text": "8Yo"
    }
  ],
  "created": 1622658654,
  "id": "cmpl-36Pvil7L3NGVRLYqni1p90VyUF9oB",
  "model": "ada:2020-05-03",
  "object": "text_completion"
}


In [49]:
#print(result)
import torch
import numpy as np
import json

def openai_result_to_json(result):
    try:
        return json.loads(json.dumps(result))
    except:
        return None
    
def openai_result_to_fairseq_result(result):
    result_tokens = result["choices"][0]["logprobs"]["tokens"]
    result_logprobs = result["choices"][0]["logprobs"]["token_logprobs"]
    if result_logprobs[0] is None and len(result_logprobs)>1:
        result_logprobs[0] = np.mean(result_logprobs[1:])
        
    fairseq_result = {'tokens': result_tokens,
     'score': None, 
     'attention': None, 
     'alignment': None, 
     'positional_scores': torch.tensor(result_logprobs, dtype=torch.float32)}
    fairseq_result["gpt3_response"] = openai_result_to_json(result)
    
    return fairseq_result

print(openai_result_to_fairseq_result(result))

{'tokens': ['8', 'Y', 'o'], 'score': None, 'attention': None, 'alignment': None, 'positional_scores': tensor([-6.9054, -7.3669, -6.4440]), 'gpt3_response': {'id': 'cmpl-36Pvil7L3NGVRLYqni1p90VyUF9oB', 'object': 'text_completion', 'created': 1622658654, 'model': 'ada:2020-05-03', 'choices': [{'text': '8Yo', 'index': 0, 'logprobs': {'tokens': ['8', 'Y', 'o'], 'token_logprobs': [-6.9054446, -7.3669047, -6.4439845], 'top_logprobs': [None, {'.': -2.4928317}, {'5': -3.7978659}], 'text_offset': [0, 1, 2]}, 'finish_reason': 'length'}]}}


In [None]:
a= [{'tokens': []
     'score': 0.6, 
     'attention': None, 
     'alignment': None, 
     'positional_scores': []}]

In [71]:
def get_common_prefix_and_suffix_lengths(tokens_list):
    tokens_list = [np.array(x) for x in tokens_list]
    n = min([len(tokens) for tokens in tokens_list])
    # Compute common prefix length
    prefix_len = n
    first = tokens_list[0][:n]
    for tokens in tokens_list[1:]:
        neq_inds = (first != tokens[:n]).nonzero()[0]
        if len(neq_inds) > 0:
            prefix_len = min(prefix_len, neq_inds[0].item())

    # Compute common suffix length
    suffix_len = n
    first = tokens_list[0][-n:]
    for tokens in tokens_list[1:]:
        neq_inds = (first != tokens[-n:]).nonzero()[0]
        if len(neq_inds) > 0:
            suffix_len = min(suffix_len, n - 1 - neq_inds[-1].item())

    return prefix_len, suffix_len

In [74]:
get_common_prefix_and_suffix_lengths([["a", "b", "c"], ["a", "b", "d", "e"]])

(2, 0)