# Summary

Experiment with ways to support passing a list of prompts to query method. Some backends don't support this natively, others do, but none automatically would return the format I want.

In [1]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [185]:
from collections import defaultdict, deque
import matplotlib.pyplot as plt
import numpy as np
import openai
import os
import pandas as pd
from pathlib import Path
from threading import Thread

from jabberwocky.config import C
from jabberwocky.openai_utils import load_prompt, load_openai_api_key, \
    GPTBackend
from htools import *

In [3]:
cd_root()

Current directory: /Users/hmamin/jabberwocky


## Option 1: make thread that returns value so we can run a separate query for each thread

In [4]:
class ReturningThread(Thread):

    @add_docstring(Thread)
    def __init__(self, group=None, target=None, name=None,
                 args=(), kwargs=None, *, daemon=None):
        """This is identical to a regular thread except that the join method
        returns the value returned by your target function. The
        Thread.__init__ docstring is shown below for the sake of convenience.
        """
        super().__init__(group=group, target=target, name=name,
                         args=args, kwargs=kwargs, daemon=daemon)
        self.result = None

    def run(self):
        self.result = self._target(*self._args, **self._kwargs)

    def join(self, timeout=None):
        super().join(timeout)
        return self.result

In [5]:
def foo(x, wait=2):
    time.sleep(wait)
    return x

In [14]:
# Returns values but is slow (sync execution).
res = [foo(i) for i in range(5)]
res

[0, 1, 2, 3, 4]

In [12]:
threads = [Thread(target=foo, args=(i,)) for i in range(5)]
for thread in threads:
    thread.start()

# Regular thread returns None.
res = [thread.join() for thread in threads]
res

[None, None, None, None, None]

In [18]:
threads = [ReturningThread(target=foo, args=(i,)) for i in range(5)]
for thread in threads:
    thread.start()

# ReturningThread returns values!
res = [thread.join() for thread in threads]
res

[0, 1, 2, 3, 4]

## Try integrating into GPTBackend

In [8]:
# TODO: no guarantees these threads return in the right order, though, right?

In [9]:
gpt = GPTBackend()
gpt.ls()


Base: https://api.openai.com
Key: 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
Query func: <function query_gpt3 at 0x1237e7378>


In [10]:
gpt.switch('huggingface')
gpt.ls()

Switching openai backend to "huggingface".

Base: https://api.openai.com
Key: VMDTuFkyIsfUlSqJOCoQPslfbhNOIYroqF
Query func: <function query_gpt_huggingface at 0x1237e7268>


In [11]:
prompts = [
    'Six million years after the pandemic,',
    'The stegosaurus'
]
kwargs = {'max_tokens': 10}

In [28]:
threads = [ReturningThread(target=gpt.query, args=(prompt,), kwargs=kwargs) 
           for prompt in prompts]
for thread in threads:
    thread.start()
res = [thread.join() for thread in threads]

In [29]:
res

[('the world is still in the grip of a global',
  {'generated_text': ' the world is still in the grip of a global'}),
 ('is a large, large, and highly intelligent animal',
  {'generated_text': ' is a large, large, and highly intelligent animal'})]

In [32]:
threads = [ReturningThread(target=gpt.query,
                           args=(prompt,), 
                           kwargs={**kwargs, 'n': 3, 
                                   'logprobs': 4, 'engine_i': 1}) 
           for prompt in prompts]
for thread in threads:
    thread.start()
res = [thread.join() for thread in threads]

In [34]:
lmap(len, *res)

[2, 2]

In [36]:
res[0][0]

['the population of New York City is poised to rise',
 'the pandemic strain of influenza spreads and mutates',
 'our species is still struggling to deal with the effects']

In [37]:
res[0][1]

[{'generated_text': ' the population of New York City is poised to rise'},
 {'generated_text': ' the pandemic strain of influenza spreads and mutates'},
 {'generated_text': ' our species is still struggling to deal with the effects'}]

In [38]:
res[1][0]

['is one of the more remarkable prehistoric dinosaurs, and',
 ', or giant pterosaur from the late',
 'fossil, or dinosaur\nFossil bones of']

In [39]:
res[1][1]

[{'generated_text': ' is one of the more remarkable prehistoric dinosaurs, and'},
 {'generated_text': ', or giant pterosaur from the late'},
 {'generated_text': ' fossil, or dinosaur\nFossil bones of'}]

In [40]:
gpt.switch('gooseai')
gpt.ls()

Switching openai backend to "gooseai".

Base: https://api.goose.ai/v1
Key: sk-QtTMOXuKKuewX8khBHcoCGhzge9GvOpLxdHCmOjpCqCGNVD4
Query func: <function query_gpt3 at 0x125238bf8>


In [41]:
threads = [ReturningThread(target=gpt.query,
                           args=(prompt,), 
                           kwargs={'max_tokens': 8, 'n': 2, 
                                   'logprobs': 5, 'engine_i': 0}) 
           for prompt in prompts]
for thread in threads:
    thread.start()
res = [thread.join() for thread in threads]

In [50]:
len(res)

2

In [43]:
res[0][0]

['the world is still in the grip of',
 'scientists still do not know whether humans are']

In [44]:
res[1][0]

['is a fossilized dinosaur named by the',
 'is a famous carnivorous dinosaur from the']

In [52]:
len(res[0])#[1][1]

2

In [75]:
texts, resps = list(zip(*res))
texts

(['the world is still in the grip of',
  'scientists still do not know whether humans are'],
 ['is a fossilized dinosaur named by the',
  'is a famous carnivorous dinosaur from the'])

In [80]:
# resps[i][j] corresponds to prompt i, completion j.
[completion['logprobs'].tokens for completion in resps[0]]

[[' the', ' world', ' is', ' still', ' in', ' the', ' grip', ' of'],
 [' scientists',
  ' still',
  ' do',
  ' not',
  ' know',
  ' whether',
  ' humans',
  ' are']]

In [81]:
[completion['logprobs'].tokens for completion in resps[1]]

[[' is', ' a', ' fossil', 'ized', ' dinosaur', ' named', ' by', ' the'],
 [' is', ' a', ' famous', ' carniv', 'orous', ' dinosaur', ' from', ' the']]

In [83]:
gpt.ls()


Base: https://api.goose.ai/v1
Key: sk-QtTMOXuKKuewX8khBHcoCGhzge9GvOpLxdHCmOjpCqCGNVD4
Query func: <function query_gpt3 at 0x125238bf8>


In [88]:
threads2 = [ReturningThread(target=gpt.query,
                           args=(prompt,), 
                           kwargs={'max_tokens': 8, 'n': 1, 
                                   'logprobs': 5, 'engine_i': 0}) 
            for prompt in prompts]
for thread in threads2:
    thread.start()
res2 = [thread.join() for thread in threads2]

In [92]:
texts2, resps2 = list(zip(*res2))

In [93]:
texts2

('the future of the world’s', 'The stegosaurus (Ste')

In [95]:
# Because only 1 completion per prompt, resps is a dict instead of a list of 
# dicts.
resps2[0]

{'finish_reason': 'length',
 'index': 0,
 'logprobs': <OpenAIObject at 0x125e93d58> JSON: {
   "text_offset": [
     0,
     4,
     11,
     14,
     18,
     24,
     24,
     25
   ],
   "token_logprobs": [
     -1.7412109375,
     -5.03515625,
     -0.361083984375,
     -1.7802734375,
     -1.8515625,
     -1.89453125,
     -0.0006651878356933594,
     -0.00013065338134765625
   ],
   "tokens": [
     " the",
     " future",
     " of",
     " the",
     " world",
     "\ufffd",
     "\ufffd",
     "s"
   ],
   "top_logprobs": [
     {
       " a": -2.90234375,
       " it": -3.947265625,
       " scientists": -4.09375,
       " the": -1.7412109375,
       " we": -2.748046875
     },
     {
       " city": -4.51171875,
       " human": -4.06640625,
       " pand": -4.2109375,
       " virus": -2.9921875,
       " world": -1.6845703125
     },
     {
       " is": -2.072265625,
       " looks": -3.4296875,
       " of": -0.361083984375,
       " remains": -3.44921875,
       " still

In [89]:
with gpt('huggingface'):
    hf_res = gpt.query('I want', engine_i=1, max_tokens=5, n=2)

Switching openai backend to "huggingface".
Switching  backend back to "huggingface".


In [167]:
Results(text=hf_res[0], full=hf_res[1])

Results(text=['to give you one big', 'to show you some pictures'], full=[{'generated_text': ' to give you one big'}, {'generated_text': ' to show you some pictures'}])

In [None]:
# Better interface
# texts, full_resps = gpt.query([p1, p2, p3], n=2)

## Test streaming mode

Need a better understanding of what using streaming mode is like before I decide about streaming interface for np or nc > 1.

In [21]:
from base64 import b64encode

from jabberwocky.openai_utils import query_gpt3, query_gpt_huggingface, \
    query_gpt_banana, query_gpt_j, query_gpt_repeat

In [70]:
# Was toying with idea of adding this to gpt.query warnings to make the 
# messages unique, in the hope that this would ensure they're always shown
# rather than just once. But a. I'm not sure if that's how they define 
# duplicates, and b. I'm seeing code defined in nb seems to always show 
# warnings, not just once, so I'm not sure what to make of that. Still 
# eventually want to write a func like this (maybe moreso for creating new
# file paths when encountering collisions) but that should have a more limited
# set of possible characters.
def random_str(length, lower=True):
    rand = b64encode(os.urandom(length)).decode()[:length]
    return rand.lower() if lower else rand

In [55]:
for i in range(15):
    rand = random_str(i)
    print(i, rand)

0 
1 3
2 l6
3 oym
4 jo/0
5 xqztr
6 4mtzog
7 ge5korg
8 660d5aiz
9 fcdkd1iyz
10 z4t+jhlyb5
11 shkuswnafmu
12 nqgpfuek1/r1
13 hwn6vlcf1v54n
14 bptgfd0uds2s13


In [20]:
os.urandom(10)

b'P\xe3\x87#\x99i\xc4\x88d\x92'

In [12]:
with gpt('repeat'):
    repeat_res = gpt.query('I want', max_tokens=5, stream=True)

Switching openai backend to "repeat".
Switching  backend back to "huggingface".


  'Streaming mode does not support manual truncation of '


In [13]:
with gpt('repeat'):
    repeat_res = gpt.query('I want', max_tokens=5, stream=True)

Switching openai backend to "repeat".
Switching  backend back to "huggingface".


In [114]:
repeat_res

('I want', {})

In [133]:
with gpt('banana'):
    banana_res = gpt.query('I want', max_tokens=5, stream=True)

Switching openai backend to "banana".


  
  params = {


Switching  backend back to "gooseai".


In [141]:
[row.choices[0].keys() for row in load(C.mock_stream_paths[True])]

Object loaded from /Users/hmamin/jabberwocky/data/misc/sample_stream_response.pkl.


[dict_keys(['text', 'index', 'logprobs', 'finish_reason']),
 dict_keys(['text', 'index', 'logprobs', 'finish_reason']),
 dict_keys(['text', 'index', 'logprobs', 'finish_reason']),
 dict_keys(['text', 'index', 'logprobs', 'finish_reason']),
 dict_keys(['text', 'index', 'logprobs', 'finish_reason'])]

In [None]:
with gpt('gooseai'):
    query_gpt

## Experimenting with streaming text AND dict

In [71]:
from itertools import cycle

In [72]:
def stream_words(text):
    """Like stream_chars but splits on spaces. Realized stream_chars was a bad
    idea because we risk giving SPEAKER turns like
    "This is over. W" and "hat are you doing next?", neither of which would be
    pronounced as intended. We yield with a space for consistency with the
    other streaming interfaces which require no further postprocessing.
    """
    for word in text.split(' '):
        yield word + ' '

In [85]:
def stream_response(text:str, full:dict):
    yield from zip(stream_words(text), cycle([full]))

In [247]:
# Note: this is probably massively over-engineered for mock streaming, but 
# I'll need to do something like this if I want to support real streaming 
# where nc and/or np > 1 so it was probably useful to work through this logic
# anyway.
def stream_multi_response(texts:list, fulls:list):
    for i, (text, full) in enumerate(zip(texts, fulls)):
        queue = deque()
        gen = stream_response(text, 
                              {**full, 'index': i, 'finish_reason': None})
        done = False
        # Yield items while checking if we're at the last item so we can mark
        # it with a finish_reason. This lets us know when one completion ends.
        while True:
            try:
                tok, tok_full = next(gen)
                queue.append((tok, tok_full))
            except StopIteration:
                done = True
            
            while len(queue) > 1:
                tok, tok_full = queue.popleft()
                yield tok, tok_full
            if done: break
        tok, tok_full = queue.popleft()
        tok_full['finish_reason'] = 'dummy'    
        yield tok, tok_full

In [179]:
txt = 'Santa is coming to town.'
for tok in stream_words(txt):
    print(repr(tok))

'Santa '
'is '
'coming '
'to '
'town. '


In [87]:
for tok, full in stream_response(txt, {}):
    print(repr(tok), full)

'Santa ' {}
'is ' {}
'coming ' {}
'to ' {}
'town. ' {}


In [89]:
# np > 1
with gpt('huggingface'):
    hf_res = gpt.query('I want', engine_i=1, max_tokens=5, n=2)

Switching openai backend to "huggingface".
Switching  backend back to "huggingface".


In [205]:
hf_res

(['to give you one big', 'to show you some pictures'],
 [{'generated_text': ' to give you one big'},
  {'generated_text': ' to show you some pictures'}])

In [245]:
for tok, full in stream_multi_response(*hf_res):
    print('>>> ', tok, full)

>>>  to  {'generated_text': ' to give you one big', 'index': 0, 'finish_reason': None}
>>>  give  {'generated_text': ' to give you one big', 'index': 0, 'finish_reason': None}
>>>  you  {'generated_text': ' to give you one big', 'index': 0, 'finish_reason': None}
>>>  one  {'generated_text': ' to give you one big', 'index': 0, 'finish_reason': None}
>>>  big  {'generated_text': ' to give you one big', 'index': 0, 'finish_reason': 'dummy'}
>>>  to  {'generated_text': ' to show you some pictures', 'index': 1, 'finish_reason': None}
>>>  show  {'generated_text': ' to show you some pictures', 'index': 1, 'finish_reason': None}
>>>  you  {'generated_text': ' to show you some pictures', 'index': 1, 'finish_reason': None}
>>>  some  {'generated_text': ' to show you some pictures', 'index': 1, 'finish_reason': None}
>>>  pictures  {'generated_text': ' to show you some pictures', 'index': 1, 'finish_reason': 'dummy'}


In [100]:
# np > 1, stream=True
with gpt('gooseai'):
    goose_res = openai.Completion.create(
        prompt=txt,
        engine=GPTBackend.engine(0),
        max_tokens=5,
        logprobs=3,
        n=2,
        stream=True
    )

Switching openai backend to "gooseai".
Switching  backend back to "huggingface".


In [116]:
# np > 1, stream=True
with gpt('openai'):
    open_res = openai.Completion.create(
        prompt=txt,
        engine=GPTBackend.engine(0),
        max_tokens=5,
        logprobs=3,
        n=2,
        stream=True
    )

Switching openai backend to "openai".
Switching  backend back to "huggingface".


In [102]:
_goose_res = []
for obj in goose_res:
    print(obj)
    _goose_res.append(obj)
    print(spacer())

{
  "choices": [
    {
      "finish_reason": null,
      "index": 0,
      "logprobs": {
        "text_offset": [
          0
        ],
        "token_logprobs": [
          -3.80078125
        ],
        "tokens": [
          " In"
        ],
        "top_logprobs": [
          {
            " And": -2.826171875,
            " The": -3.03125,
            "bytes:'\\n'": -1.30859375
          }
        ]
      },
      "text": " In",
      "token_index": 0
    }
  ],
  "created": 1649026112,
  "id": "01184be1-0c83-4906-a88c-9b71165a0da0",
  "model": "gpt-neo-2-7b",
  "object": "text_completion"
}

-------------------------------------------------------------------------------

{
  "choices": [
    {
      "finish_reason": null,
      "index": 0,
      "logprobs": {
        "text_offset": [
          3
        ],
        "token_logprobs": [
          -5.8125
        ],
        "tokens": [
          " mid"
        ],
        "top_logprobs": [
          {
            " a": -2.478515625,


In [117]:
_open_res = []
for obj in open_res:
    print(obj)
    _open_res.append(obj)
    print(spacer())

{
  "choices": [
    {
      "finish_reason": null,
      "index": 0,
      "logprobs": {
        "text_offset": [
          24
        ],
        "token_logprobs": [
          -8.621929
        ],
        "tokens": [
          " Feeling"
        ],
        "top_logprobs": [
          {
            "\n": -2.4186804,
            " I": -2.6238666,
            " She": -2.4244554
          }
        ]
      },
      "text": " Feeling"
    }
  ],
  "created": 1649026406,
  "id": "cmpl-4t3O2F9Xf3GFJFEVA8q86pCQpTCaD",
  "model": "ada:2020-05-03",
  "object": "text_completion"
}

-------------------------------------------------------------------------------

{
  "choices": [
    {
      "finish_reason": null,
      "index": 0,
      "logprobs": {
        "text_offset": [
          32
        ],
        "token_logprobs": [
          -5.186215
        ],
        "tokens": [
          " her"
        ],
        "top_logprobs": [
          {
            " a": -2.1202018,
            " like": -2.64

In [118]:
len(_goose_res), len(_open_res)

(10, 10)

In [115]:
[(row.choices[0].logprobs.tokens, row.choices[0].finish_reason) 
 for row in _goose_res]

[([' In'], None),
 ([' mid'], None),
 (['-'], None),
 (['January'], None),
 ([','], 'length'),
 ([' There'], None),
 ([' is'], None),
 ([' not'], None),
 ([' a'], None),
 ([' Santa'], 'length')]

In [119]:
[(row.choices[0].logprobs.tokens, row.choices[0].finish_reason) 
 for row in _open_res]

[([' Feeling'], None),
 ([' her'], None),
 ([' I'], None),
 (["'m"], None),
 ([' sure'], None),
 ([' presence'], None),
 ([' you'], None),
 ([','], None),
 ([' Des'], 'length'),
 ([' can'], 'length')]

In [120]:
[row.choices[0].text for row in _open_res]

[' Feeling',
 ' her',
 ' I',
 "'m",
 ' sure',
 ' presence',
 ' you',
 ',',
 ' Des',
 ' can']

In [151]:
# Thought we might be able to use id to reconstruct each completion but that
# doesn't work.
[row['id'] for row in _open_res]

['cmpl-4t3O2F9Xf3GFJFEVA8q86pCQpTCaD',
 'cmpl-4t3O2F9Xf3GFJFEVA8q86pCQpTCaD',
 'cmpl-4t3O2F9Xf3GFJFEVA8q86pCQpTCaD',
 'cmpl-4t3O2F9Xf3GFJFEVA8q86pCQpTCaD',
 'cmpl-4t3O2F9Xf3GFJFEVA8q86pCQpTCaD',
 'cmpl-4t3O2F9Xf3GFJFEVA8q86pCQpTCaD',
 'cmpl-4t3O2F9Xf3GFJFEVA8q86pCQpTCaD',
 'cmpl-4t3O2F9Xf3GFJFEVA8q86pCQpTCaD',
 'cmpl-4t3O2F9Xf3GFJFEVA8q86pCQpTCaD',
 'cmpl-4t3O2F9Xf3GFJFEVA8q86pCQpTCaD']

In [159]:
# index points to which completion each new token belongs to.
completions = defaultdict(list)
for row in _open_res:
    completions[row['choices'][0]['index']].append(row['choices'][0].text)

In [160]:
completions

defaultdict(list,
            {0: [' Feeling', ' her', ' presence', ',', ' Des'],
             1: [' I', "'m", ' sure', ' you', ' can']})

In [163]:
# index points to which completion each new token belongs to.
# completions = defaultdict(list)
for row in _goose_res:
    print(row['choices'][0]['index'], row['choices'][0]['finish_reason'])

0 None
0 None
0 None
0 None
0 length
1 None
1 None
1 None
1 None
1 length


In [164]:
# index points to which completion each new token belongs to.
# completions = defaultdict(list)
for row in _open_res:
    print(row['choices'][0]['index'], row['choices'][0]['finish_reason'])

0 None
0 None
1 None
1 None
1 None
0 None
1 None
0 None
0 length
1 length


In [165]:
with gpt('repeat'):
    print('stream=False\n', gpt.query(txt))
    print('\nstream=True')
    for tok, full in gpt.query(txt, stream=True):
        print(repr(tok), full)

Object loaded from /Users/hmamin/jabberwocky/data/misc/sample_response.pkl.
Object loaded from /Users/hmamin/jabberwocky/data/misc/sample_stream_response.pkl.
Switching openai backend to "repeat".
stream=False
 ('Santa is coming to town.', {})

stream=True
'Santa ' {}
'is ' {}
'coming ' {}
'to ' {}
'town. ' {}
Switching  backend back to "huggingface".


  'Streaming mode does not support manual truncation of '


In [166]:
with gpt('repeat'):
    print('stream=False\n', gpt.query(txt, n=3))
#     print('\nstream=True')
#     for tok, full in gpt.query(txt, stream=True):
#         print(repr(tok), full)

Switching openai backend to "repeat".
stream=False
 (['Santa is coming to town.', 'Santa is coming to town.', 'Santa is coming to town.'], [{}, {}, {}])
Switching  backend back to "huggingface".


In [145]:
with gpt('huggingface'):
    tmp = gpt.query(txt, max_tokens=5)
    print('stream=False\n', tmp)
    print('\nstream=True')
    for tok, full in gpt.query(txt, stream=True, max_tokens=5):
        print(repr(tok), full)

Object loaded from /Users/hmamin/jabberwocky/data/misc/sample_response.pkl.
Object loaded from /Users/hmamin/jabberwocky/data/misc/sample_stream_response.pkl.
Switching openai backend to "huggingface".
stream=False
 ('The city is', {'generated_text': '\n\nThe city is'})

stream=True


  'Streaming mode does not support manual truncation of '


'\n\nThe ' {'generated_text': '\n\nThe city is'}
'city ' {'generated_text': '\n\nThe city is'}
'is ' {'generated_text': '\n\nThe city is'}
Switching  backend back to "huggingface".


In [142]:
for tok, full in stream_response(tmp[0], tmp[1]):
    print(full, tok)

{'generated_text': '\n\nThe city is'} The 
{'generated_text': '\n\nThe city is'} city 
{'generated_text': '\n\nThe city is'} is 


In [139]:
for row in stream_response(*tmp):
    print(row)

('The ', {'generated_text': '\n\nThe city is'})
('city ', {'generated_text': '\n\nThe city is'})
('is ', {'generated_text': '\n\nThe city is'})


In [171]:
hf_res

Object loaded from /Users/hmamin/jabberwocky/data/misc/sample_response.pkl.
Object loaded from /Users/hmamin/jabberwocky/data/misc/sample_stream_response.pkl.


(['to give you one big', 'to show you some pictures'],
 [{'generated_text': ' to give you one big'},
  {'generated_text': ' to show you some pictures'}])

In [170]:
for row in stream_response(*hf_res):
    print(row)

AttributeError: 'list' object has no attribute 'split'