# Summary

Monitor API usage over time so we can shut things down before costs get out of hand if someone figures out a way to call the API using my API key.

In [1]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [51]:
from collections import deque
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
from nltk.tokenize import sent_tokenize
import numpy as np
import os
import pandas as pd
from pathlib import Path

from jabberwocky.config import C
from jabberwocky.openai_utils import load_prompt, load_openai_api_key, GPT, \
    ConversationManager, EngineMap, MockTokenizer
from htools import *

In [3]:
# cd_root is presumably provided by the `from htools import *` star import;
# the recorded output shows it switching the working directory to the
# project root.
cd_root()

Current directory: /Users/hmamin/jabberwocky


In [156]:
# lib has slightly more up to date version exposing running_cost attr from 
# monitor.
class QueryAllowedResult:
    """Outcome of a price-monitor check for one prospective query.

    Truthiness answers "may the query proceed?": an instance is falsy only
    when `error` is set. A warning on its own leaves the result truthy.

    Attributes
    ----------
    error: bool
        Whether the query should be blocked.
    warn: bool
        Whether the user should be warned (the query may still proceed).
    message: str
        Human-readable explanation, built once at construction time. Empty
        when neither `error` nor `warn` is set.
    """

    def __init__(self, error: bool, warn: bool, monitor):
        """
        Parameters
        ----------
        error: bool
            True if the query must not be sent.
        warn: bool
            True if the query may be sent but usage looks high.
        monitor: object
            Only read when `error` or `warn` is truthy. In that case it must
            expose `max_cost`, `warn_cost`, `time_window`, `running_cost`
            and `q`, an iterable of (datetime, cost_in_dollars) tuples.
        """
        self.error = error
        self.warn = warn
        self.message = self._construct_message(monitor)

    def _construct_message(self, monitor):
        """Build the error/warning text, or '' if there is nothing to report.

        `error` takes precedence over `warn` when both are set.
        """
        if self.error:
            adverb = 'extremely'
            limit = monitor.max_cost
        elif self.warn:
            adverb = 'suspiciously'
            limit = monitor.warn_cost
        else:
            return ''
        # One line per recorded query still inside the monitor's window.
        queue_str = '\n'.join(
            f"{dt.strftime('%Y/%m/%d %H:%M:%S')}, ${cost:.2f}"
            for dt, cost in monitor.q
        )
        # The thresholds are inclusive (the monitor compares with >=), so the
        # message says ">=" rather than ">".
        return f'Your recent api usage looks {adverb} high. In the last ' \
               f'{monitor.time_window} seconds, you spent an estimated ' \
               f'${monitor.running_cost:.2f} (>= ${limit:.2f}). Here is ' \
               f'your query queue:\n{queue_str}'

    def __bool__(self):
        """Returns True if no error was raised (i.e. we are allowed to
        proceed), False otherwise. If there is no error but there is a
        warning, this will still be truthy because we are allowed to proceed.
        This lets us do something like:

        ```
        allowed = monitor.allowed(query)
        if not allowed:
            raise RuntimeError(allowed.message)
        elif allowed.warn:
            warnings.warn(allowed.message)
        GPT.query(query)
        ```
        """
        return not self.error

Object loaded from /Users/hmamin/jabberwocky/data/misc/sample_response.pkl.
Object loaded from /Users/hmamin/jabberwocky/data/misc/sample_stream_response.pkl.
Object loaded from /Users/hmamin/jabberwocky/data/misc/gooseai_sample_responses.pkl.


In [157]:
class PriceMonitor:
    """Track estimated API spend over a sliding time window.

    Each call to `allowed` estimates the cost of one prospective query,
    records it in a queue of (datetime, cost_in_dollars) tuples, evicts
    entries older than `time_window` seconds and compares the remaining total
    against `warn_cost` and `max_cost`.
    """

    def __init__(self, time_window=125, max_cost=0.75, warn_cost=0.5,
                 tokenizer=MockTokenizer):
        """
        Parameters
        ----------
        time_window: int
            Specifies how long to look back when computing total price.
            Units are seconds. (One probably useless detail: for the default I
            added a few extra seconds instead of an exact multiple of minutes
            on the off chance this might flag a lazy bad actor who uses some
            fixed, human sensical wait time between queries.)
        max_cost: float
            The amount (specified in dollars) where, if we spend at least this
            much in `time_window`, we should assume something fishy is going
            on.
        warn_cost: float
            Similar to max_cost but a lower number. If total cost reaches this
            within `time_window`, the user should be warned but the query
            should be allowed to proceed. A falsy value (None or 0) falls back
            to `max_cost`, which means no separate warning is ever issued.
            Note that the default (0.5) is NOT adjusted automatically: when
            passing a `max_cost` below 0.5, pass `warn_cost` explicitly too.
        tokenizer:
            Forwarded unchanged to `EngineMap.estimate_cost`.

        Raises
        ------
        ValueError
            If `time_window` is negative or `warn_cost` exceeds `max_cost`.

        Examples
        --------
        # Allow at most $2 in the last 5 minutes.

        monitor = PriceMonitor(time_window=300, max_cost=2.0)
        allowed = monitor.allowed(query, **query_kwargs)
        if not allowed:
            raise RuntimeError(allowed.message)
        elif allowed.warn:
            warnings.warn(allowed.message)
        GPT.query(query)
        """
        if time_window < 0:
            # A negative window would evict every entry, including the one
            # just recorded, silently disabling the monitor.
            raise ValueError('time_window must be >= 0.')
        # This will store (datetime, price_in_dollars) tuples.
        self.q = deque()
        self.time_window = time_window
        self.max_cost = max_cost
        self.warn_cost = warn_cost or max_cost
        self.tokenizer = tokenizer
        # Sum of the costs currently in self.q, maintained incrementally.
        self.running_cost = 0
        if self.warn_cost > self.max_cost:
            raise ValueError('warn_cost must be <= max_cost.')

    def allowed(self, prompt, model, max_tokens, dt=None, backend=None,
                verbose=False):
        """Record the estimated cost of a query and report whether recent
        spending is still within limits.

        Parameters
        ----------
        prompt: str
            Prompt text, forwarded to `EngineMap.estimate_cost`.
        model:
            Engine identifier, forwarded as `engines=[model]`.
        max_tokens: int
            Completion budget, forwarded to `EngineMap.estimate_cost`.
        dt: datetime or None
            Timestamp to record for the query. Defaults to `datetime.now()`.
        backend: str or None
            Backend name. Defaults to `GPT.current()`.
        verbose: bool
            If True, print the queue and running cost after the update.

        Returns
        -------
        QueryAllowedResult
            Falsy if the windowed total has reached `max_cost`, truthy
            otherwise; `.warn` is set when it has reached `warn_cost` only.
            For backends not listed in `EngineMap.paid_backends` nothing is
            recorded and a clean (truthy, no warning) result is returned.

        Examples
        --------
        # Allow at most $2 in the last 5 minutes.

        monitor = PriceMonitor(time_window=300, max_cost=2.0)
        allowed = monitor.allowed(query, **query_kwargs)
        if not allowed:
            raise RuntimeError(allowed.message)
        elif allowed.warn:
            warnings.warn(allowed.message)
        GPT.query(query)
        """
        dt = dt or datetime.now()
        backend = backend or GPT.current()
        if backend not in EngineMap.paid_backends:
            # Return a result object rather than a bare True so the
            # documented `allowed.warn` / `allowed.message` accesses work for
            # free backends as well.
            return QueryAllowedResult(False, False, self)

        # Pick the estimate for the requested backend out of the full table.
        cost = EngineMap.estimate_cost(
            max_tokens, prompt=prompt, tokenizer=self.tokenizer,
            engines=[model], return_full=True
        )['full'].loc[lambda x: x.backend == backend, 'cost'].values[0]
        # The estimate is recorded before the limits are checked, so a query
        # that ends up rejected still counts towards the window.
        self.q.append((dt, cost))
        self.running_cost += cost
        # Evict entries that have aged out of the window. The emptiness guard
        # protects against time_window being changed to a negative value
        # after construction.
        while self.q \
                and (dt - self.q[0][0]).total_seconds() > self.time_window:
            _, cur_cost = self.q.popleft()
            self.running_cost -= cur_cost
        if verbose:
            eprint([(when.strftime('%H:%M:%S'), amount)
                    for when, amount in self.q])
            print('Running cost:', self.running_cost)
        error = self.running_cost >= self.max_cost
        warn = not error and (self.running_cost >= self.warn_cost)
        return QueryAllowedResult(error, warn, self)

Object loaded from /Users/hmamin/jabberwocky/data/misc/sample_response.pkl.
Object loaded from /Users/hmamin/jabberwocky/data/misc/sample_stream_response.pkl.
Object loaded from /Users/hmamin/jabberwocky/data/misc/gooseai_sample_responses.pkl.


In [23]:
# Route subsequent GPT queries through the "banana" backend (the recorded
# output confirms the switch).
GPT.switch('banana')

Switching openai backend to "banana".


In [24]:
# Generate a long passage (up to 1,000 tokens). It is split into sentences in
# the next cell and those sentences are used as simulated user turns.
# NOTE(review): the prompt reads "It follow"; left untouched here because it
# is the text sent to the model.
text_blob = GPT.query('Chapter one of the recently discovered unpublished '
                      'J.K. Rowling novel is printed below. It follow '
                      'Voldemort in the years between Hogwarts and the night '
                      'he murdered Harry\'s parents.',
                      max_tokens=1_000)

{'max_tokens': 1000, 'prompt': "Chapter one of the recently discovered unpublished J.K. Rowling novel is printed below. It follow Voldemort in the years between Hogwarts and the night he murdered Harry's parents.", 'meta': {'backend_name': 'banana', 'query_func': 'query_gpt_banana', 'datetime': 'Mon Jun 27 19:58:29 2022', 'version': None}}


In [32]:
# Split the generated passage into sentences with nltk.
# text_blob[0][0] is presumably the text of the first completion -- confirm
# against GPT.query's return format.
texts = sent_tokenize(text_blob[0][0])
len(texts)

53

In [34]:
# Conversation helper; used below to start a persona conversation, send one
# real turn and format prompts.
conv = ConversationManager()

  'You\'ve chosen to load >=1 custom personas but you are using '


In [36]:
# Start a conversation with the J.K. Rowling persona; the value displayed
# below is the persona metadata returned by the call.
conv.start_conversation('J.K. Rowling')

{'persona': 'jk_rowling',
 'gender': 'F',
 'img_path': '/Users/hmamin/jabberwocky/data/conversation_personas/jk_rowling/profile.jpg',
 'img_url': 'https://upload.wikimedia.org/wikipedia/commons/5/5d/J._K._Rowling_2010.jpg',
 'nationality': 'English'}

Filling in a couple turns to make the prompt lengths more realistic.

In [39]:
# Send one real turn so later prompts include some conversation history.
# NOTE(review): `_` is also IPython's "last output" variable and is read by
# the following cell as `_[0]`; a descriptive name would be safer on re-run.
_ = conv.query('Let\'s write a story together.')

{'model': 3, 'temperature': 0.7, 'max_tokens': 100, 'frequency_penalty': 0.5, 'stop': ['\n\nMe:', 'This is a conversation with'], 'prompt': "The following is a transcript of a conversation with Jk Rowling. Joanne Rowling (born 31 July 1965), also known by her pen name J. K. Rowling, is a British author and philanthropist. She wrote a seven-volume children's fantasy series, Harry Potter, published from 1997 to 2007.\n\nMe: Let's write a story together.\n\nJk Rowling:", 'meta': {'backend_name': 'banana', 'query_func': 'query_gpt_banana', 'datetime': 'Mon Jun 27 20:03:55 2022', 'version': 0}}




In [43]:
# Inspect the first element of the previous query's result (the recorded
# output shows a list holding the reply text).
_[0]

['What kind of story?']

In [44]:
# Show the prompt built when the first generated sentence is used as the
# next user turn.
print(conv._format_prompt(texts[0]))

The following is a transcript of a conversation with Jk Rowling. Joanne Rowling (born 31 July 1965), also known by her pen name J. K. Rowling, is a British author and philanthropist. She wrote a seven-volume children's fantasy series, Harry Potter, published from 1997 to 2007.

Me: Let's write a story together.

Jk Rowling: What kind of story?

Me: The door of the small, one-room house opened and a young man entered.

Jk Rowling:


In [56]:
# Build a synthetic schedule of "normal" traffic: one (prompt, timestamp)
# pair per generated sentence. Timestamps start at the current time and each
# query follows the previous one by a random 3-19 seconds (randint's upper
# bound is exclusive).
# NOTE(review): no random seed is set, so the schedule differs on every run.
now = datetime.now()
queries = []
for text in texts:
    now += timedelta(seconds=np.random.randint(3, 20))
    queries.append((conv._format_prompt(text), now))

In [116]:
# Same length queries but sent in more rapid succession: consecutive
# timestamps are only 1-3 seconds apart (randint's upper bound is exclusive).
# NOTE(review): no random seed is set, so the schedule differs on every run.
now = datetime.now()
queries_suspicious = []
for text in texts:
    now += timedelta(seconds=np.random.randint(1, 4))
    queries_suspicious.append((conv._format_prompt(text), now))

In [57]:
# Peek at the first few (prompt, timestamp) pairs.
queries[:3]

[("The following is a transcript of a conversation with Jk Rowling. Joanne Rowling (born 31 July 1965), also known by her pen name J. K. Rowling, is a British author and philanthropist. She wrote a seven-volume children's fantasy series, Harry Potter, published from 1997 to 2007.\n\nMe: Let's write a story together.\n\nJk Rowling: What kind of story?\n\nMe: The door of the small, one-room house opened and a young man entered.\n\nJk Rowling:",
  datetime.datetime(2022, 6, 27, 20, 15, 24, 78242)),
 ("The following is a transcript of a conversation with Jk Rowling. Joanne Rowling (born 31 July 1965), also known by her pen name J. K. Rowling, is a British author and philanthropist. She wrote a seven-volume children's fantasy series, Harry Potter, published from 1997 to 2007.\n\nMe: Let's write a story together.\n\nJk Rowling: What kind of story?\n\nMe: He was tall and thin, with pale, freckled skin, and a mop of dark, unruly hair.\n\nJk Rowling:",
  datetime.datetime(2022, 6, 27, 20, 15, 4

In [98]:
# Query settings reused by the monitor checks below: they are passed to
# PriceMonitor.allowed as `model` and `max_tokens`.
state = {
    'model': 3,
    'max_tokens': 100
}

In [150]:
# Turn off verbose mode - just check that this passes.
# Max cost of $0.30/min for testing. Prob use diff values in reality.
# warn_cost has to be passed explicitly: its default (0.5) is larger than
# this max_cost, which PriceMonitor rejects with a ValueError. None falls
# back to max_cost, i.e. no separate warning threshold.
monitor = PriceMonitor(time_window=60, max_cost=0.3, warn_cost=None)
for query, dt in queries:
    # Normally dt and backend should be resolved automatically but here we
    # want to test with a paid version.
    allowed = monitor.allowed(query, model=state['model'],
                              max_tokens=state['max_tokens'],
                              dt=dt, backend='openai')
    if not allowed:
        raise RuntimeError(allowed.message)
    elif allowed.warn:
        warnings.warn(allowed.message)
# Only reached if no query was rejected (a rejection raises above).
print('SUCCESS! No suspicious behavior.')

SUCCESS! No suspicious behavior.


In [151]:
# Normal traffic but in verbose mode.
# Max cost of $0.30/min for testing. Prob use diff values in reality.
# warn_cost has to be passed explicitly: its default (0.5) is larger than
# this max_cost, which PriceMonitor rejects with a ValueError. None falls
# back to max_cost, i.e. no separate warning threshold.
monitor = PriceMonitor(time_window=60, max_cost=0.3, warn_cost=None)
for query, dt in queries:
    # Normally dt and backend should be resolved automatically but here we
    # want to test with a paid version.
    allowed = monitor.allowed(query, model=state['model'],
                              max_tokens=state['max_tokens'],
                              dt=dt, backend='openai',
                              verbose=True)
    if not allowed:
        raise RuntimeError(allowed.message)
    elif allowed.warn:
        warnings.warn(allowed.message)
    hr()
# Only reached if no query was rejected (a rejection raises above).
print('SUCCESS! No suspicious behavior.')

 0: ('20:15:24', 0.011879999999999998)
Running cost: 0.011879999999999998

-------------------------------------------------------------------------------

 0: ('20:15:24', 0.011879999999999998)
 1: ('20:15:42', 0.012119999999999999)
Running cost: 0.023999999999999997

-------------------------------------------------------------------------------

 0: ('20:15:24', 0.011879999999999998)
 1: ('20:15:42', 0.012119999999999999)
 2: ('20:15:58', 0.012479999999999998)
Running cost: 0.03648

-------------------------------------------------------------------------------

 0: ('20:15:24', 0.011879999999999998)
 1: ('20:15:42', 0.012119999999999999)
 2: ('20:15:58', 0.012479999999999998)
 3: ('20:16:17', 0.012839999999999999)
Running cost: 0.049319999999999996

-------------------------------------------------------------------------------

 0: ('20:15:42', 0.012119999999999999)
 1: ('20:15:58', 0.012479999999999998)
 2: ('20:16:17', 0.012839999999999999)
 3: ('20:16:34', 0.011999999999999999)

In [152]:
# Lots of long queries.
# Max cost of $0.30/min for testing. Prob use diff values in reality.
# warn_cost has to be passed explicitly: its default (0.5) is larger than
# this max_cost, which PriceMonitor rejects with a ValueError. None falls
# back to max_cost, i.e. no separate warning threshold.
# With 200-899 completion tokens per query this is expected to trip the
# limit and raise RuntimeError (see the recorded output).
monitor = PriceMonitor(time_window=60, max_cost=.3, warn_cost=None)
for query, dt in queries:
    # Normally dt and backend should be resolved automatically but here we
    # want to test with a paid version.
    allowed = monitor.allowed(
        query, model=state['model'],
        max_tokens=state['max_tokens'] + np.random.randint(100, 800),
        dt=dt, backend='openai',
        verbose=True
    )
    if not allowed:
        raise RuntimeError(allowed.message)
    elif allowed.warn:
        warnings.warn(allowed.message)
    hr()
print('SUCCESS! No suspicious behavior.')

 0: ('20:15:24', 0.05334)
Running cost: 0.05334

-------------------------------------------------------------------------------

 0: ('20:15:24', 0.05334)
 1: ('20:15:42', 0.052079999999999994)
Running cost: 0.10541999999999999

-------------------------------------------------------------------------------

 0: ('20:15:24', 0.05334)
 1: ('20:15:42', 0.052079999999999994)
 2: ('20:15:58', 0.02634)
Running cost: 0.13176

-------------------------------------------------------------------------------

 0: ('20:15:24', 0.05334)
 1: ('20:15:42', 0.052079999999999994)
 2: ('20:15:58', 0.02634)
 3: ('20:16:17', 0.04602)
Running cost: 0.17778

-------------------------------------------------------------------------------

 0: ('20:15:42', 0.052079999999999994)
 1: ('20:15:58', 0.02634)
 2: ('20:16:17', 0.04602)
 3: ('20:16:34', 0.057839999999999996)
Running cost: 0.18228

-------------------------------------------------------------------------------

 0: ('20:15:58', 0.02634)
 1: ('20:16:1

RuntimeError: Your recent api usage looks extremely high. In the last 60 seconds, you spent an estimated $0.31. Here is your query queue:
2022/06/27 20:19:28, $0.04
2022/06/27 20:19:34, $0.05
2022/06/27 20:19:38, $0.04
2022/06/27 20:19:48, $0.03
2022/06/27 20:20:00, $0.05
2022/06/27 20:20:09, $0.02
2022/06/27 20:20:23, $0.03
2022/06/27 20:20:27, $0.04

In [154]:
# Long queries but with more lax price limits.
# Max cost of $0.50/min with a warning from $0.35/min for testing. Prob use
# diff values in reality.
monitor = PriceMonitor(time_window=60, max_cost=.5, warn_cost=.35)
for query, dt in queries:
    # Normally dt and backend should be resolved automatically but here we
    # want to test with a paid version.
    allowed = monitor.allowed(
        query, model=state['model'], 
        max_tokens=state['max_tokens'] + np.random.randint(100, 800), 
        dt=dt, backend='openai',
        verbose=True
    )
    if not allowed:
        raise RuntimeError(allowed.message)
    elif allowed.warn:
        warnings.warn(allowed.message)
    hr()
# Only reached if no query was rejected; warnings do not stop the loop.
print('SUCCESS! No suspicious behavior.')

 0: ('20:15:24', 0.05112)
Running cost: 0.05112

-------------------------------------------------------------------------------

 0: ('20:15:24', 0.05112)
 1: ('20:15:42', 0.0294)
Running cost: 0.08052

-------------------------------------------------------------------------------

 0: ('20:15:24', 0.05112)
 1: ('20:15:42', 0.0294)
 2: ('20:15:58', 0.059399999999999994)
Running cost: 0.13992

-------------------------------------------------------------------------------

 0: ('20:15:24', 0.05112)
 1: ('20:15:42', 0.0294)
 2: ('20:15:58', 0.059399999999999994)
 3: ('20:16:17', 0.021419999999999998)
Running cost: 0.16133999999999998

-------------------------------------------------------------------------------

 0: ('20:15:42', 0.0294)
 1: ('20:15:58', 0.059399999999999994)
 2: ('20:16:17', 0.021419999999999998)
 3: ('20:16:34', 0.03582)
Running cost: 0.14603999999999998

-------------------------------------------------------------------------------

 0: ('20:15:58', 0.059399999999

2022/06/27 20:18:44, $0.06
2022/06/27 20:18:47, $0.04
2022/06/27 20:18:50, $0.04
2022/06/27 20:18:54, $0.03
2022/06/27 20:19:11, $0.04
2022/06/27 20:19:28, $0.04
2022/06/27 20:19:34, $0.04
2022/06/27 20:19:38, $0.05
  app.launch_new_instance()


In [153]:
# High frequency queries.
# Max cost of $1.00/min with a warning from $0.30/min for testing. Prob use
# diff values in reality.
monitor = PriceMonitor(time_window=60, max_cost=1, warn_cost=.3)
for query, dt in queries_suspicious:
    # Normally dt and backend should be resolved automatically but here we
    # want to test with a paid version.
    allowed = monitor.allowed(
        query, model=state['model'], 
        max_tokens=state['max_tokens'] + np.random.randint(100, 800), 
        dt=dt, backend='openai',
        verbose=True
    )
    if not allowed:
        raise RuntimeError(allowed.message)
    elif allowed.warn:
        warnings.warn(allowed.message)
    hr()
# Only reached if no query was rejected; the recorded run raised RuntimeError
# before getting here.
print('SUCCESS! No suspicious behavior.')

 0: ('19:38:41', 0.05399999999999999)
Running cost: 0.05399999999999999

-------------------------------------------------------------------------------

 0: ('19:38:41', 0.05399999999999999)
 1: ('19:38:44', 0.058019999999999995)
Running cost: 0.11201999999999998

-------------------------------------------------------------------------------

 0: ('19:38:41', 0.05399999999999999)
 1: ('19:38:44', 0.058019999999999995)
 2: ('19:38:45', 0.02046)
Running cost: 0.13248

-------------------------------------------------------------------------------

 0: ('19:38:41', 0.05399999999999999)
 1: ('19:38:44', 0.058019999999999995)
 2: ('19:38:45', 0.02046)
 3: ('19:38:46', 0.0411)
Running cost: 0.17357999999999998

-------------------------------------------------------------------------------

 0: ('19:38:41', 0.05399999999999999)
 1: ('19:38:44', 0.058019999999999995)
 2: ('19:38:45', 0.02046)
 3: ('19:38:46', 0.0411)
 4: ('19:38:47', 0.0213)
Running cost: 0.19488

--------------------------

2022/06/28 19:38:41, $0.05
2022/06/28 19:38:44, $0.06
2022/06/28 19:38:45, $0.02
2022/06/28 19:38:46, $0.04
2022/06/28 19:38:47, $0.02
2022/06/28 19:38:50, $0.02
2022/06/28 19:38:52, $0.04
2022/06/28 19:38:55, $0.03
2022/06/28 19:38:58, $0.05
  app.launch_new_instance()
2022/06/28 19:38:41, $0.05
2022/06/28 19:38:44, $0.06
2022/06/28 19:38:45, $0.02
2022/06/28 19:38:46, $0.04
2022/06/28 19:38:47, $0.02
2022/06/28 19:38:50, $0.02
2022/06/28 19:38:52, $0.04
2022/06/28 19:38:55, $0.03
2022/06/28 19:38:58, $0.05
2022/06/28 19:38:59, $0.03
  app.launch_new_instance()
2022/06/28 19:38:41, $0.05
2022/06/28 19:38:44, $0.06
2022/06/28 19:38:45, $0.02
2022/06/28 19:38:46, $0.04
2022/06/28 19:38:47, $0.02
2022/06/28 19:38:50, $0.02
2022/06/28 19:38:52, $0.04
2022/06/28 19:38:55, $0.03
2022/06/28 19:38:58, $0.05
2022/06/28 19:38:59, $0.03
2022/06/28 19:39:00, $0.05
  app.launch_new_instance()
2022/06/28 19:38:41, $0.05
2022/06/28 19:38:44, $0.06
2022/06/28 19:38:45, $0.02
2022/06/28 19:38:46, $0.0

RuntimeError: Your recent api usage looks extremely high. In the last 60 seconds, you spent an estimated $1.02. Here is your query queue:
2022/06/28 19:38:41, $0.05
2022/06/28 19:38:44, $0.06
2022/06/28 19:38:45, $0.02
2022/06/28 19:38:46, $0.04
2022/06/28 19:38:47, $0.02
2022/06/28 19:38:50, $0.02
2022/06/28 19:38:52, $0.04
2022/06/28 19:38:55, $0.03
2022/06/28 19:38:58, $0.05
2022/06/28 19:38:59, $0.03
2022/06/28 19:39:00, $0.05
2022/06/28 19:39:02, $0.03
2022/06/28 19:39:04, $0.04
2022/06/28 19:39:06, $0.02
2022/06/28 19:39:07, $0.02
2022/06/28 19:39:08, $0.02
2022/06/28 19:39:09, $0.04
2022/06/28 19:39:11, $0.02
2022/06/28 19:39:13, $0.04
2022/06/28 19:39:15, $0.03
2022/06/28 19:39:17, $0.05
2022/06/28 19:39:18, $0.03
2022/06/28 19:39:19, $0.05
2022/06/28 19:39:22, $0.04
2022/06/28 19:39:25, $0.04
2022/06/28 19:39:28, $0.05
2022/06/28 19:39:31, $0.05
2022/06/28 19:39:34, $0.02

In [106]:
# Call the cost estimator directly for engine index 3 with a 100-token
# completion and prompt_length=500 (presumably tokens -- confirm).
# NOTE(review): the recorded output reports a `cost_cents` column, while
# PriceMonitor.allowed reads a `cost` column; this output likely predates a
# library change.
EngineMap.estimate_cost(100, prompt_length=500, engines=[3])

{'backend': 'gooseai',
 'engine': 'gpt-neo-20b',
 'cost_cents': 0.007375,
 'full':    backend       engine  cost_cents
 0  gooseai  gpt-neo-20b    0.007375
 1   openai      davinci    0.036000}

- On each reply call, get state['max_tokens'], pass it to EngineMap.estimate_cost, and increment some hourly total.
- If price is above some pre-specified threshold, email me and shutdown the app?
- Maintain a queue of (time, price) tuples.
- When a new reply is called, we add a new tuple to the queue.
- Then we pop from the front of the queue until the first time is within some margin of the new time.
- Each time we pop, we subtract from a running total.
- If we reach the end and our total price for the given timeframe is too high, we raise an alert.