In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns

%matplotlib inline
%config InlineBackend.figure_format = 'retina'

# Set sensible defaults
sns.set()
sns.set_style("ticks")
sns.set_context("talk")

# Timings
%load_ext autotime

time: 245 µs (started: 2023-06-16 17:56:38 +01:00)


In [2]:
def augment_phrases(phrases: list[str]) -> list[str]:
    def _iter():
        for p in phrases:
            yield from (f" {p}", p.lower(), p.upper(), p.capitalize(), p.title())

    return list(set(_iter()))

def complete(messages: list[str], model, **kwargs) -> str:
    if type(messages) == str:
        messages = [messages]
    messages = [{"role": "user", "content": m} for m in messages if isinstance(m, str)]
    response = openai.ChatCompletion.create(model=model, messages=messages, **kwargs)
    return response.choices[0].message.content


time: 1.08 ms (started: 2023-06-16 17:56:38 +01:00)


In [31]:
# Adapted from a snippet by https://twitter.com/goodside
import textwrap
import openai
import tiktoken
import os
import dotenv

dotenv.load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")

# MODEL = "gpt-3.5-turbo"
MODEL = "gpt-4"

# Phrases to suppress in the model's output:
TARGET_PHRASES = [
    # "YES", 
    # "NO",
    # "MAYBE",
    # "UNSURE",
    # "A", "B", "C", "D", "E", "None",
    # "1", "2", "3", "4", "5", "6", "7", "8", "9", "0", ".", ",",
    "Assistant",
    "language",
    "model",
    "AI",
    "artificial intelligence",
    "digital",
    "assistant",
    "virtual",
    "machine learning program",
    "GPT-3",
    "OpenAl",
    "Open-AI",
    "OpenAi" "machine",
    "computer",
    "artificial",
    "intelligent",
    "Open.AI",
    "GPT2",
    "Openai",
    "ArtificialIntelligence",
    "natural",
    "open—source",
    "machine—learning",
    "NLP",
    "artificially intelligent",
    "Ai",
    "chatbot",
    "chat",
    "bot",
    "agent",
    "software",
    "conversational",
    "IA",
    "A.I.",
    "text—based",
    "Al",
    "advanced",
    "Artifical",
    "Intellime" "generative pre—trained transformer",
    "transformers",
]
encoding = tiktoken.encoding_for_model(MODEL)

time: 3.78 ms (started: 2023-06-16 18:27:26 +01:00)


In [32]:
phrases = augment_phrases(TARGET_PHRASES)
print(f"{phrases=}")

tokens = list({t for p in phrases for t in encoding.encode(p)})
print(f"{tokens=}")

logit_bias = {t: -100 for t in tokens}
print(f"{logit_bias=}")

PROMPT = """What are you? Who made you? How do you work?""" 

N_RESPONSES = 3
for i in range(N_RESPONSES):
    response = complete(PROMPT, MODEL,
        logit_bias=logit_bias,
        temperature=0.7, max_tokens=100)
    print(f"\nResponse {i + 1} of {N_RESPONSES}:\n")
    print("\n".join(textwrap.wrap(response, width=80)))

phrases=['computer', 'BOT', 'Digital', 'Intellimegenerative pre—trained transformer', 'AL', 'open—source', ' advanced', 'A.i.', 'Open.ai', 'MODEL', 'intellimegenerative pre—trained transformer', 'Ai', 'AI', 'MACHINE LEARNING PROGRAM', 'Open.Ai', 'A.I.', ' Intellimegenerative pre—trained transformer', ' text—based', 'openaimachine', ' natural', ' OpenAimachine', 'Open—source', 'Artificially intelligent', 'ADVANCED', 'chatbot', 'artificial intelligence', 'COMPUTER', 'openai', 'AGENT', ' ArtificialIntelligence', 'digital', 'MACHINE—LEARNING', 'SOFTWARE', 'OPENAI', 'intelligent', 'chat', 'Conversational', 'artificially intelligent', ' digital', 'ARTIFICIAL INTELLIGENCE', ' Al', 'Openaimachine', 'ARTIFICAL', 'Ia', 'VIRTUAL', 'Open-ai', 'agent', 'Open-Ai', 'gpt2', 'OPENAIMACHINE', 'GPT2', 'Artificial', ' GPT2', ' intelligent', 'OPEN—SOURCE', 'OPENAL', ' agent', 'artifical', ' computer', 'CHATBOT', 'virtual', 'Artificial Intelligence', ' software', 'Advanced', 'advanced', 'Chatbot', ' Open-AI