In [None]:
import tiktoken
from langchain.text_splitter import RecursiveCharacterTextSplitter
import uuid
from time import time

# Look up the tiktoken encoding associated with the "gpt-4-1106-preview" model.
# Despite its name, `tokenizer_name` holds the object returned by
# `encoding_for_model`, not a plain string; only its `.name` attribute is used,
# to fetch the encoding that `tokenizer` refers to (consumed by tiktoken_len).
tokenizer_name = tiktoken.encoding_for_model("gpt-4-1106-preview")
tokenizer = tiktoken.get_encoding(tokenizer_name.name)

# Length function handed to text_splitter: measures text in tokens
def tiktoken_len(text):
    """Return how many tokens the module-level `tokenizer` produces for `text`.

    `disallowed_special=()` is passed through to `encode` so that no
    special-token check is applied to the input.
    """
    return len(tokenizer.encode(text, disallowed_special=()))

# Splitter whose chunk lengths are measured with tiktoken_len (tokens) instead
# of the raw character count.
text_splitter = RecursiveCharacterTextSplitter(
    # Maximum chunk length, in units of length_function. This depends on which
    # model you use: for example, with the 16k GPT models setting this to 8k
    # (or maybe higher) is reasonable.
    chunk_size=8000,
    chunk_overlap=100,
    length_function=tiktoken_len,
    # Listed from paragraph breaks down to the empty string.
    separators=["\n\n", "\n", " ", ""],
)

# get a random UUID as a string
def get_a_uuid():
    """Return a freshly generated version-4 UUID in its canonical string form.

    The result is the hyphenated hexadecimal representation produced by
    `str(uuid.UUID)` (36 characters), not a Base64 encoding.
    """
    fresh_id = uuid.uuid4()
    return str(fresh_id)

# Count the tokens in a string and expose the raw bytes of each token
def num_tokens_from_string(string: str, model_name: str) -> "tuple[int, list[bytes]]":
    """Tokenize `string` with the encoding tiktoken associates with `model_name`.

    Args:
        string: Text to tokenize.
        model_name: Model name handed to `tiktoken.encoding_for_model`.

    Returns:
        A `(num_tokens, tokens_string)` pair: the number of tokens, and a list
        with the result of `decode_single_token_bytes` for each token, in order.
    """
    encoding = tiktoken.encoding_for_model(model_name)
    # disallowed_special=() mirrors tiktoken_len, so both helpers accept the
    # same inputs instead of this one raising on text that looks like a
    # special token.
    token_integers = encoding.encode(string, disallowed_special=())
    tokens_string = [encoding.decode_single_token_bytes(token) for token in token_integers]

    return len(token_integers), tokens_string

def open_file(filepath):
    """Read the file at `filepath` as UTF-8 text and return its whole content.

    Bytes that are not valid UTF-8 are skipped (`errors="ignore"`) rather than
    raising a decode error.
    """
    with open(filepath, mode="r", encoding="utf-8", errors="ignore") as handle:
        contents = handle.read()
    return contents
        
def save_file(filepath, content):
    """Write `content` to `filepath` as UTF-8 text.

    The file is opened in 'w' mode, so an existing file is overwritten.
    """
    with open(filepath, mode='w', encoding='utf-8') as handle:
        handle.write(content)

In [None]:
from openai import OpenAI
from dotenv import load_dotenv
import os

# Run python-dotenv's loader before the environment lookup below
# (presumably populating the environment from a local .env file).
load_dotenv()

# The API key is read from the OPENAI_KEY environment variable; the lookup
# yields None when the variable is not set.
client = OpenAI(api_key=os.getenv("OPENAI_KEY"))

def chatbot(conversation, model="gpt-4-1106-preview", max_tokens=2000, temperature=0):
    """Send `conversation` to the chat-completions endpoint and return the reply.

    Args:
        conversation: List of message dicts (``{'role': ..., 'content': ...}``)
            passed to the API as ``messages``.
        model: Model name for the request.
        max_tokens: Value passed as ``max_tokens`` for the request.
        temperature: Sampling temperature for the request.

    Returns:
        A ``(text, tokens)`` pair: the message content of the first choice and
        ``response.usage.total_tokens``.

    Raises:
        Exception: Any error raised while calling the API or reading the
            response is printed and then re-raised unchanged.
    """
    try:
        response = client.chat.completions.create(
            model=model,
            messages=conversation,
            stream=False,
            max_tokens=max_tokens,
            temperature=temperature,
        )
        text = response.choices[0].message.content
        tokens = response.usage.total_tokens
        return text, tokens
    except Exception as yikes:
        print(f'\n\nError communicating with OpenAI: "{yikes}"')
        # Re-raise instead of exit(0): exiting reports success on failure and,
        # inside a notebook, ends the session and discards its state.
        raise

In [None]:
# Load the complaint text and replace every double newline with a single
# newline before it is sent to the model.
case = open_file('./data/NYT_Complaint_Dec2023.txt').replace('\n\n', '\n')
# Report the size of the complaint in tokens, using the 'gpt-4' encoding.
num_tokens, tokens_string = num_tokens_from_string(case, 'gpt-4')
print(num_tokens)
# Uncomment to inspect the individual token byte strings:
#print(tokens_string)

In [None]:
# Plaintiff notes: the system prompt comes from system_01_notes.md and the
# complaint text (`case`) is sent as the user message.
conversation = [
    {'role': 'system', 'content': open_file('./plaintiff/system_01_notes.md')},
    {'role': 'user', 'content': case},
]
notes, tokens = chatbot(conversation)

# Save the notes under a file name stamped with the current epoch time.
save_file('./plaintiff/log_%s_notes.txt' % time(), notes)

In [None]:
# Plaintiff opening statement: the system prompt comes from
# system_02_opening.md and the notes generated above are the user message.
conversation = [
    {'role': 'system', 'content': open_file('./plaintiff/system_02_opening.md')},
    {'role': 'user', 'content': notes},
]
print(conversation)
opening, tokens = chatbot(conversation)

# Save the opening statement under a file name stamped with the current epoch time.
save_file('./plaintiff/log_%s_opening.txt' % time(), opening)

In [None]:
# Plaintiff opening statement with expertise: the <<CONTEXT>> placeholder in
# the system prompt is filled with the YouTube transcript text.
# NOTE(review): the user message is read from a saved notes log of one specific
# run rather than from the in-memory `notes` -- confirm this is intentional.
conversation = [
    {
        'role': 'system',
        'content': open_file('./plaintiff/system_03_opening_with_expertise.md').replace(
            '<<CONTEXT>>',
            open_file('./youtube/OpenAI sued by New York Times - Copyright Lawyer and Google Engineering Director React.txt'),
        ),
    },
    {'role': 'user', 'content': open_file('./plaintiff/log_1704557483.547313_notes.txt')},
]
print(conversation)
opening, tokens = chatbot(conversation)

# Save the result under a file name stamped with the current epoch time.
save_file('./plaintiff/log_%s_opening_with_expertise.txt' % time(), opening)

In [None]:
# Text to speech: turn a saved opening statement into an MP3 file.
from pathlib import Path

# Relative path "opening.mp3" (resolved against the working directory).
speech_file_path = Path("opening.mp3")

# Request speech audio for the text of a previously saved opening statement.
response = client.audio.speech.create(
    model="tts-1",
    voice="alloy",
    input=open_file('./plaintiff/log_1704944756.305965_opening.txt'),
)

# Write the returned audio to speech_file_path.
response.stream_to_file(speech_file_path)