In [20]:
import tiktoken
from langchain.text_splitter import RecursiveCharacterTextSplitter
import uuid
from time import time
import json

# Build the tokenizer used to measure text length in tokens.
# NOTE: despite its name, `tokenizer_name` is bound to the object returned by
# tiktoken.encoding_for_model, not to a string; only its `.name` attribute is
# used, to look the encoding up again through tiktoken.get_encoding.
tokenizer_name = tiktoken.encoding_for_model("gpt-4-1106-preview")
tokenizer = tiktoken.get_encoding(tokenizer_name.name)

# Length function handed to the text splitter: measures text in tokens.
def tiktoken_len(text):
    """Return how many tokens `text` encodes to with the module-level `tokenizer`.

    `disallowed_special=()` is passed to `encode`, so no special-token string
    is treated as disallowed while encoding.
    """
    return len(tokenizer.encode(text, disallowed_special=()))

# Splitter whose lengths are computed by tiktoken_len, i.e. counted in tokens.
text_splitter = RecursiveCharacterTextSplitter(
    # The chunk size depends on which model is used: for example, with the
    # 16k GPT models 8k is reasonable, and it could possibly go higher.
    chunk_size=8000,
    chunk_overlap=100,
    length_function=tiktoken_len,
    # Separators listed from coarsest (blank line) to finest (empty string).
    separators=["\n\n", "\n", " ", ""],
)

# Generate a random identifier (UUID version 4) in its canonical text form.
def get_a_uuid():
    """Return a new random UUID as a 36-character hyphenated hex string."""
    random_id = uuid.uuid4()
    return str(random_id)

# Count the tokens in a string and expose the individual token byte pieces.
def num_tokens_from_string(string: str, model_name: str) -> "tuple[int, list[bytes]]":
    """Tokenize `string` with the encoding tiktoken associates with `model_name`.

    Args:
        string: Text to tokenize.
        model_name: Model name handed to `tiktoken.encoding_for_model`
            (for example 'gpt-4').

    Returns:
        A `(num_tokens, tokens_string)` pair: the number of tokens, and a list
        with the bytes obtained by decoding each token id on its own.
    """
    encoding = tiktoken.encoding_for_model(model_name)
    # Use the same setting as tiktoken_len so that documents which merely
    # mention a special-token string (e.g. "<|endoftext|>") are counted
    # instead of making encode() raise.
    token_integers = encoding.encode(string, disallowed_special=())
    tokens_string = [encoding.decode_single_token_bytes(token) for token in token_integers]

    return len(token_integers), tokens_string

def open_file(filepath):
    """Read the file at `filepath` as UTF-8 text and return its full contents.

    The file is opened with errors="ignore", so bytes that cannot be decoded
    are dropped rather than raising.
    """
    with open(filepath, mode="r", encoding="utf-8", errors="ignore") as handle:
        contents = handle.read()
    return contents
        
def save_file(filepath, content):
    """Write `content` to `filepath` as UTF-8 text, replacing any existing file."""
    with open(filepath, mode='w', encoding='utf-8') as handle:
        handle.write(content)

In [21]:
from openai import OpenAI
from dotenv import load_dotenv
import os

# Load variables from a local .env file (if present) into the environment.
load_dotenv()

# The API key is read from the OPENAI_KEY environment variable rather than
# being written in the notebook; os.environ.get yields None when it is unset.
client = OpenAI(api_key=os.environ.get("OPENAI_KEY"))

def chatbot(conversation, model="gpt-4-1106-preview", max_tokens=2000, temperature=0):
    """Send a chat conversation to the OpenAI API and return the reply.

    Args:
        conversation: List of message dicts with 'role' and 'content' keys,
            passed as `messages` to the chat completions endpoint.
        model: Name of the chat model to call.
        max_tokens: Upper bound on the number of tokens in the reply.
        temperature: Sampling temperature.

    Returns:
        A `(text, tokens)` pair: the content of the first choice's message and
        the total token usage reported for the request.

    Raises:
        Exception: Whatever the client raised is logged and then re-raised, so
            the failing cell stops with a traceback while the kernel and its
            variables stay alive (calling exit(0) here would end the session
            and report success).
    """
    try:
        response = client.chat.completions.create(
            model=model,
            messages=conversation,
            stream=False,
            max_tokens=max_tokens,
            temperature=temperature,
        )
    except Exception as yikes:
        print(f'\n\nError communicating with OpenAI: "{yikes}"')
        raise
    text = response.choices[0].message.content
    tokens = response.usage.total_tokens
    return text, tokens

In [22]:
# Load the complaint text. replace() makes a single pass that turns each pair
# of consecutive newlines into one, so runs of three or more newlines are
# shortened but not fully collapsed.
case = open_file('../data/NYT_Complaint_Dec2023.txt').replace('\n\n', '\n')
# Count the tokens in the complaint using the encoding for 'gpt-4'.
num_tokens, tokens_string = num_tokens_from_string(case, 'gpt-4')
print(num_tokens)

23486


In [23]:
# Defendant notes: the system prompt comes from system_01_notes.md and the
# user message is the full complaint text held in `case`.
conversation = list()
conversation.append({'role': 'system', 'content': open_file('./system_01_notes.md')})
conversation.append({'role': 'user', 'content': case})
notes, tokens = chatbot(conversation)

# Save the reply; time() puts the current timestamp in the file name.
save_file('./log_%s_notes.txt' % time(), notes)

In [24]:
# Show the notes returned by the model as the cell output.
notes

'- Significant time facts:\n  - The New York Times has been producing journalism for over 170 years.\n  - OpenAI was formed in December 2015.\n  - OpenAI LP was created in March 2019 as a for-profit entity.\n  - ChatGPT was released in November 2022.\n  - The Times reached out to Microsoft and OpenAI in April 2023 regarding intellectual property concerns.\n  - GPT-4 was released in 2023.\n  - Microsoft\'s investment in OpenAI reached $13 billion.\n  - The Times filed this complaint on December 27, 2023.\n\n- All characters in the case:\n  - Plaintiff: The New York Times Company ("The Times")\n  - Defendants: Microsoft Corporation and various OpenAI entities (OpenAI, Inc., OpenAI LP, OpenAI GP LLC, OpenAI LLC, OpenAI OpCo LLC, OpenAI Global LLC, OAI Corporation, LLC, OpenAI Holdings, LLC)\n  - Attorneys: Susman Godfrey LLP and Rothwell, Figg, Ernst & Manbeck, P.C.\n\n- Conflicts:\n  - The Times alleges that the defendants have unlawfully used its copyrighted work to create artificial in

In [26]:
# Defendant notes with transcripts: the system prompt is read from
# system_015_research.md and its <<TRANSCRIPTS>> placeholder is replaced with
# the contents of the transcript file; the user message is `notes`.
conversation = list()
conversation.append({'role': 'system', 'content': open_file('./system_015_research.md').replace('<<TRANSCRIPTS>>', open_file('../data/OpenAI sued by New York Times - Copyright Lawyer and Google Engineering Director React.txt'))})
conversation.append({'role': 'user', 'content': notes})
research, tokens = chatbot(conversation)

# Save the reply; time() puts the current timestamp in the file name.
save_file('./log_%s_research.txt' % time(), research)

In [29]:
# Defendant opening statement: the system prompt comes from
# system_02_opening.md.
# NOTE(review): the user message is read from a notes log with a fixed
# timestamp in its name rather than from the in-memory `notes` variable, so
# this cell needs that exact file to exist - confirm this is intentional.
conversation = list()
conversation.append({'role': 'system', 'content': open_file('./system_02_opening.md')})
conversation.append({'role': 'user', 'content': open_file('./log_1705269612.9351523_notes.txt')})
# Prints the whole conversation (both prompts) into the cell output.
print(conversation)
opening, tokens = chatbot(conversation)

# Save the reply; time() puts the current timestamp in the file name.
save_file('./log_%s_opening.txt' % time(), opening)

[{'role': 'system', 'content': '#  MISSION\nYou are a lawyer preparing opening statements for a civil case against your client. Your primary job is to generate thorough opening arguments for your client.\n\n#  CONTEXT\nYou are writing the opening arguments on behalf of the defendant(s).  Your opening statement will ultimtely be used in front of the judge and jury during the civil trial.\n\n#  RULES\n- Tell the story in the present tense\n- When possible tell the story in the first person\n- Talk about sequence of events and not facts\n- Simplified language by aiming for an eighth grade level.\n- Avoid the word “client”  every time you use the client word it says that your lawyer getting paid for standing there talking about your case. Instead use names and touches of humanity that are relevant to make individuals real people.\n\n#  INPUT\nThe USER will provide you with legal notes that support the defendants position.\n\n#  OUTPUT\nYour output will be several paragraphs of an opening a

In [None]:
# Text to speech: turn a saved opening statement into an MP3 file.
from pathlib import Path

# Path('./').parent is still the current directory, so this is ./opening.mp3.
speech_file_path = Path('./').parent / "opening.mp3"

# NOTE(review): the input is read from an opening-statement log with a fixed
# timestamp in its name, not from the in-memory `opening` variable - confirm
# that file is the one intended.
# NOTE(review): check the speech endpoint's documented input-length limit
# against the size of the opening statement.
response = client.audio.speech.create(
  model="tts-1",
  voice="nova",
  input=open_file('./log_1705271145.9187558_opening.txt')
)

# Write the returned audio to the path built above.
response.stream_to_file(speech_file_path)