# Prep

In [16]:
import openai
import tiktoken
import os
import httpx
from IPython.core.interactiveshell import InteractiveShell
# Notebook-wide display option: with "all", every top-level expression in a cell
# is echoed, not only the last one. Later cells rely on this to show several results.
InteractiveShell.ast_node_interactivity = "all" # show several outputs in jupyter cell

### Connect to model

Azure Work

In [14]:
# Module-level Azure OpenAI configuration for the "work" deployment.
# Optional corporate proxy settings (disabled):
#os.environ['HTTPS_PROXY'] = 'http://de001-surf.zone2.proxy.allianz:8080'
#os.environ['HTTP_PROXY'] = 'http://de001-surf.zone2.proxy.allianz:8080'
openai.api_type = "azure"
openai.api_version = os.environ['OPENAI_API_VERSION']

# Alternative ("FEES") credentials, disabled:
# openai.azure_endpoint = os.environ['AZURE_OPENAI_ENDPOINT_FEES']
# openai.api_key = os.environ['AZURE_OPENAI_API_KEY_FEES']

# The openai>=1.0 module-level client (used by `openai.chat.completions.create`
# below) takes its Azure endpoint from `openai.azure_endpoint`; `openai.api_base`
# is not read by that client, so setting only api_base has no effect.
openai.azure_endpoint = os.environ['AZURE_OPENAI_ENDPOINT']
openai.api_base = openai.azure_endpoint  # legacy (<1.0) attribute name, kept for compatibility
openai.api_key = os.environ['AZURE_OPENAI_API_KEY']

#os.environ['HTTPS_PROXY'] = ''
#print(os.environ['HTTP_PROXY'])
DefaultModel = 'gpt-4o' # the scope of available models depends on deployment

Azure Home

In [None]:
# Module-level Azure OpenAI configuration for the "home" (FEES) resource.
openai.api_type = "azure"
# The openai>=1.0 module-level client reads `openai.azure_endpoint`; assigning only
# `openai.api_base` is silently ignored, which would pair the FEES key below with
# whatever endpoint the AZURE_OPENAI_ENDPOINT environment variable points to.
# NOTE(review): the module-level client appears to be created lazily on first use,
# so run this cell before the first `openai.*` request — confirm.
openai.azure_endpoint = os.environ['AZURE_OPENAI_ENDPOINT_FEES']
openai.api_base = openai.azure_endpoint  # legacy (<1.0) attribute name, kept for compatibility
openai.api_version = os.environ['OPENAI_API_VERSION']
openai.api_key = os.environ['AZURE_OPENAI_API_KEY_FEES']
DefaultModel = 'gpt-4o' # the scope of available models depends on deployment

OpenAI Home

In [15]:
# Explicit (non-Azure) OpenAI client; the key is looked up in OPENAI_API_KEY_HOME
# and is None when that variable is unset.
client = openai.OpenAI(
    api_key=os.getenv("OPENAI_API_KEY_HOME"),
)

In [5]:
# Catalogue of models keyed by model name.
#   'input' / 'output' : prices; calculate_cost treats them as cost per million tokens
#   'context_window'   : presumably the context size in thousands of tokens — TODO confirm
#   'training'         : training-data cutoff label
#   'output_size'      : presumably max output tokens (K) for chat models and embedding
#                        dimensions (K) for embedding models — TODO confirm
#   'desc'             : short human-readable description
# The size field used to be stored under a second 'output' key; because duplicate keys
# in a dict literal keep only the last value, it silently replaced the output price.
OpenAIModels = {
    'gpt-4o': {'input': 5.00, 'output': 15.00, 'context_window': 128, 'training': 'Oct23', 'output_size': 4, 'desc': 'Text and image input, text output'},
    'gpt-4-turbo': {'input': 10.00, 'output': 30.00, 'context_window': 128, 'training': 'Dec23', 'output_size': 4, 'desc': 'Text and image input, text output'},
    'gpt-3.5-turbo': {'input': 0.5, 'output': 1.5, 'context_window': 16, 'training': 'Sep21', 'output_size': 4, 'desc': 'Text input, text output'},
    'text-embedding-3-small': {'input': 0.02, 'output': 0, 'context_window': 16, 'training': '', 'output_size': 1.5, 'desc': 'convert text into a numerical form'},
    'text-embedding-3-large': {'input': 0.13, 'output': 0, 'context_window': 16, 'training': '', 'output_size': 3, 'desc': 'convert text into a numerical form'},
}
ModelNames=list(OpenAIModels.keys())
DefaultModel = ModelNames[1] # gpt-4-turbo (second entry of the catalogue)
DefaultModel

'gpt-4-turbo'

### Roles

There are 3 roles:

- User - This is meant to mimic the end-user that is interacting with the assistant. This is the role that you will be using most of the time.
- System - This role carries background instructions and nudges that you want to inject into the conversation but that don't need a response. At the moment, system is weighted less than "user," so in my opinion it is often still more effective to use the user role to encourage specific behaviors.
- Assistant - This is the agent's response. Often this will be actual responses, but keep in mind... you will be able to inject your own responses here, so you can actually have the agent say whatever you want. This is a bit of a hack, but it's a fun one and can be useful in certain situations.

### History

The way the ChatGPT API works is you need to query the model. Since these models often make use of chat history/context, every query needs to, or can, include a full message history context. 

Keep in mind, however, that every model has a maximum context length (for example, 128K tokens for gpt-4o), so the full message history has to stay under that limit. There are lots of options to work around this, the simplest being to truncate earlier messages or to summarize and condense the previous message history. You might use a database or some other storage method to keep the history.

In [None]:
# Running conversation shared by every chat() call in this kernel session.
message_history = []

def chat(user_input, role="user", model=DefaultModel):
    """Send one message together with the accumulated history and return the reply.

    Args:
        user_input: Message text; it is converted to ``str`` before sending.
        role: Role attached to the new message (``"user"`` by default).
        model: Model name passed to the chat completions endpoint.

    Returns:
        The content of the first choice of the completion.

    The new message and the reply are appended to ``message_history`` only after
    the request has succeeded. Previously the message was appended first, so a
    failed request left an unanswered message in the history and a retry sent
    the same prompt twice.
    """
    pending_message = {"role": role, "content": f"{user_input}"}
    completion = client.chat.completions.create(
        messages=message_history + [pending_message],
        model=model,
    )
    reply_content = completion.choices[0].message.content
    # Commit both turns together so the history never ends on an unanswered message.
    message_history.append(pending_message)
    message_history.append({"role": "assistant", "content": f"{reply_content}"})
    return reply_content

## Costs

In [None]:
# Dictionary to hold different scenarios and their estimated output-to-input token ratios
token_ratio_estimates = {
    'email_response': 1.5,    # Outputs are generally 1.5 times longer than the inputs
    'content_summary': 0.8,   # Summaries are usually shorter than the original content
    'factual_answer': 0.5,    # Direct answers to factual questions tend to be concise
    'creative_story': 3.0,    # Creative stories may be much longer than the initial prompts
    'detailed_explanation': 20,  # Detailed explanations or complex answers can be much longer
    'limit': 0  # This will be treated specially to limit output tokens
}

# Maximum tokens for the 'limit' scenario
max_output_tokens = 100

def calculate_cost(text, model_name = DefaultModel, scenario='limit'):
    """Estimate the cost of sending ``text`` to a model and receiving a reply.

    Args:
        text: Prompt text; its tokens are counted with the tiktoken encoding
            registered for ``model_name``.
        model_name: Key into ``OpenAIModels``; also selects the tokenizer.
        scenario: ``'limit'`` assumes exactly ``max_output_tokens`` output tokens.
            Any other value is looked up in ``token_ratio_estimates`` (ratio 1 if
            missing) and the output size is ``int(input_tokens * ratio)``.

    Returns:
        The estimated cost rounded to 3 decimals, with prices read as cost per
        million tokens.

    Raises:
        KeyError: If ``model_name`` is not in ``OpenAIModels`` (tiktoken may also
            raise for a model name it does not know).
    """
    enc = tiktoken.encoding_for_model(model_name)
    input_tokens_count = len(enc.encode(text))

    # Determine the output token count based on the scenario
    if scenario == 'limit':
        output_token_count = max_output_tokens
    else:
        ratio = token_ratio_estimates.get(scenario, 1)  # Use 1 as a default ratio if the scenario is not found
        output_token_count = int(input_tokens_count * ratio)

    # Prices come from the OpenAIModels catalogue. The name ModelCosts that was
    # referenced here is not defined in this notebook and raised NameError.
    pricing = OpenAIModels[model_name]
    if isinstance(pricing, dict):
        # Separate prices for prompt (input) and generated (output) tokens.
        input_cost_per_million = pricing.get('input', 0)
        output_cost_per_million = pricing.get('output', 0)
        total_cost = (
            (input_tokens_count / 1_000_000) * input_cost_per_million
            + (output_token_count / 1_000_000) * output_cost_per_million
        )
    else:
        # A bare number is treated as one flat price for all tokens.
        total_token_count = input_tokens_count + output_token_count
        total_cost = (total_token_count / 1_000_000) * pricing

    return round(total_cost,3)

# Examples

### Check Schema output

In [19]:
# JSON-mode demo. When response_format is "json_object", some message in the
# conversation must explicitly instruct the model to produce JSON.
# A follow-up turn can be added by appending an assistant answer and a new user question.
messages = [
    dict(role="system", content="You are a helpful assistant."),
    dict(role="system", content="You are a helpful assistant designed to output JSON."),
    dict(role="user", content="Who won the world series in 2020?"),
]
# Optional request parameters left at their defaults: seed (same integer across
# requests for more deterministic outputs), temperature, max_tokens.
response = openai.chat.completions.create(
    messages=messages,
    model=DefaultModel,
    response_format=dict(type="json_object"),
)
# Three displayed results: the full response, the token usage, and the JSON text.
response
response.usage.total_tokens
response.choices[0].message.content

ChatCompletion(id='chatcmpl-9gtWawsMOOE0XTzBUwyGgpGkdpTmH', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='{\n  "world_series_winner": "Los Angeles Dodgers",\n  "year": 2020\n}', role='assistant', function_call=None, tool_calls=None), content_filter_results={'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}})], created=1720010240, model='gpt-4o-2024-05-13', object='chat.completion', system_fingerprint='fp_abc28019ad', usage=CompletionUsage(completion_tokens=22, prompt_tokens=41, total_tokens=63), prompt_filter_results=[{'prompt_index': 0, 'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}])

63

'{\n  "world_series_winner": "Los Angeles Dodgers",\n  "year": 2020\n}'

: 

Every response will include a finish_reason. The possible values for finish_reason are:
- 'stop': API returned complete message, or a message terminated by one of the stop sequences provided via the stop parameter
- 'length': Incomplete model output due to max_tokens parameter or token limit
- 'function_call': The model decided to call a function
- 'content_filter': Omitted content due to a flag from our content filters
- 'null': API response still in progress or incomplete

In [18]:
# Why generation ended for the first choice; see the list of finish_reason values above.
response.choices[0].finish_reason

'stop'

### Derive Date from Filename

In [1]:
import os
import openai
# Self-contained cell: configure the module-level Azure client, then ask the
# model to derive a period end date from a file name.
openai.api_type = "azure"
# The openai>=1.0 module-level client reads `openai.azure_endpoint`;
# `openai.api_base` alone is ignored by it.
openai.azure_endpoint = os.environ['AZURE_OPENAI_ENDPOINT']
openai.api_base = openai.azure_endpoint  # legacy (<1.0) attribute name, kept for compatibility
openai.api_version = os.environ['OPENAI_API_VERSION']
openai.api_key = os.environ['AZURE_OPENAI_API_KEY']


# Few-shot instructions: examples 1-3 are quarter-based, 4-6 month-based.
sys_prompt = '''
Please read the filename provided and extract the end date of the quarter in the format DD.MM.YYYY  . The filename will typically indicate the quarter and year, but the name of the file could also have month and a year. Use the following examples for reference:

1. 'Fees Q3 2023 Valida corrected.xlsx' should return 30.09.2023
2. 'talktalktal 2022Q4_dummytext_not_rrr.xlsx' should return 31.12.2022
3. 'Q22023_bla-bla-bla_blaxxxx.xlsx' should return 30.06.2023
4. 'dfa fsd sds s September 22' should return 30.09.2022
5. 'Somedata_052015_blaxxxx.xlsx' should return 31.05.2015
6. 'Data excteract Mar 2021 final' should return 31.03.2021

Identify the quarter and year from the filename and return the corresponding end date of that quarter.
'''

# File name under test; the commented alternatives exercise other naming patterns.
name_of_file = 'sdfsfdsfd2012Q4_dummytext_not_rrr.xlsx'
#name_of_file = 'F 2014 Q1 Valida corrected.xlsx'
#name_of_file = 'File corrected Mar 2022.xlsx'

messages = [
                {"role": "system", "content": sys_prompt},
                {"role": "user", "content": name_of_file}
            ]
# os.environ['HTTPS_PROXY'] = ''
response = openai.chat.completions.create(
                model='gpt-4',
                messages=messages,
                temperature=0.1,  # low temperature: the task has a single right answer
                max_tokens=200
            )
response.choices[0].message.content

'31.12.2012'

## Entity Extraction

In [None]:
import os
from openai import AzureOpenAI

# Azure OpenAI client for the interactive entity-extraction demo.
client = AzureOpenAI(
    azure_endpoint=os.getenv("AOAI_ENDPOINT"),
    api_version="2024-05-01-preview",
    api_key=os.getenv("AOAI_KEY")
)
GPT_MODEL = "gpt-35-turbo"

# Sample input from Chapter 1:
#Hello. My name is Amit Bahree. I’m calling from Acme Insurance, Bellevue, WA. My colleague mentioned that you are interested in learning about our comprehensive benefits policy. Could you give me a call back at (555) 111-2222 when you get a chance so we can go over the benefits? I can be reached Monday to Friday during normal business hours of PST. If you want you can also try and reach me on emails at aweomseinsrance@acme.com. Thanks, Amit.

# The system message fixes the JSON shape the model should answer with.
conversation=[{"role": "system", "content": "You are an AI assistant that extracts entities from text as JSON. \nHere is an example of your output format:\n{  \n   \"the_name\": \"\",\n   \"the_company\": \"\",\n   \"a_phone_number\": \"\"\n}"}]
print("Please enter what you want to talk about:")

# Read-eval loop: every turn sends the whole conversation; typing "exit" stops it.
while True:
    user_input = input("> ")
    # Ignore surrounding whitespace so that e.g. "exit " still ends the loop.
    if user_input.strip().lower() == "exit":
        break
    
    conversation.append({"role": "user", "content": user_input})

    response = client.chat.completions.create(
        model = GPT_MODEL,
        messages = conversation)

    # message.content is optional in the API response; fall back to an empty
    # string so the concatenation below cannot raise TypeError on None.
    reply = response.choices[0].message.content or ""

    conversation.append({"role": "assistant", "content": reply})
    print("\nAI:" + reply + "\n")

## Chunking

Chunk into sentences

In [None]:
import os
import re
from openai import OpenAI
from tqdm import tqdm # for progress bars
import tiktoken as tk

# OpenAI client for the embedding demo; the key is read from OPENAI_API_BOOK_KEY.
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_BOOK_KEY"),
)

# function that splits the text into chunks based on sentences
def split_sentences(text):
    """Split ``text`` on '.', '!' and '?' and return the non-empty, trimmed pieces.

    Fragments are stripped before the emptiness check. Checking first let
    whitespace-only fragments through (for example the trailing " " in
    "Hello. "), which then became empty strings in the result.

    Args:
        text: Text to split.

    Returns:
        list[str]: Sentence fragments without the terminating punctuation.
    """
    stripped = (fragment.strip() for fragment in re.split(r'[.!?]', text))
    return [fragment for fragment in stripped if fragment]

# count tokens
def count_tokens(string: str, encoding_name="cl100k_base") -> int:
    """Return the number of tokens ``string`` encodes to.

    Args:
        string: Text to tokenize.
        encoding_name: Name of the tiktoken encoding to use.
    """
    return len(tk.get_encoding(encoding_name).encode(string))

# OpenAI embeddings example from Chapter 2
def get_embedding(text):
    """Request an embedding for ``text`` and return the first result's vector."""
    result = client.embeddings.create(
        input=text,
        model="text-embedding-ada-002",
    )
    first_item = result.data[0]
    return first_item.embedding

if __name__ == "__main__":
    # Demo: embed every sentence of a short text and report the token total.
    text = "This is the first sentence. This is the second sentence. Guess what? This is the fourth sentence."
    sentences = split_sentences(text)

    # Accumulators: [sentence, embedding] pairs and the summed token count.
    total_token_count = 0
    sentence_embeddings = []

    for sentence in tqdm(sentences):
        # Tokens are counted first, then the sentence is embedded.
        total_token_count = total_token_count + count_tokens(sentence, "cl100k_base")
        embedding = get_embedding(sentence)
        sentence_embeddings.append([sentence, embedding])

    # One [sentence, embedding] pair was collected per sentence.
    print("Number of sentence embeddings:", len(sentence_embeddings))
    print("Total number of tokens:", total_token_count)

In [None]:
# Use spaCy to tokenize the text into sentences.
# Use tiktoken to count the tokens accurately.
# Slide through the sentences using a window (defined by the token limit), optionally allowing overlaps.

# pip install spacy
# python -m spacy download en_core_web_sm

import spacy
import tiktoken as tk
from openai import AzureOpenAI
import os

# Azure OpenAI client for the spaCy chunking demo; endpoint and key come from
# the AOAI_ENDPOINT / AOAI_KEY environment variables.
client = AzureOpenAI(
    azure_endpoint=os.getenv("AOAI_ENDPOINT"),
    api_key=os.getenv("AOAI_KEY"),
    api_version="2022-12-01",
)

# count tokens
def count_tokens(string: str, encoding_name="cl100k_base") -> int:
    """Count the tokens of ``string`` under the tiktoken encoding ``encoding_name``."""
    token_ids = tk.get_encoding(encoding_name).encode(string)
    return len(token_ids)

# OpenAI embeddings example from Chapter 2
def get_embedding(text):
    """Embed ``text`` with the "ada-embedding" model and return the vector.

    Only the first item of the response's ``data`` list is used.
    """
    result = client.embeddings.create(input=text, model="ada-embedding")
    first_item = result.data[0]
    return first_item.embedding

# function that splits the text into chunks based on sentences
def chunking_with_spacy(text, max_tokens, 
                        overlap=0, 
                        model="en_core_web_sm"):
    """Group spaCy-detected sentences into chunks of at most ``max_tokens`` tokens.

    Sentences are packed greedily in order. After each chunk the window moves
    forward so that the next chunk repeats the last ``overlap`` sentences; it
    always advances by at least one sentence.

    Two defects of the earlier version are fixed here:
      * A single sentence larger than ``max_tokens`` used to produce an empty
        chunk and was then skipped, losing its text. It is now emitted as a
        chunk of its own; this is the only case where a chunk exceeds the budget.
      * With ``overlap > 0`` the loop kept sliding after the final sentence was
        already covered, emitting tail chunks that only repeated the end of
        the previous chunk. The loop now stops once the last sentence is included.

    Args:
        text: Text to chunk.
        max_tokens: Token budget per chunk, measured with ``count_tokens``.
        overlap: Number of trailing sentences repeated at the start of the next
            chunk (expected to be non-negative).
        model: Name of the spaCy pipeline to load.

    Returns:
        list[str]: Chunks, each the sentences of one window joined by a space.
    """
    # Load spaCy model
    nlp = spacy.load(model)
    
    # Tokenize the text into sentences using spaCy
    doc = nlp(text)
    sentences = [sent.text for sent in doc.sents]
    
    # Token count of every sentence, computed once up front
    tokens_lengths = [count_tokens(sent) for sent in sentences]
    
    chunks = []
    sentence_count = len(sentences)
    start_idx = 0
    
    while start_idx < sentence_count:
        # Grow the window while the next sentence still fits the budget.
        end_idx = start_idx
        current_token_count = 0
        while (end_idx < sentence_count
               and current_token_count + tokens_lengths[end_idx] <= max_tokens):
            current_token_count += tokens_lengths[end_idx]
            end_idx += 1

        # Nothing fit: the sentence alone exceeds the budget. Keep it rather
        # than emitting an empty chunk and dropping the text.
        if end_idx == start_idx:
            end_idx = start_idx + 1
        
        chunks.append(" ".join(sentences[start_idx:end_idx]))

        # The last sentence is covered; sliding further would only repeat it.
        if end_idx >= sentence_count:
            break
        
        # Sliding window adjustment: always move forward by at least one sentence.
        window_size = end_idx - start_idx
        if overlap >= window_size:
            start_idx += 1
        else:
            start_idx += window_size - overlap

    return chunks

if __name__ == "__main__":
    # Demo input: four sentences chunked with a small token budget.
    text = ("This is a demonstration of text chunking with spaCy and tiktoken. "
        "Using both allows for precise token counting and effective chunking. "
        "Overlap and sliding window strategies are useful for various applications. "
        "Choose your strategy based on your requirements.")

    # Budget per chunk and number of sentences shared by neighbouring chunks.
    overlap_sentences = 2
    max_tokens = 25
    chunks = chunking_with_spacy(text, max_tokens, overlap=overlap_sentences)

    # Print the chunks with 1-based numbering.
    for chunk_no, chunk in enumerate(chunks, start=1):
        print(f"Chunk {chunk_no}:\n{chunk}\n")

In [None]:
import os
import re
from time import sleep
import time
import textwrap
from openai import AzureOpenAI
from tqdm import tqdm # for progress bars
import tiktoken as tk
import nltk
import spacy

# Azure OpenAI client shared by the embedding and summary helpers below;
# endpoint and key come from the AOAI_ENDPOINT / AOAI_KEY environment variables.
client = AzureOpenAI(
    azure_endpoint=os.getenv("AOAI_ENDPOINT"),
    api_key=os.getenv("AOAI_KEY"),
    api_version="2024-05-01-preview",
)

# Model name passed to the completions call in generate_summaries.
GPT_MODEL = "gpt-35-turbo"

# function that splits the text into chunks based on sentences
def split_sentences(text):
    """Split ``text`` on '.', '!' and '?' and return the non-empty, trimmed pieces.

    Each fragment is stripped before it is tested for emptiness. Testing first
    kept whitespace-only fragments (such as the text after a final ". "), and
    they ended up as empty strings in the result.

    Args:
        text: Text to split.

    Returns:
        list[str]: Sentence fragments without the terminating punctuation.
    """
    trimmed = [piece.strip() for piece in re.split(r'[.!?]', text)]
    return [piece for piece in trimmed if piece]

# function that splits the text into chunks of bounded character length
def split_sentences_by_textwrap(text):
    """Wrap ``text`` into chunks of at most 2048 characters using ``textwrap``.

    Long words are not broken and hyphens are not used as break points.
    Despite the name, this does not look at sentence boundaries.

    Returns:
        list[str]: The wrapped chunks, in order.
    """
    wrapper = textwrap.TextWrapper(
        width=2048,                # maximum chunk size in characters
        break_long_words=False,
        break_on_hyphens=False,
    )
    return wrapper.wrap(text)

def split_sentences_by_nltk(text):
  """Return the sentences of ``text`` as a new list, as found by ``nltk.sent_tokenize``."""
  return [sentence for sentence in nltk.sent_tokenize(text)]

def split_sentences_by_spacy(text, max_tokens, 
                        overlap=0,
                        model="en_core_web_sm"):
    """Group spaCy-detected sentences into chunks of at most ``max_tokens`` tokens.

    Sentences are packed greedily in order. After each chunk the window moves
    forward so that the next chunk repeats the last ``overlap`` sentences; it
    always advances by at least one sentence.

    Two defects of the earlier version are fixed here:
      * A single sentence larger than ``max_tokens`` used to produce an empty
        chunk and was then skipped, losing its text. It is now emitted as a
        chunk of its own; this is the only case where a chunk exceeds the budget.
      * With ``overlap > 0`` the loop kept sliding after the final sentence was
        already covered, emitting tail chunks that only repeated the end of
        the previous chunk. The loop now stops once the last sentence is included.

    Args:
        text: Text to chunk.
        max_tokens: Token budget per chunk, measured with ``count_tokens``.
        overlap: Number of trailing sentences repeated at the start of the next
            chunk (expected to be non-negative).
        model: Name of the spaCy pipeline to load.

    Returns:
        list[str]: Chunks, each the sentences of one window joined by a space.
    """
    # Load spaCy model
    nlp = spacy.load(model)
    
    # Tokenize the text into sentences using spaCy
    doc = nlp(text)
    sentences = [sent.text for sent in doc.sents]
    
    # Token count of every sentence, computed once up front
    tokens_lengths = [count_tokens(sent) for sent in sentences]
    
    chunks = []
    sentence_count = len(sentences)
    start_idx = 0
    
    while start_idx < sentence_count:
        # Grow the window while the next sentence still fits the budget.
        end_idx = start_idx
        current_token_count = 0
        while (end_idx < sentence_count
               and current_token_count + tokens_lengths[end_idx] <= max_tokens):
            current_token_count += tokens_lengths[end_idx]
            end_idx += 1

        # Nothing fit: the sentence alone exceeds the budget. Keep it rather
        # than emitting an empty chunk and dropping the text.
        if end_idx == start_idx:
            end_idx = start_idx + 1
        
        chunks.append(" ".join(sentences[start_idx:end_idx]))

        # The last sentence is covered; sliding further would only repeat it.
        if end_idx >= sentence_count:
            break
        
        # Sliding window adjustment: always move forward by at least one sentence.
        window_size = end_idx - start_idx
        if overlap >= window_size:
            start_idx += 1
        else:
            start_idx += window_size - overlap

    return chunks

# count tokens
def count_tokens(string: str, encoding_name="cl100k_base") -> int:
    """Return the length of ``string`` in tokens for the given tiktoken encoding name."""
    tokenizer = tk.get_encoding(encoding_name)
    return len(tokenizer.encode(string))

# OpenAI embeddings example from Chapter 2
def get_embedding(text):
    """Return the embedding vector of ``text`` from the "ada-embedding" model.

    Only the first item of the response's ``data`` list is used.
    """
    result = client.embeddings.create(
        input=text,
        model="ada-embedding",
    )
    return result.data[0].embedding

def _summarize_chunk(chunk):
    """Request a one-sentence summary of a single chunk and return its text."""
    completion = client.completions.create(
        model=GPT_MODEL,
        prompt=f"Summarize the following text in one sentence:\n{chunk}\nSummary:",
        max_tokens=1500,
        temperature=0.7,
    )
    return completion.choices[0].text


def generate_summaries(chunks):
    """Summarize every chunk in order and return the list of summaries.

    A progress bar is shown over ``chunks``, and the function pauses for one
    second after each request as simple rate limiting.

    NOTE(review): this calls the text-completions endpoint with GPT_MODEL;
    confirm that the deployment behind that name supports it.
    """
    collected = []
    for piece in tqdm(chunks):
        collected.append(_summarize_chunk(piece))
        sleep(1)  # rate limiting between requests
    return collected

def process_chunks(sentences):
    """Embed every item of ``sentences`` and print two summary lines.

    Tokens are counted with the "cl100k_base" encoding. The number of
    embeddings and the token total are printed, each prefixed with a tab.

    Returns:
        list: ``[text, embedding]`` pairs in input order.
    """
    token_total = 0
    pairs = []

    for item in tqdm(sentences):
        # Count first, then embed.
        token_total = token_total + count_tokens(item, "cl100k_base")
        vector = get_embedding(item)
        pairs.append([item, vector])

    print("\tNumber of sentence embeddings:", len(pairs))
    print("\tTotal number of tokens:", token_total)

    return pairs

def _run_chunking_strategy(title, splitter, text):
    """Run one chunking strategy: split, embed, and report the elapsed time.

    The timed window always covers both the split and ``process_chunks``, so
    the four strategies are measured the same way.

    Returns:
        The chunks produced by ``splitter(text)``.
    """
    print(title)
    start_time = time.perf_counter()
    chunks = splitter(text)
    process_chunks(chunks)
    execution_time = time.perf_counter() - start_time
    print("Execution time:", execution_time, "seconds")
    print("="*20)
    return chunks


def main():
    """Compare four chunking strategies on one text file, then summarize the spaCy chunks.

    The file is read once. Each strategy is timed over splitting plus
    embedding, and the chunks from the spaCy strategy are finally passed to
    ``generate_summaries`` and printed.

    Changes from the earlier version:
      * The file is opened with an explicit UTF-8 encoding instead of the
        locale default (NOTE(review): assumes the data file is UTF-8 — confirm).
      * Strategy 3 (NLTK) started its timer after splitting, unlike the other
        three; all strategies now use the same window.
      * The "reset" assignments between sections had no effect and were removed.
    """
    # load a text file that you want to chunk
    TEXT_FILE = "./data/women_fifa_worldcup_2023.txt"
    
    print("Reading the file ...")
    
    # read the text from the file
    with open(TEXT_FILE, "r", encoding="utf-8") as f:
        text = f.read()

    _run_chunking_strategy(
        "1. Simple sentence chunking ...", split_sentences, text)
    _run_chunking_strategy(
        "2. Sentence chunking using textwrap ...", split_sentences_by_textwrap, text)
    _run_chunking_strategy(
        "3. Sentence chunking using NLTK ...", split_sentences_by_nltk, text)
    # Only the spaCy chunks are kept; they feed the summaries below.
    chunks = _run_chunking_strategy(
        "4. Sentence chunking using spaCy ...",
        lambda source: split_sentences_by_spacy(source, max_tokens=2000, overlap=0),
        text)

    # generate summaries for each chunk using OpenAI API
    summaries = generate_summaries(chunks)
    print("Summaries generated by OpenAI API:")
    # print the summaries
    print(summaries)

if __name__ == "__main__":
    main()