In [1]:
from langchain.chains.llm import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.schema.document import Document
from langchain.chains.mapreduce import MapReduceChain
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import ReduceDocumentsChain, MapReduceDocumentsChain

In [2]:
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.chat_models import AzureChatOpenAI, ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.docstore.document import Document
from langchain.prompts import PromptTemplate
from IPython.display import display, HTML
from langchain.chat_models import AzureChatOpenAI, ChatOpenAI
from langchain_openai import AzureOpenAIEmbeddings
#we'll use pickle to load text from our text cleaning notebook, but you can load any text into the full_text variable.  It should be stored as a single string.
import pickle, os, re, json
#we'll use time to track how long the LLM service (Azure OpenAI in this notebook) takes to respond, which helps to estimate how long a job will take.
import time
from queue import Queue
from threading import Thread
from openai import OpenAI, AzureOpenAI, AsyncAzureOpenAI

In [3]:
# LangChain chat model; it backs the LLMChain objects built in the map/reduce cell below.
# All connection settings are read from environment variables.
llm = AzureChatOpenAI(
    api_key=os.getenv("OpenAiKey"),
    azure_endpoint=os.getenv("OpenAiEndPoint"),
    azure_deployment=os.getenv("OpenAiChat4o"),
    api_version=os.getenv("OpenAiVersion"),
    openai_api_type="azure",
    temperature=0,
    max_tokens=2000,
)

# Embedding model client (not referenced by the cells visible in this part of the notebook).
embeddings = AzureOpenAIEmbeddings(
    api_key=os.getenv("OpenAiKey"),
    azure_endpoint=os.getenv("OpenAiEndPoint"),
    azure_deployment=os.getenv('OpenAIEmbedding'),
    openai_api_type="azure",
)

# Raw Azure OpenAI SDK client; ask_azure_openai sends its chat completion requests through it.
client = AzureOpenAI(
    azure_endpoint=os.getenv("OpenAiEndPoint"),
    #base_url=f"{os.getenv('OpenAiWestUsEp')}openai/deployments/{os.getenv('OpenAiGpt4v')}/extensions",
    api_version=os.getenv("OpenAiVersion"),
    api_key=os.getenv("OpenAiKey"),
)

  warn_deprecated(


In [69]:
#wrapping in a python function to make it easy to use in other scripts.
def stuff_it_summary(llm, doc):
    """
    Summarize a document with a single LLM call (the "stuff it" strategy).

    llm is the LangChain model used to run the prompt.
    doc may be a single string, a single Document, or an iterable of Documents.
    Returns whatever StuffDocumentsChain.run returns for the documents (the summary text).
    """
    # Define prompt
    prompt_template = """\n\nHuman:  Consider this text:
    <text>
    {text}
    </text>
    Please create a concise summary in narative format.

    Assistant:  Here is the concise summary:"""
    prompt = PromptTemplate.from_template(prompt_template)

    # Define LLM chain
    llm_chain = LLMChain(llm=llm, prompt=prompt)

    # Define StuffDocumentsChain
    stuff_chain = StuffDocumentsChain(llm_chain=llm_chain, document_variable_name="text")

    #Note that although langchain often stores documents in small chunks for the
    #convenience of models with smaller context windows, this "stuff it" method will
    #combine all those chunks into a single prompt call.

    #Normalize the input into a list of Documents.  Previously only strings were handled,
    #and any other input failed with an UnboundLocalError because docs was never assigned.
    if isinstance(doc, str):
        docs = [Document(page_content=doc)]
    elif isinstance(doc, Document):
        docs = [doc]
    else:
        docs = list(doc)
    return stuff_chain.run(docs)

In [88]:
# Map: prompt applied to each document chunk individually to extract its main themes.
# The opening tag must match the closing </documents> tag so the model sees well-formed markup.
map_template = """\n\nHuman: The following is a set of documents
<documents>
{docs}
</documents>
Based on this list of docs, please identify the main themes.

A:  Here are the main themes:"""
map_prompt = PromptTemplate.from_template(map_template)
map_chain = LLMChain(llm=llm, prompt=map_prompt)

# Reduce: prompt that merges the per-chunk theme lists into one consolidated summary.
reduce_template = """\n\nHuman: The following is set of summaries:
<summaries>
{doc_summaries}
</summaries>
Please take these and distill them into a final, consolidated summary of the main themes in narative format. 

A:  Here are the main themes:"""
reduce_prompt = PromptTemplate.from_template(reduce_template)
reduce_chain = LLMChain(llm=llm, prompt=reduce_prompt)

# Takes a list of documents, combines them into a single string, and passes this to an LLMChain
combine_documents_chain = StuffDocumentsChain(
    llm_chain=reduce_chain, document_variable_name="doc_summaries"
)

# Combines and iteratively reduces the mapped documents
reduce_documents_chain = ReduceDocumentsChain(
    # This is final chain that is called.
    combine_documents_chain=combine_documents_chain,
    # If documents exceed context for `StuffDocumentsChain`
    collapse_documents_chain=combine_documents_chain,
    # The maximum number of tokens to group documents into.
    token_max=4000,
)

# Combining documents by mapping a chain over them, then combining results
map_reduce_chain = MapReduceDocumentsChain(
    # Map chain
    llm_chain=map_chain,
    # Reduce chain
    reduce_documents_chain=reduce_documents_chain,
    # The variable name in the llm_chain to put the documents in
    document_variable_name="docs",
    # Return the results of the map steps in the output
    return_intermediate_steps=False,
)

#wrapping in a python function to make it easy to use in other scripts.
def map_reduce_summary(doc, DEBUG=False):
    """
    Summarize a document with the module-level map_reduce_chain.

    doc may be a single string (which is split into chunks first), a single Document,
    or an iterable of Documents that are already split.
    DEBUG, if true, prints how many chunks a string input was split into.
    Returns whatever map_reduce_chain.run returns for the documents (the summary text).
    """
    if isinstance(doc, str):
        #use the LangChain built in text splitter to split our text
        from langchain.text_splitter import RecursiveCharacterTextSplitter
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size = 5000,
            chunk_overlap  = 200,
            length_function = len,
            add_start_index = True,
        )
        split_docs = text_splitter.create_documents([doc])
        if DEBUG: print("Text was split into %s docs"%len(split_docs))
    elif isinstance(doc, Document):
        #a single Document is passed through as a one-element list.
        split_docs = [doc]
    else:
        #anything else is assumed to be an iterable of Documents.  Previously this case
        #failed with an UnboundLocalError because split_docs was never assigned.
        split_docs = list(doc)
    return map_reduce_chain.run(split_docs)

In [6]:
# Example count_tokens function (if you have your own, use it)
def count_tokens(text):
    """Estimate the token count of text as the number of whitespace-separated words."""
    whitespace_separated_words = text.split()
    return len(whitespace_separated_words)

In [7]:
def get_chunks(full_text, OVERLAP=True, DEBUG=False):
    '''
    This will take a text and return a list with sliced chunks of the text in optimal sizing for summarization.  Note that by default, this does include overlapping text in each chunk.
    Overlapping allows more cohesion between text, and should only be turned off when trying to count specific numbers and no duplicated text is a requirement.
    
    We could just drop text up to the maximum context window of our model, but that actually doesn't work very well.
    Part of the reason for this is because no matter the input length, the output length is about the same.
    For example, if you drop in a paragraph or 10 pages, you get about a paragraph in response.
    To mitigate this, we create chunks using the lesser of two values: 25% of the total token count or 2k tokens.
    We'll also overlap our chunks by about a paragraph of text or so, in order to provide continuity between chunks.
    (Logic taken from https://gist.github.com/Donavan/4fdb489a467efdc1faac0077a151407a)
    
    full_text is the text to slice, as a single string.
    OVERLAP controls whether neighbouring chunks share text.
    DEBUG is accepted for interface compatibility but is currently forced off inside the function.
    Returns a list of strings.  Empty and very short texts are returned unchanged as a single-element list.
    '''
    DEBUG = False #debugging at this level is usually not very helpful.
    
    #an empty text has no characters, so the tokens-per-character ratio below would divide by zero.
    char_count = len(full_text)
    if char_count == 0:
        return [full_text]
    
    #Following testing, it was found that chunks should be 2000 tokens, or 25% of the doc, whichever is shorter.
    #max chunk size in tokens
    chunk_length_tokens = 2000
    #chunk length may be shortened later for shorter docs.
    
    #a paragraph is about 200 words, which is about 260 tokens on average
    #we'll overlap our chunks by a paragraph to provide cohesion to the final summaries.
    overlap_tokens = 260
    if not OVERLAP: overlap_tokens = 0
    
    #anything this short doesn't need to be chunked further.
    min_chunk_length = 260 + overlap_tokens*2
    
    
    #grab basic info about the text to be chunked.
    word_count = len(full_text.split(" "))#rough estimate
    token_count = count_tokens(full_text)
    token_per_charater = token_count/char_count

    
    #don't chunk tiny texts
    if token_count <= min_chunk_length:
        if DEBUG: print("Text is too small to be chunked further")
        return [full_text]
    
    
    if DEBUG:
        print ("Chunk DEBUG mode is on, information about the text and chunking will be printed out.")
        print ("Estimated character count:",char_count)
        print ("Estimated word count:",word_count)
        print ("Estimated token count:",token_count)
        print ("Estimated tokens per character:",token_per_charater)

        print("Full text tokens: ",token_count)
        #compare against the maximum chunk size; no model context window size is defined in this function.
        print("How many times bigger than max chunk size: ",round(token_count/chunk_length_tokens,2))
    
    
    
    #if the text is shorter, use smaller chunks
    if (token_count/4<chunk_length_tokens):
        #scale the overlap down by the same ratio as the chunk length.
        overlap_tokens = int((overlap_tokens/chunk_length_tokens)*int(token_count/4))
        chunk_length_tokens = int(token_count/4)
        
        if DEBUG: 
            print("Short doc detected:")
            print("New chunk length:",chunk_length_tokens)
            print("New overlap length:",overlap_tokens)
        
    #convert to characters for easy slicing using our approximate tokens per character for this text.
    overlap_chars = int(overlap_tokens/token_per_charater)
    chunk_length_chars = int(chunk_length_tokens/token_per_charater)
    
    #iterate and create the chunks from the full text.
    chunks = []
    start_chunk = 0
    #the first chunk only overlaps forward; later chunks overlap on both sides.
    end_chunk = chunk_length_chars + overlap_chars
    
    last_chunk = False
    while not last_chunk:
        #the last chunk may not be the full length.
        if(end_chunk>=char_count):
            end_chunk=char_count
            last_chunk=True
        chunks.append(full_text[start_chunk:end_chunk])
        
        #move our slice location
        if start_chunk == 0:
            #step back by the overlap so the second chunk repeats the tail of the first.
            start_chunk += chunk_length_chars - overlap_chars
        else:
            start_chunk += chunk_length_chars
        
        end_chunk = start_chunk + chunk_length_chars + 2 * overlap_chars
        
    if DEBUG:print ("Created %s chunks."%len(chunks))
    return chunks

In [8]:
#Generic prompt skeleton.  get_prompt fills it for the "summary", "interrogate", "answers" and "merge_answers" modes.
#Placeholders: {{TEXT}} (the source text), {{GUIDANCE_1}}/{{GUIDANCE_2}}/{{GUIDANCE_3}} (optional guidance wording),
#{{STYLE}}, {{REQUEST}} (what the model is asked to do) and {{FORMAT}} (output format wording).
prompt_template = """\n\nHuman:  I am going to give you a text{{GUIDANCE_1}}.  This text is extracted from a larger document.  Here is the text:

<text>
{{TEXT}}
</text>
{{GUIDANCE_2}}
{{STYLE}}{{REQUEST}}{{FORMAT}}{{GUIDANCE_3}}
\nAssistant:  Here is what you asked for:
"""

#Prompt used by get_prompt for the "merge_summaries" mode.
#{{TEXT}} receives the numbered <summary_N> blocks and {{FORMAT}} the requested output format.
merge_prompt_template = """\n\nHuman:  Here are a number of related summaries:

{{TEXT}}
Please merge these summaries into a highly detailed single summary in {{FORMAT}} format, preserving as much detail as possible, using less than 1000 tokens.
\nAssistant:  Here is what you asked for:
"""

#this is inserted into the prompt template above, in the {{GUIDANCE_2}} section.
#(the variable name is misspelled, but get_prompt refers to it by this exact name.)
guidance_tempate = """
Here is the additional guidance:
<guidance>
{{GUIDANCE}}
</guidance>
"""

#this prompt asks the LLM to be a newspaper reporter, extracting facts from a document to be used in a later report.  Good for summarizing factual sets of documents.
#Used by get_prompt for the "reporter" mode; {{GUIDANCE}} is the question to answer and {{DOCS_DESCRIPTION}} describes the documents.
reporter_prompt = """\n\nHuman:  You are a newspaper reporter, collecting facts to be used in writing an article later.  Consider this source text:
<text>
{{TEXT}}
</text>
{{DOCS_DESCRIPTION}}  Please create a {{FORMAT}} of all the relevant facts from this text which will be useful in answering the question "{{GUIDANCE}}".  To make your list as clear as possible, do not use and pronouns or ambigious phrases.  For example, use a company's name rather than saying "the company" or they.
\nAssistant:  Here is the {{FORMAT}} of relevant facts:
"""

#Used by get_prompt for the "reporter_summary" mode; {{TEXT}} receives the numbered <note_N> blocks.
reporter_summary_prompt = """\n\nHuman:  You are a newspaper reporter, collecting facts to be used in writing an article later.  Consider these notes, each one derived from a different source text:
{{TEXT}}
Please create a {{FORMAT}} of all the relevant facts and trends from these notes which will be useful in answering the question "{{GUIDANCE}}"{{STYLE}}.  To make your list as clear as possible, do not use and pronouns or ambigious phrases.  For example, use a company's name rather than saying "the company" or "they".
\nAssistant:  Here is the list of relevant facts:

"""

#Used by get_prompt for the "reporter_final" mode; {{TEXT}} receives the numbered <summary_N> blocks.
#NOTE(review): the wording hard-codes that the source reports were written by AWS teams about their customers -- confirm this fits the documents being summarized.
reporter_final_prompt = """\n\nHuman:  You are a newspaper reporter, writing an article based on facts that were collected and summarized earlier.  Consider these summaries:
{{TEXT}}
Each summary is a collection of facts extracted from a number of source reports.  Each source report was written by an AWS team talking about their interactions with their individual customer.  Please create a {{FORMAT}} of all the relevant trends and details from these summaries which will be useful in answering the question "{{GUIDANCE}}".
\nAssistant:  Here is the narrative:

"""

def _fill_template(template, values):
    '''
    Replace every {{NAME}} placeholder in template with values[NAME], in a single pass.
    Placeholders that have no entry in values are left untouched.
    Because the template is scanned only once, placeholder-like text inside an inserted value
    (for example a source document that happens to contain "{{FORMAT}}") is never expanded.
    '''
    return re.sub(r"\{\{(\w+)\}\}", lambda match: values.get(match.group(1), match.group(0)), template)


def _tag_texts(texts, tag):
    '''
    Wrap each text in numbered tags (<tag_1>...</tag_1>, <tag_2>...</tag_2>, ...) and join them.
    A single string is treated as one text instead of being iterated character by character.
    '''
    if isinstance(texts, str):
        texts = [texts]
    return "".join("<%s_%s>\n%s</%s_%s>\n"%(tag,x+1,item,tag,x+1) for x,item in enumerate(texts))


def get_prompt(text,prompt_type,format_type, manual_guidance, style_guide, docs_description=""):
    '''
    Build the final prompt string to send to the gen ai model.

    text should be a single string of the raw text to be sent to the gen ai model
    (or a list of strings for merge_summaries, reporter_summary and reporter_final).
    prompt_type must be one of:
            -summary means summarize the text
            -interrogate means look at the text and ask questions about what is missing
            -answers means looking at the text, provide only details that may help answer the questions according to the Guidance.
            -merge_answers takes a summary as text, and merges in the facts in the guidance section
            -merge_summaries takes 2 or more summaries and merges them together.  The summaries to be merged must be in list format for best results.
            -reporter - like a news reporter, extract details that help answer the guidance questions
            -reporter_summary - like a news reporter looking at a bunch of notes, create a list summary.  Intended as an intermediate step.
            -reporter_final - generate a narrative based on the reporter_summary outputs.
    format_type must be "narrative" or "list" for summary and interrogate (interrogate only supports "list").
            For the merge and reporter modes it is inserted into the prompt as given; answers and merge_answers ignore it.
    manual_guidance Extra instructions to guide the process, usually from the user.
    style_guide TBD.  Currently only the reporter_summary prompt uses it.
    docs_description is a short description of the documents, only used by the reporter prompt.

    Returns the prompt as a string.
    Raises ValueError when prompt_type or format_type is not supported.

    Note that merge_summaries is handled differently than all other options because it iteratively adds in multiple texts.
    '''
    
    #these modes have their own dedicated templates, so handle them first and return directly.
    if prompt_type == "reporter":
        return _fill_template(reporter_prompt, {"TEXT":text,"FORMAT":format_type,"GUIDANCE":manual_guidance,"DOCS_DESCRIPTION":docs_description})
    if prompt_type == "reporter_summary":
        return _fill_template(reporter_summary_prompt, {"TEXT":_tag_texts(text,"note"),"FORMAT":format_type,"GUIDANCE":manual_guidance,"STYLE":style_guide})
    if prompt_type == "reporter_final":
        return _fill_template(reporter_final_prompt, {"TEXT":_tag_texts(text,"summary"),"FORMAT":format_type,"GUIDANCE":manual_guidance})
    if prompt_type == "merge_summaries":
        return _fill_template(merge_prompt_template, {"TEXT":_tag_texts(text,"summary"),"FORMAT":format_type})
    
    #everything below fills the generic prompt_template.
    if prompt_type == "answers":
        format_text = "in list format, using less than 1000 tokens.  "
        request = "Please provide a list of any facts from the text that could be relevant to answering the questions from the guidance section "
        guidance_1 = " and some guidance"
        guidance_2 = _fill_template(guidance_tempate, {"GUIDANCE":manual_guidance})
        guidance_3 = "You should ignore any questions that can not be answered by this text."
    elif prompt_type == "merge_answers":
        request = "The text is a good summary which may lack a few details.  However, the additional information found in the guidance section can be used to make the summary even better.  Starting with the text, please use the details in the guidance section to make the text more detailed.  The new summary shoud use less than 1000 tokens.  "
        format_text = ""
        guidance_1 = " and some guidance"
        guidance_2 = _fill_template(guidance_tempate, {"GUIDANCE":manual_guidance})
        guidance_3 = "You should ignore any comments in the guidance section indicating that answers could not be found."
    else:
        #Based on the options passed in, grab the correct text to eventually use to build the prompt.
        #select the correct type of output format desired, list or summary.  Note that list for interrogate prompts is empty because the request for list is built into that prompt.
        if prompt_type == "interrogate" and format_type != "list":
            raise ValueError("Only list format is supported for interrogate prompts.")
        if format_type == "list":
            if prompt_type == "interrogate":
                format_text = ""#already in the prompt so no format needed.
            else:
                format_text = "in list format, using less than 1000 tokens."
        elif format_type == "narrative":
            format_text = "in narrative format, using less than 1000 tokens."
        else:
            raise ValueError("format_type must be 'narrative' or 'list'.")

        #select the correct prompt type language
        if prompt_type == "summary":
            request = "Please provide a highly detailed summary of this text "
        elif prompt_type == "interrogate":
            request = "This text is a summary that lacks detail.  Please provide a list of the top 10 most important questions about this text that can not be answered by the text."
        else:
            raise ValueError("prompt_type must be one of 'summary', 'interrogate', 'answers', 'merge_answers', 'merge_summaries', 'reporter', 'reporter_summary' or 'reporter_final'.")

        if manual_guidance == "":
            guidance_1 = ""
            guidance_2 = ""
            guidance_3 = ""
        else:
            guidance_1 = " and some guidance"
            guidance_2 = _fill_template(guidance_tempate, {"GUIDANCE":manual_guidance})
            guidance_3 = "  As much as possible, also follow the guidance from the guidance section above.  You should ignore guidance that does not seem relevant to this text."
        
    #style is TBD for the generic template, so the style_guide argument is deliberately not inserted here.
    return _fill_template(prompt_template, {
        "TEXT":text,
        "GUIDANCE_1":guidance_1,
        "GUIDANCE_2":guidance_2,
        "GUIDANCE_3":guidance_3,
        "STYLE":"",
        "REQUEST":request,
        "FORMAT":format_text,
    })

In [9]:
def ask_azure_openai(prompt_text, DEBUG=False):
    '''
    Send a prompt to Azure OpenAI, and return the response.
    DEBUG is used to see exactly what is being sent to and from Azure OpenAI.

    The request is attempted up to 3 times, waiting 10 seconds between attempts.

    Returns a tuple:
        (prompt_text, results, total_tokens, output_tokens, request_time, tokens_per_sec, query_start_time)
    where results is the model's reply with HTML-like tags removed, and request_time is measured from the
    first attempt (so it includes any retry waits).  If every attempt fails, results is the text of the
    last error and the four numeric metrics are all -1.
    '''

    # Ensure the prompt contains the expected format
    if "Assistant:" not in prompt_text:
        prompt_text = "\n\nHuman:" + prompt_text + "\nAssistant: "

    if DEBUG:
        print("sending:", prompt_text)

    start_time = time.time()
    query_start_time = start_time
    attempt = 1
    MAX_ATTEMPTS = 3
    while True:
        try:
            query_start_time = time.time()

            messages=[
                {"role": "system", "content": prompt_text}]

            # Invoke the Azure OpenAI model
            # NOTE(review): replies are capped at 500 tokens here, while several prompts in this
            # notebook ask for "less than 1000 tokens" -- confirm the cap is intended.
            response = client.chat.completions.create(
                messages=messages,
                temperature=0,
                max_tokens=500,
                model=os.getenv("OpenAiChat4o"),
            )
            
            # Extract the result from the response.  The content can be None (no text returned),
            # which is treated as an empty reply instead of crashing on .strip().
            raw_results = (response.choices[0].message.content or "").strip(" \n")

            # Remove any unwanted HTML-like tags if they appear
            results = re.sub('<[^<]+?>', '', raw_results)

            # Compute metrics
            request_time = round(time.time() - start_time, 2)
            if DEBUG:
                print("Received:", results)
                print("request time (sec):", request_time)

            total_tokens = count_tokens(prompt_text + raw_results)
            output_tokens = count_tokens(raw_results)
            # Guard the division: a request time that rounds to 0 must not raise inside this try
            # block, because the handler below would treat it as an API failure and re-send the request.
            tokens_per_sec = round(total_tokens / request_time, 2) if request_time > 0 else 0
            break

        except Exception as e:
            print(f"Error with Azure OpenAI API call: {str(e)}")
            attempt += 1
            if attempt > MAX_ATTEMPTS:
                print("Max attempts reached!")
                results = str(e)
                request_time = -1
                total_tokens = -1
                output_tokens = -1
                tokens_per_sec = -1
                break
            else:
                # Retry after 10 seconds
                time.sleep(10)

    return (prompt_text, results, total_tokens, output_tokens, request_time, tokens_per_sec, query_start_time)


In [10]:
# Threaded function for queue processing.
def thread_request(q, result):
    '''
    Worker loop for ask_aoai_threaded.
    Takes (index, prompt) tuples from the queue q until it is empty, sends each prompt with
    ask_azure_openai, and stores the returned tuple in result[index].
    If a prompt raises, a tuple of the same shape holding the error text is stored instead.
    Returns True once the queue has been drained.
    '''
    from queue import Empty

    while True:
        #get_nowait avoids the race of checking q.empty() and then blocking forever in q.get()
        #when another worker takes the last item in between.
        try:
            work = q.get_nowait()           #fetch new work from the Queue
        except Empty:
            break
        thread_start_time = time.time()
        try:
            data = ask_azure_openai(work[1])
            result[work[0]] = data          #Store data back at correct index
        except Exception as e:
            error_time = time.time()
            print('Error with prompt!',str(e))
            result[work[0]] = (work[1],str(e),count_tokens(work[1]),0,round(error_time-thread_start_time,2),0,thread_start_time)
        finally:
            #always signal to the queue that the task has been processed, otherwise q.join() would hang.
            q.task_done()
    return True

def ask_aoai_threaded(prompts,DEBUG=False):
    '''
    Call ask_azure_openai for every prompt, but multi-threaded (at most 50 worker threads).
    Returns a list with one entry per prompt, in the same order as prompts.  Each entry is the
    tuple stored by thread_request for that prompt.
    '''
    q = Queue(maxsize=0)
    num_threads = min(50, len(prompts))
    
    #one placeholder result slot per prompt; workers overwrite their slot by index.
    results = [{} for _ in prompts]
    #load up the queue with the prompts to send and the index for each job (as a tuple):
    for i, prompt in enumerate(prompts):
        q.put((i,prompt))
        
    #Starting worker threads on queue processing
    for _ in range(num_threads):
        #setting threads as "daemon" allows main program to exit eventually even if these
        #dont finish correctly.  (The daemon argument replaces the deprecated setDaemon call.)
        worker = Thread(target=thread_request, args=(q,results), daemon=True)
        worker.start()

    #now we wait until the queue has been processed
    q.join()

    if DEBUG:print('All tasks completed.')
    return results

In [11]:
def generate_summary_from_chunks(chunks, prompt_options,DEBUG=False, chunks_already_summarized=False):
    """
    This function iterates through a list of chunks, summarizes them, then merges those summaries together into one.
    chunks_already_summarized is used when the chunks passed in are chunks resulting from summarizing docs.
    If the chunks are taken from a source document directly, chunks_already_summarized should be set to False.

    prompt_options is a dict that provides 'prompt_type', 'format_type', 'manual_guidance' and 'style_guide'.
    Returns the merged summary as a string (an empty string when there are no chunks).
    """
    chunks = list(chunks)
    if not chunks:
        #nothing to summarize; do not ask the model to merge zero summaries.
        return ""

    def merge_summaries(group, format_type):
        #ask the model to merge a group of summaries into one, and return only the reply text.
        custom_prompt = get_prompt(group,"merge_summaries",format_type, prompt_options['manual_guidance'], prompt_options['style_guide'])
        return ask_azure_openai(custom_prompt,DEBUG=False)[1]

    if not chunks_already_summarized:#chunks are from a source doc, so summarize them.
        #note that partial summaries are always done in list format to maximize information captured.
        partial_summaries_prompts = [
            get_prompt(chunk,prompt_options['prompt_type'],'list', prompt_options['manual_guidance'], prompt_options['style_guide'])
            for chunk in chunks
        ]
        #results come back in the same order as the prompts, so they line up with chunks by index.
        partial_summaries_results = ask_aoai_threaded(partial_summaries_prompts)
        summaries_list = [result[1] for result in partial_summaries_results]

        if DEBUG: 
            print ("Partial summary chunks done!")
            print ("Creating joint summary...")
    else:
        summaries_list = list(chunks)
        if DEBUG: 
            print ("Summarized chunks detected!")
            print ("Creating joint summary...")
            
    summaries_list_token_count = sum(count_tokens(summary) for summary in summaries_list)
        
    if DEBUG: print("Chunk summaries token count:",summaries_list_token_count)
    
    #check to see if the joint summary is too long.  If it is, iteratively reduce it.
    #we do this, rather than chunking again, so that summaries are not split.
    #it needs to be under 3000 tokens in order to be helpful to the summary (this is an experimental number and may need to be adjusted.)
    #this may be higher than the 2000 used for text originally, because this data is in list format.
    recombine_token_target = 3000
    while summaries_list_token_count>recombine_token_target:
        if DEBUG: print("Starting reduction loop to merge chunks.  Total token count is %s"%summaries_list_token_count)
        previous_token_count = summaries_list_token_count
        new_summaries_list = []
        summaries_list_token_count = 0
        temp_summary_group = []
        temp_summary_group_token_length = 0
        for summary in summaries_list:
            summary_token_length = count_tokens(summary)
            #only flush a non-empty group; an oversized first summary must not trigger a merge of nothing.
            if temp_summary_group and temp_summary_group_token_length + summary_token_length > recombine_token_target:
                #the next summary added would push us over the edge, so summarize the current list, and then add it.
                #note that partial summaries are always done in list format to maximize information captured.
                if DEBUG: print("Reducing %s partial summaries into one..."%(len(temp_summary_group)))
                temp_summary = merge_summaries(temp_summary_group,"list")
                new_summaries_list.append(temp_summary)
                summaries_list_token_count+= count_tokens(temp_summary)
                temp_summary_group = []
                temp_summary_group_token_length = 0
            
            temp_summary_group.append(summary)
            temp_summary_group_token_length+= summary_token_length
        
        #summarize whatever extra summaries are still in the temp list.
        #a single leftover summary that is itself over the target is condensed too, otherwise it would never shrink.
        if len(temp_summary_group)>1 or temp_summary_group_token_length>recombine_token_target:
            if DEBUG: print("Starting final reduction of %s partial summaries into one..."%(len(temp_summary_group)))
            temp_summary = merge_summaries(temp_summary_group,"list")
            new_summaries_list.append(temp_summary)
            summaries_list_token_count+= count_tokens(temp_summary)
        elif len(temp_summary_group)==1:
            if DEBUG: print("Tacking on an extra partial summary")
            new_summaries_list.append(temp_summary_group[0])
            summaries_list_token_count+= temp_summary_group_token_length
            
        summaries_list = new_summaries_list
        
        #stop if a full pass did not shrink the summaries, so the loop can never run forever.
        if summaries_list_token_count >= previous_token_count:
            if DEBUG: print("Reduction pass made no progress, moving on to the final merge.")
            break
        
    if DEBUG: print ("Final merge of summary chunks, merging %s summaries."%(len(summaries_list)))
    full_summary = merge_summaries(summaries_list,prompt_options['format_type'])
    
    return full_summary

In [12]:
def generate_single_doc_summary(full_text, prompt_options,AUTO_REFINE=True, DEBUG=False,ALREADY_CHUNKED_AND_SUMMED=False):
    """
    This function uses get_chunks, get_prompt and ask_azure_openai, as well as the generate_summary_from_chunks above, to iteratively generate high quality summaries.
    AUTO_REFINE, if true, has the LLM generate a list of questions, and then recursively calls this function with those questions for guidance.
    ALREADY_CHUNKED_AND_SUMMED, if true, means that this is being called using a list of summarized documents which should not be chunked or summarized further.

    prompt_options is a dict that provides 'prompt_type', 'format_type', 'manual_guidance' and 'style_guide'.  It is never modified.
    Returns the summary as a string.
    """
    #first break this document into chunks
    if ALREADY_CHUNKED_AND_SUMMED:
        chunks = full_text
    else:
        chunks = get_chunks(full_text,DEBUG=DEBUG)
        
    if DEBUG:
        if prompt_options['prompt_type'] == "answers":
            print ("Generating answers using %s chunks."%(len(chunks)))
        else:
            print ("Generating a new combined summary for %s chunks."%(len(chunks)))
        if ALREADY_CHUNKED_AND_SUMMED:
            print ("Input has already been chunked and summarized, skipping initial chunking.")
        
            
    first_summary = generate_summary_from_chunks(chunks,prompt_options,DEBUG=DEBUG, chunks_already_summarized=ALREADY_CHUNKED_AND_SUMMED)
    
    if not AUTO_REFINE:
        return first_summary
    
    if DEBUG: 
        print ("First summary:")
        print (first_summary)
        print ("Asking the LLM to find weaknesses in this summary...")
        
    #now that we have a rough summary, let's grab some questions about it.
    questions_prompt = get_prompt(first_summary,"interrogate","list", "", "")
    questions_list = ask_azure_openai(questions_prompt,DEBUG=False)[1]

    if DEBUG: 
        print ("Questions from the LLM:")
        print (questions_list)
        
    #work on a copy of the options so the caller's dict stays untouched, even if one of the calls below raises.
    answers_options = dict(prompt_options)
    answers_options['manual_guidance'] = prompt_options['manual_guidance'] + questions_list
    answers_options['prompt_type'] = "answers"
    add_details = generate_single_doc_summary(full_text, answers_options,AUTO_REFINE=False, DEBUG=DEBUG, ALREADY_CHUNKED_AND_SUMMED=ALREADY_CHUNKED_AND_SUMMED)
    if DEBUG: 
        print("Additional Details:")
        print (add_details)
        print("Merging details into original summary...")
    
    #merge the extra details into the first summary, using the original guidance plus the new details.
    merge_guidance = prompt_options['manual_guidance'] + add_details
    custom_prompt = get_prompt(first_summary,"merge_answers",prompt_options['format_type'], merge_guidance, prompt_options['style_guide'])
    final_summary = ask_azure_openai(custom_prompt,DEBUG=False)[1]
    return final_summary

In [13]:
def grab_set_chunks(lst, n):
    """Lazily produce consecutive slices of lst, each holding at most n items.
    Helper for the multi-document summarization function.
    """
    total_items = len(lst)
    yield from (lst[start:start + n] for start in range(0, total_items, n))

In [14]:
def generate_multiple_docs_summary(docs, questions, docs_description, DEBUG=False):
    """
    Read every document passed in and create one summary answer for each question passed in.

    The work happens in three stages, each fanned out through ask_aoai_threaded:
      1. map    - every question is asked of every document ("reporter" prompt);
      2. reduce - while a question still has more than five answers, the answers are
                  merged in groups ("reporter_summary" prompt);
      3. final  - one narrative answer is written per question ("reporter_final" prompt).

    If the documents are longer than two pages or so, it is recommended that you first summarize each document.

    docs is iterated for its keys and then indexed with them (docs[key] is the text handed to the prompt),
        so it is presumably a dict of document name -> document text.
    questions is a list of question strings.
    docs_description is a single sentence describing what the documents are, such as
        "The texts are a collection of product reviews for a Pickle Ball paddle."
    DEBUG prints progress messages when True.

    Returns a dict mapping each question to its final answer (an empty dict when no questions are given).
    """
    #nothing to ask: skip the LLM calls entirely.  (Without this guard the reduce loop
    #indexed questions[0] and raised IndexError whenever more than five docs were passed.)
    if not questions:
        return {}

    #get answers from each doc for each question.
    answers = {}
    prompt2question_doc = {}
    prompts = []
    max_docs_to_scan = 500  #limit for testing

    #build the queries to be passed into Azure OpenAI
    for question in questions:
        for x,doc in enumerate(docs):
            #x is zero-based, so ">=" stops after exactly max_docs_to_scan documents.
            if x >= max_docs_to_scan: break

            questions_prompt = get_prompt(docs[doc],"reporter","list", question, "",docs_description)
            prompt2question_doc[questions_prompt] = (question,doc)
            prompts.append(questions_prompt)

    if DEBUG:print("Starting %s worker threads."%len(prompts))
    prompts_answers = ask_aoai_threaded(prompts,DEBUG=False)

    for question in questions:
        answers[question] = []

    #each result is a 7-item tuple; only the prompt (to recover the question) and the answer are used here.
    for prompt,answer,total_tokens,output_tokens,request_time,tokens_per_sec,query_start_time in prompts_answers:
        question,doc = prompt2question_doc[prompt]
        answers[question].append(answer)

    #count the answers that were actually collected instead of assuming one per document
    #(the scan limit above can make that assumption wrong).
    current_answer_count = max(len(answers[question]) for question in questions)
    if DEBUG: print("All documents have been read.  Reducing answers into the final summary...")
    #reduce this down to 5 or less docs for the final summary by combining the individual answers.
    while current_answer_count > 5:
        #summarize the answers
        prompts = []
        prompts2question = {}

        #merge between 3 and 8 answers per prompt.  You want chunks of roughly 2K tokens.
        answers_per_prompt = max(min(current_answer_count,8),3)
        if DEBUG: print("Combining %s chunks.  (Currently there are %s answers to each question.)"%(answers_per_prompt,current_answer_count))
        for question in questions:
            for partial_chunks in grab_set_chunks(answers[question],answers_per_prompt):
                questions_prompt = get_prompt(partial_chunks,"reporter_summary","list", question, " in less than 1000 tokens")
                prompts.append(questions_prompt)
                prompts2question[questions_prompt] = question

        if DEBUG:print("Starting %s worker threads."%len(prompts))
        prompts_answers = ask_aoai_threaded(prompts,DEBUG=False)

        for question in questions:
            answers[question] = []
        for prompt,answer,total_tokens,output_tokens,request_time,tokens_per_sec,query_start_time in prompts_answers:
            answers[prompts2question[prompt]].append(answer)

        current_answer_count = max(len(answers[question]) for question in questions)

    if DEBUG: print("Creating the final summary for each question.")
    #write the final article:
    prompts = []
    prompts2question = {}
    for question in questions:
        questions_prompt = get_prompt(answers[question],"reporter_final","narrative", question, "")
        prompts.append(questions_prompt)
        prompts2question[questions_prompt] = question

    if DEBUG:print("Starting %s worker threads."%len(prompts))
    prompts_answers = ask_aoai_threaded(prompts,DEBUG=False)

    #from here on, answers maps each question to a single final string rather than a list.
    answers = {}
    for prompt,answer,total_tokens,output_tokens,request_time,tokens_per_sec,query_start_time in prompts_answers:
        answers[prompts2question[prompt]] = answer
    return answers

In [15]:
#Paths to the pickled sample texts.  Each pickle should hold one text stored as a single string.
text_to_open_short = './Data/Pickle/hills.pkl'  #2-3 page story, Hills Like White Elephants
text_to_open_mid = './Data/Pickle/algernon.pkl'  #short story, Flowers for Algernon
text_to_open_long = './Data/Pickle/frankenstien.pkl' #short novel, Frankenstein
text_to_open_short_factual = './Data/Pickle/elvis.pkl'  #longest wikipedia article, Elvis.

#NOTE: pickle.load can execute arbitrary code, so only open pickle files you created yourself.
with open(text_to_open_short, 'rb') as file:
    #note that here, we're loading a single text, but the examples below require each text to be in a list.
    doc = pickle.load(file)

In [86]:
#Load the short story again and summarize it with stuff_it_summary (defined elsewhere in this notebook).
#NOTE(review): this repeats the load done in the cell that defines the text_to_open_* paths, and this
#cell's execution count (86) is out of sequence with its neighbours - confirm it survives Restart & Run All.
text_to_open_short = './Data/Pickle/hills.pkl'  #2-3 page story, Hills like White Elephants
with open(text_to_open_short, 'rb') as file:
    #note that here, we're loading a single text, but the examples below require each text to be in a list.
    doc = pickle.load(file)

stuffSummary = stuff_it_summary(llm, doc)
print(stuffSummary)

In a small, sun-drenched train station between Barcelona and Madrid, an American man and a girl named Jig sit at a table in the shade, waiting for their train. They order drinks and engage in a seemingly casual conversation that gradually reveals underlying tensions. Jig comments on the distant hills, likening them to white elephants, which leads to a discussion about trying new drinks and the simplicity of life. However, their dialogue soon shifts to a more serious and ambiguous topic, hinting at an "operation" the man wants Jig to undergo, which is implied to be an abortion. The man tries to reassure her that everything will be fine afterward, but Jig expresses doubts and a sense of loss. Despite his insistence that he only wants her to be happy, Jig feels conflicted and ultimately asks him to stop talking. As the train's arrival approaches, the man moves their bags, and upon returning, finds Jig smiling and claiming to feel fine, though the tension between them remains unresolved.


In [87]:
#NOTE(review): the saved output of this cell stops at "TypeError: unsupported operand type(s) for +=:
#'NoneType' and 'NoneType'" right after the text was split into 2 docs.  map_reduce_summary needs fixing
#(or this cell removing) before the notebook is shared.
print(map_reduce_summary(doc, DEBUG=True))

Text was split into 2 docs


TypeError: unsupported operand type(s) for +=: 'NoneType' and 'NoneType'

In [16]:
#Prompt settings: a plain narrative summary with no extra guidance and no style guide.
prompt_options = {
    'prompt_type': "summary",
    'format_type': "narrative",
    'manual_guidance': "",
    'style_guide': "",
}

#AUTO_REFINE=True makes the function ask the LLM for questions about the first summary
#and merge the answers back into the final one.
revised_summary = generate_single_doc_summary(doc, prompt_options, AUTO_REFINE=True, DEBUG=True)
print("Final Summary:")
print(revised_summary)

Generating a new combined summary for 4 chunks.


  worker.setDaemon(True)    #setting threads as "daemon" allows main program to


Partial summary chunks done!
Creating joint summary...
Chunk summaries token count: 1523
Final merge of summary chunks, merging 4 summaries.
First summary:
In the valley of the Ebro, characterized by long, white hills devoid of shade and trees, a train station sits exposed to the sun. The station building offers a warm shadow, and a bamboo bead curtain hangs across the open door of the bar to keep out flies. At a table in the shade outside the station building, an American man and a girl named Jig are waiting for the express train from Barcelona, which will arrive in forty minutes and stop for two minutes before continuing to Madrid. The scene is hot, and the atmosphere is tense.

The girl asks what they should drink, and the man suggests beer. They order two large beers, which a woman from the bar brings to their table along with felt pads. The girl observes the white hills and comments that they look like white elephants, leading to a brief exchange where the man dismisses her observ

In [17]:
#Load the long sample text (the short novel) for the multi-chunk summarization example.
#NOTE: pickle.load can execute arbitrary code, so only open pickle files you created yourself.
with open(text_to_open_long, 'rb') as file:
    #note that here, we're loading a single text, but the examples below require each text to be in a list.
    longdoc = pickle.load(file)

In [18]:
#Same narrative-summary settings as for the short story, this time applied to the long text.
prompt_options = {
    'prompt_type': "summary",
    'format_type': "narrative",
    'manual_guidance': "",
    'style_guide': "",
}

#AUTO_REFINE=True makes the function ask the LLM for questions about the first summary
#and merge the answers back into the final one.
revised_summary = generate_single_doc_summary(longdoc, prompt_options, AUTO_REFINE=True, DEBUG=True)
print("Final Summary:")
print(revised_summary)

Generating a new combined summary for 38 chunks.


  worker.setDaemon(True)    #setting threads as "daemon" allows main program to


Partial summary chunks done!
Creating joint summary...
Chunk summaries token count: 13145
Starting reduction loop to merge chunks.  Total token count is 13145
Reducing 8 partial summaries into one...
Reducing 9 partial summaries into one...
Reducing 8 partial summaries into one...
Reducing 8 partial summaries into one...
Starting final reduction of 5 partial summaries into one...
Final merge of summary chunks, merging 5 summaries.
First summary:
In the late 17th century, R. Walton embarks on an ambitious expedition towards the North Pole, driven by childhood dreams of exploration and scientific discovery. Writing to his sister, Margaret Saville, he describes his arrival in St. Petersburgh, invigorated by the cold northern breeze and filled with optimism about uncovering the secrets of the magnet and making celestial observations. Despite enduring six years of preparation and hardships, Walton remains resolute, though he laments the absence of a friend to share his journey.

Meanwhile, 

In [19]:
#Load the FAQ document.  The file is opened in binary mode and decoded explicitly as UTF-8.
with open("./Data/AOAIFAQ.txt", 'rb') as file:
    # Read the file as a text string
    aoaifaq = file.read().decode('utf-8')

In [20]:
#Same narrative-summary settings again, applied to the FAQ text.
prompt_options = {
    'prompt_type': "summary",
    'format_type': "narrative",
    'manual_guidance': "",
    'style_guide': "",
}

#AUTO_REFINE=True makes the function ask the LLM for questions about the first summary
#and merge the answers back into the final one.
revised_summary = generate_single_doc_summary(aoaifaq, prompt_options, AUTO_REFINE=True, DEBUG=True)
print("Final Summary:")
print(revised_summary)

Generating a new combined summary for 7 chunks.


  worker.setDaemon(True)    #setting threads as "daemon" allows main program to


Partial summary chunks done!
Creating joint summary...
Chunk summaries token count: 2305
Final merge of summary chunks, merging 7 summaries.
First summary:
The Azure OpenAI Service, detailed in the document titled "Azure OpenAI Service – Customer Conversation Guidance & FAQ" updated on May 23, 2023, encompasses a range of features and announcements, particularly those made at Build 2023. One of the key highlights is the introduction of Provisioned Throughput, which allows customers to reserve and deploy Azure OpenAI model processing capacity for high-volume workloads. This feature is quantified in Provisioned Throughput Units (PTUs), with specific requirements for different models: GPT-3.5 Turbo requires a minimum of 300 PTUs, GPT-4 8k requires 900 PTUs, and GPT-4 32k requires 1800 PTUs. These units can be reserved in increments and committed for either one month or one year. The benefits of this system include cost savings, reserved processing capacity, consistent performance, and fle

In [73]:
#--- Azure AI Search connection (read from the environment so no secrets live in the notebook) ---
SearchService = os.getenv("SearchService")
SearchKey = os.getenv("SearchKey")

#--- Source document and the indexes involved ---
fileName = "actuary-gpt-applications-of-large-language-models-to-insurance-and-actuarial-work.pdf"
indexNs = "1eac4c6dced74a6cb3657466ecbfc1d6"   #index that is searched for the document's content
prospectusSummaryIndexName = 'summary'         #index that stores the generated topic summaries

#--- Summarization settings ---
selectedTopics = ['Growth Strategy', 'Positive Outlook']
summaryPromptTemplate = ''   #left empty so that the default template is filled in by a later cell
temperature = 0.3
tokenLength = 2500
topK = 3                     #number of search results retrieved per topic

In [22]:
#Fall back to a default IPO-prospectus summarization prompt when no custom template was configured.
#{text} is presumably the placeholder that receives the document text - TODO confirm against the consumer.
#NOTE(review): summaryPromptTemplate is passed to processTopicSummary, but nothing in that function's body
#reads it - confirm whether it is meant to reach the summarizer.
if summaryPromptTemplate == '':
        summaryPromptTemplate = """You are an AI assistant tasked with summarizing documents from large documents that contains information about Initial Public Offerings. 
        IPO document contains sections with information about the company, its business, strategies, risk, management structure, financial, and other information.
        Your summary should accurately capture the key information in the document while avoiding the omission of any domain-specific words. 
        Please generate a concise and comprehensive summary that includes details. 
        Ensure that the summary is easy to understand and provides an accurate representation. 
        Begin the summary with a brief introduction, followed by the main points.
        Generate the summary with minimum of 7 paragraphs and maximum of 10 paragraphs.
        Please remember to use clear language and maintain the integrity of the original information without missing any important details:
        {text}

        """

In [23]:
#Re-create the chat model and the embeddings client used by the topic-summary cells below.
#All connection details come from environment variables; temperature=0 and max_tokens=2000 match the
#settings of the client created in the first configuration cell.
#NOTE(review): the first configuration cell reads the embedding deployment from 'OpenAIEmbedding', whereas this
#cell (and generateEmbeddings) read "OpenAiEmbedding".  Environment variable names are case-sensitive on
#Linux/macOS - confirm which spelling is actually set.
llm = AzureChatOpenAI(
                azure_endpoint=os.getenv("OpenAiEndPoint"),
                api_version=os.getenv("OpenAiVersion"),
                azure_deployment=os.getenv("OpenAiChat4o"),
                temperature=0,
                api_key=os.getenv("OpenAiKey"),
                openai_api_type="azure",
                max_tokens=2000)
embeddings = AzureOpenAIEmbeddings(azure_endpoint=os.getenv("OpenAiEndPoint"), 
                                   azure_deployment=os.getenv("OpenAiEmbedding"), api_key=os.getenv("OpenAiKey"), openai_api_type="azure")

In [24]:
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import *
from azure.search.documents import SearchClient
from azure.core.credentials import AzureKeyCredential
import os
import logging
from azure.search.documents.models import QueryType
from azure.search.documents.indexes.models import (  
    SearchIndex,  
    SearchField,  
    SearchFieldDataType,  
    SimpleField,  
    SearchableField,  
    SearchIndex,  
    SemanticConfiguration,  
    SemanticField,  
    SearchField,  
    SemanticPrioritizedFields,
    VectorSearch,  
    HnswAlgorithmConfiguration,  
)
from tenacity import retry, wait_random_exponential, stop_after_attempt  
import openai
from openai import OpenAI, AzureOpenAI
from azure.search.documents.models import VectorizedQuery

In [37]:
def createProspectusSummary(SearchService, SearchKey, indexName):
    """Make sure the search index that stores the topic summaries exists.

    SearchService is the Azure AI Search service name (used to build the endpoint URL),
    SearchKey is its API key and indexName is the index to look for.

    When the index is already listed by the service a message is printed and nothing else happens.
    Otherwise an index is created with an "id" key, three searchable/filterable string fields
    ("fileName", "docType", "topic"), a retrievable "summary" field and a semantic configuration
    named "semanticConfig".  A failure during creation is printed rather than raised.
    """
    endpoint = f"https://{SearchService}.search.windows.net/"
    indexClient = SearchIndexClient(endpoint=endpoint,
            credential=AzureKeyCredential(SearchKey))

    #guard clause: nothing to create when the index is already there.
    if indexName in indexClient.list_index_names():
        print(f"Search index {indexName} already exists")
        return

    def textField(fieldName):
        #the three descriptive fields share exactly the same attributes.
        return SearchableField(name=fieldName, type=SearchFieldDataType.String, sortable=True,
                               searchable=True, retrievable=True, filterable=True, facetable=True,
                               analyzer_name="en.microsoft")

    indexFields = [SimpleField(name="id", type=SearchFieldDataType.String, key=True)]
    indexFields.extend(textField(fieldName) for fieldName in ("fileName", "docType", "topic"))
    indexFields.append(SimpleField(name="summary", type="Edm.String", retrievable=True))

    semanticConfig = SemanticConfiguration(
        name="semanticConfig",
        prioritized_fields=SemanticPrioritizedFields(
            title_field=SemanticField(field_name="docType"),
            keywords_fields=[SemanticField(field_name="topic")],
            content_fields=[SemanticField(field_name="summary")]
        )
    )

    index = SearchIndex(
        name=indexName,
        fields=indexFields,
        semantic_search=SemanticSearch(configurations=[semanticConfig])
    )

    try:
        print(f"Creating {indexName} search index")
        indexClient.create_index(index)
    except Exception as e:
        print(e)

In [40]:
def findSummaryInIndex(SearchService, SearchKey, indexName, fileName, docType, returnFields=["id", "fileName", "docType", 'topic', "summary"]):
    """Look up every stored summary for one file and document type.

    SearchService / SearchKey identify the Azure AI Search service, indexName is the summary index,
    fileName and docType are matched exactly through an OData filter, and returnFields lists the
    fields to select (the default list is only read, never modified).

    Returns the object produced by SearchClient.search, or None when the call raises
    (the exception is printed).
    """
    searchClient = SearchClient(endpoint=f"https://{SearchService}.search.windows.net",
        index_name=indexName,
        credential=AzureKeyCredential(SearchKey))

    #OData string literals are delimited by single quotes, so a quote inside a value has to be doubled.
    #Without this a file name such as "O'Neil.pdf" breaks the filter (or changes what it matches).
    safeFileName = fileName.replace("'", "''")
    safeDocType = docType.replace("'", "''")

    try:
        r = searchClient.search(
            search_text="",
            filter=f"fileName eq '{safeFileName}' and docType eq '{safeDocType}'",
            select=returnFields,
            semantic_configuration_name="semanticConfig",
            include_total_count=True
        )
        return r
    except Exception as e:
        #best effort: report the problem and let the caller deal with the None result.
        print(e)

    return None

In [41]:
def findTopicSummaryInIndex(SearchService, SearchKey, indexName, fileName, docType, topic, returnFields=["id", "fileName", "docType", 'topic', "summary"]):
    """Look up the stored summary for one file, document type and topic.

    SearchService / SearchKey identify the Azure AI Search service, indexName is the summary index,
    fileName, docType and topic are matched exactly through an OData filter, and returnFields lists
    the fields to select (the default list is only read, never modified).

    Returns the object produced by SearchClient.search, or None when the call raises
    (the exception is printed).
    """
    searchClient = SearchClient(endpoint=f"https://{SearchService}.search.windows.net",
        index_name=indexName,
        credential=AzureKeyCredential(SearchKey))

    #OData string literals are delimited by single quotes, so a quote inside a value has to be doubled.
    #Without this a topic such as "Company's Outlook" breaks the filter (or changes what it matches).
    safeFileName = fileName.replace("'", "''")
    safeDocType = docType.replace("'", "''")
    safeTopic = topic.replace("'", "''")

    try:
        r = searchClient.search(
            search_text="",
            filter=f"fileName eq '{safeFileName}' and docType eq '{safeDocType}' and topic eq '{safeTopic}'",
            select=returnFields,
            semantic_configuration_name="semanticConfig",
            include_total_count=True
        )
        return r
    except Exception as e:
        #best effort: report the problem and let the caller deal with the None result.
        print(e)

    return None

In [42]:
import openai
from tenacity import retry, wait_random_exponential, stop_after_attempt  
from openai import OpenAI, AzureOpenAI

@retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
# Function to generate embeddings for title and content fields, also used for query embeddings
def generateEmbeddings(embeddingModelType, text):
    """Return the embedding vector for text.

    embeddingModelType selects the provider; only 'azureopenai' is supported.
    text is the input handed to the embeddings endpoint.

    The Azure OpenAI connection details and the embedding deployment name are read from the
    OpenAiKey, OpenAiVersion, OpenAiEndPoint and OpenAiEmbedding environment variables.

    Returns response.data[0].embedding.
    Raises ValueError for an unsupported embeddingModelType and re-raises any error from the
    embeddings call.  The @retry decorator above is configured without a retry filter, so it
    re-invokes the function (up to 6 attempts with random exponential waits) whenever one is raised.
    """
    if embeddingModelType != 'azureopenai':
        #previously this fell through to "return embeddings" and failed with UnboundLocalError.
        raise ValueError(f"Unsupported embeddingModelType: {embeddingModelType!r}")

    try:
        client = AzureOpenAI(
                    api_key = os.getenv('OpenAiKey'),
                    api_version = os.getenv('OpenAiVersion'),
                    azure_endpoint = os.getenv('OpenAiEndPoint')
                    )

        response = client.embeddings.create(
            input=text, model=os.getenv('OpenAiEmbedding'))
    except Exception as e:
        #log at WARNING so the real cause is visible with the default logging configuration, then
        #re-raise so the retry decorator sees the actual error rather than a secondary
        #UnboundLocalError caused by the missing result.
        logging.warning(f"Embedding request failed: {e}")
        raise

    return response.data[0].embedding

In [64]:
def performCogSearch(indexType, embeddingModelType, question, indexName, k, returnFields=["id", "content", "metadata"] ):
    """Run a search for question against the index indexName and return the results.

    indexType selects the kind of query:
      "cogsearchvs" - semantic query combined with a vector query on the "content_vector" field
                      (the question is embedded with generateEmbeddings), selecting returnFields;
      "cogsearch"   - semantic query with the spell checker enabled; if that call raises, it is
                      retried with the "default" semantic configuration and an explicit query language.
    k is used both as the number of nearest neighbours and as the number of results requested.
    The service name and key are taken from the notebook-level SearchService and SearchKey variables.

    Returns the object produced by SearchClient.search, or None when indexType is not supported
    or when the search raises (the problem is logged).
    """
    if indexType not in ("cogsearchvs", "cogsearch"):
        #be explicit: previously this case only "worked" because the UnboundLocalError raised by
        #"return r" was swallowed by the blanket except below.
        logging.warning(f"Unsupported indexType {indexType!r}; no search was performed")
        return None

    searchClient = SearchClient(endpoint=f"https://{SearchService}.search.windows.net",
        index_name=indexName,
        credential=AzureKeyCredential(SearchKey))
    try:
        if indexType == "cogsearchvs":
            r = searchClient.search(
                search_text=question,
                vector_queries=[VectorizedQuery(vector=generateEmbeddings(embeddingModelType, question), k_nearest_neighbors=k, fields="content_vector")],
                select=returnFields,
                query_type="semantic",
                semantic_configuration_name='mySemanticConfig',
                query_caption="extractive",
                query_answer="extractive",
                include_total_count=True,
                top=k
            )
        else:
            #NOTE(review): if search() defers the request until the results are iterated, errors from
            #the service would not surface here and this fallback would never trigger - confirm.
            try:
                r = searchClient.search(question,
                                    filter=None,
                                    query_type=QueryType.SEMANTIC,
                                    query_speller="lexicon",
                                    semantic_configuration_name="mySemanticConfig",
                                    top=k,
                                    query_caption="extractive|highlight-false")
            except Exception:
                 r = searchClient.search(question,
                                filter=None,
                                query_type=QueryType.SEMANTIC,
                                query_language="en-us",
                                query_speller="lexicon",
                                semantic_configuration_name="default",
                                top=k,
                                query_caption="extractive|highlight-false")
        return r
    except Exception as e:
        #WARNING rather than INFO: with the default logging configuration INFO records are dropped,
        #which made search failures completely silent.
        logging.warning(f"Search on index {indexName} failed: {e}")

    return None

In [78]:
def summarizeTopic(llm, query, embeddingModelType, indexNs, indexType, topK):
    """Search the index indexNs for query and summarize the content that comes back.

    llm is handed to stuff_it_summary, query is the topic to search for, embeddingModelType is
    forwarded to performCogSearch, and topK is the number of search results to retrieve.
    Only indexType 'cogsearchvs' is supported.

    Returns the summary produced by stuff_it_summary, or the string "I don't know" when the search
    failed or returned no content (callers test for that string).
    Raises ValueError for any other indexType (this used to surface as an UnboundLocalError).
    """
    if indexType != 'cogsearchvs':
        raise ValueError(f"Unsupported indexType: {indexType!r} (only 'cogsearchvs' is supported)")

    r = performCogSearch(indexType, embeddingModelType, query, indexNs, topK, returnFields=["id", "content", "metadata"] )
    if r is None:
        #the search itself failed.  Previously a placeholder document saying "No results found" was
        #summarized by the LLM and the result was treated as a genuine topic summary.
        logging.info("Cog Search returned no result set")
        return "I don't know"

    resultsDoc = [
            Document(page_content=doc['content'], metadata={"id": doc['id']})
            for doc in r
            ]
    logging.info(f"Found {len(resultsDoc)} Cog Search results")

    docContent = ' '.join([doc.page_content for doc in resultsDoc])

    if len(docContent) == 0:
        return "I don't know"

    return stuff_it_summary(llm, docContent)

In [79]:
def mergeDocs(SearchService, SearchKey, indexName, docs):
    """Merge-or-upload docs into the search index indexName, 1000 documents per request.

    SearchService / SearchKey identify the Azure AI Search service.  docs is a sized iterable of
    documents.  After every request the number of results and the number that succeeded are
    logged; nothing is returned.
    """
    logging.info("Total docs: " + str(len(docs)))
    searchClient = SearchClient(endpoint=f"https://{SearchService}.search.windows.net/",
                                    index_name=indexName,
                                    credential=AzureKeyCredential(SearchKey))

    def sendBatch(pending):
        #upload one batch and log how many of its documents were accepted.
        results = searchClient.merge_or_upload_documents(documents=pending)
        succeeded = sum([1 for r in results if r.succeeded])
        logging.info(f"\tIndexed {len(results)} sections, {succeeded} succeeded")

    pending = []
    for position, section in enumerate(docs, start=1):
        pending.append(section)
        if position % 1000 == 0:
            sendBatch(pending)
            pending = []

    #whatever is left over after the last full batch.
    if len(pending) > 0:
        sendBatch(pending)

In [82]:
import uuid

def processTopicSummary(llm, fileName, indexNs, indexType, prospectusSummaryIndexName, embeddings, embeddingModelType, selectedTopics, 
                        summaryPromptTemplate, topK, existingSummary):
    """Collect (and, where missing, generate) the topic summaries of one prospectus file.

    The summary index prospectusSummaryIndexName is created first if needed.  Then:
      - when existingSummary is exactly the string "true", every summary already stored for
        fileName is read back and returned;
      - otherwise each topic in selectedTopics is looked up; stored summaries are reused, missing
        ones are generated with summarizeTopic (answers containing "I don't know" are dropped),
        and the whole list is written to the index with mergeDocs.

    llm, embeddingModelType, indexNs, indexType and topK are forwarded to summarizeTopic.
    The embeddings and summaryPromptTemplate parameters are accepted but not used in this function.
    The search service name and key come from the notebook-level SearchService / SearchKey variables.

    Returns a list of dicts with the keys 'id', 'fileName', 'docType', 'topic' and 'summary'.
    """
    recordKeys = ('id', 'fileName', 'docType', 'topic', 'summary')

    def toRecord(hit):
        #copy just the stored fields out of a search result.
        return {key: hit[key] for key in recordKeys}

    createProspectusSummary(SearchService, SearchKey, prospectusSummaryIndexName)
    topicSummary = []
    print(f"Existing Summary: {existingSummary}")

    if existingSummary == "true":
        logging.info("Found existing summary")
        stored = findSummaryInIndex(SearchService, SearchKey, prospectusSummaryIndexName, fileName, 'prospectus')
        topicSummary.extend(toRecord(hit) for hit in stored)
        return topicSummary

    for topic in selectedTopics:
        stored = findTopicSummaryInIndex(SearchService, SearchKey, prospectusSummaryIndexName, fileName, 'prospectus', topic)
        if stored.get_count() != 0:
            #a summary for this topic already exists: reuse it.
            topicSummary.extend(toRecord(hit) for hit in stored)
            continue

        logging.info(f"Summarize on Topic: {topic}")
        answer = summarizeTopic(llm, topic, embeddingModelType, indexNs, indexType, topK)
        if "I don't know" in answer:
            continue
        topicSummary.append({
            'id' : str(uuid.uuid4()),
            'fileName': fileName,
            'docType': 'prospectus',
            'topic': topic,
            'summary': answer
        })

    mergeDocs(SearchService, SearchKey, prospectusSummaryIndexName, topicSummary)
    return topicSummary

In [83]:
#Generate (or reuse) the summaries of the selected topics for fileName and store them in the summary index.
#NOTE(review): existingSummary is passed as "False"; processTopicSummary only takes its read-existing branch
#when the value is exactly "true", so any other string runs the per-topic branch.
#NOTE(review): the index name literal repeats the value assigned to indexNs in the configuration cell.
summaryTopicData = processTopicSummary(llm, fileName, "1eac4c6dced74a6cb3657466ecbfc1d6", "cogsearchvs",
                                       prospectusSummaryIndexName, embeddings, "azureopenai", 
                            selectedTopics, summaryPromptTemplate, topK, "False")

Search index summary already exists
Existing Summary: False
