In [38]:
from langchain.chat_models import ChatOpenAI
from langchain.docstore.document import Document
from langchain import PromptTemplate
from langchain.chains.summarize import load_summarize_chain
from langchain.text_splitter import CharacterTextSplitter
from langchain.callbacks import get_openai_callback

import tiktoken
import os

In [16]:
# Chat model shared by every summarisation chain in this notebook.
# temperature=0 is requested so that repeated runs are as stable as the API allows.
#
# The API key is read from the OPENAI_API_KEY environment variable rather than
# being written into the notebook: a key stored in a cell is exposed to anyone
# who can read the file or its version history. A missing variable raises
# KeyError here, before any request is sent.
# NOTE(review): a literal key used to be committed on this line; treat it as
# compromised and revoke/rotate it.
llm = ChatOpenAI(
    temperature=0,
    openai_api_key=os.environ["OPENAI_API_KEY"],
    model_name="gpt-3.5-turbo",
)

In [87]:
# ---------------------------------------------------------------------------
# Chunk summariser: a single-variable prompt ({text}) wrapped in a "stuff"
# summarize chain.
# ---------------------------------------------------------------------------
sum_prompt_template = """
                      Write a summary of the text that includes the main points and any important details in paragraph form.
                      {text}
                      """
sum_prompt = PromptTemplate(
    input_variables=["text"],
    template=sum_prompt_template,
)
sum_chain = load_summarize_chain(
    llm,
    chain_type="stuff",
    prompt=sum_prompt,
)

# ---------------------------------------------------------------------------
# Summary refiner: a two-variable prompt. {existing} is the running summary
# and {text} is the newly arrived content to merge into it.
# ---------------------------------------------------------------------------
refine_prompt_template = '''
Your assignment is to expand an existing summary by adding new information that follows it. Here's the current summary up to a specified point:

{existing}

Now, consider the following content which occurs after the existing summary:

{text}

Evaluate the additional content for its relevance and importance in relation to the existing summary. If this new information is significant and directly relates to what has already been summarized, integrate it smoothly into the existing summary to create a comprehensive and cohesive final version. If the additional content doesn't provide substantial value or isn't relevant to the existing summary, simply return the original summary as it is. If the summary is getting too long you can shorten it by removing unnecessary details.

Your final output must only be the comprehensive and cohesive final version of the summary. It should contain no other text, such as reasoning behind the summary.

Summary:
        '''
prompt = PromptTemplate(
    input_variables=["existing", "text"],
    template=refine_prompt_template,
)

refine_chain = load_summarize_chain(
    llm,
    chain_type="stuff",
    prompt=prompt,
)

In [42]:
def save_to_file(text, name, iteration, directory="refine"):
    """Write ``text`` to ``<directory>/<name>_<iteration>.txt`` encoded as UTF-8.

    Args:
        text: String content to write. An existing file of the same name is
            overwritten.
        name: Prefix of the file name (for example ``"chunk_sum"``).
        iteration: Value appended to ``name`` to form the file name; it is
            also echoed in the progress message.
        directory: Folder that receives the file. It is created, including
            any missing parents, when it does not exist yet. Defaults to
            ``"refine"``, relative to the current working directory.

    Returns:
        None. A confirmation line is printed once the file has been written.
    """
    # exist_ok=True replaces the separate "exists?" check, so there is no
    # window in which the directory can appear between the check and the
    # creation and make makedirs fail.
    os.makedirs(directory, exist_ok=True)
    filename = f"{name}_{iteration}.txt"
    with open(os.path.join(directory, filename), 'w', encoding='utf-8') as file:
        file.write(text)
    print(f"Saved iteration {iteration} to file")


In [88]:
# Read the whole book into memory as a single UTF-8 string.
file_path = 'Gatsby.txt'
with open(file_path, encoding='utf-8') as file:
    book_text = file.read()

# Splitter whose chunk size is measured with the tiktoken encoder:
# chunks of up to 3500 with no overlap between neighbouring chunks.
text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_overlap=0,
    chunk_size=3500,
)

In [86]:
def refine(text):
    """Summarise ``text`` chunk by chunk, folding each chunk into a running summary.

    The text is split with the notebook-level ``text_splitter``. For every
    chunk, in order:

    1. ``sum_chain`` produces a summary of that chunk alone.
    2. ``refine_chain`` receives the running summary (``existing``) together
       with the new chunk summary and returns the updated running summary.

    After the last chunk the running summary is passed through ``sum_chain``
    once more to obtain the final summary. Every intermediate result is
    written to disk through ``save_to_file``.

    Args:
        text: The full text to summarise.

    Returns:
        A ``(final_summary, total_tokens)`` tuple, where ``total_tokens`` is
        the total reported by the OpenAI callback for all chain calls made
        here. If the splitter yields no chunks, ``("", 0)`` is returned and
        no model call is made.
    """
    texts = text_splitter.split_text(text)
    print("The text was split into " + str(len(texts)) + " chunks.")

    # Without this guard the loop below never binds its index, so the final
    # save would fail with an unbound-variable error after a wasted model
    # call on an empty document.
    if not texts:
        return "", 0

    cur_summary = ""
    # A single callback context spans every chain call, so its running total
    # is exactly the figure this function reports.
    with get_openai_callback() as cb:
        for num, chunk_text in enumerate(texts):
            print(f"Processing chunk {num}")
            chunk_sum = sum_chain.run([Document(page_content=chunk_text)])
            save_to_file(chunk_sum, "chunk_sum", num)

            # The refine prompt sees the summary of the chunk, not the raw
            # chunk, as its new content.
            refine_inputs = {
                'existing': cur_summary,
                'input_documents': [Document(page_content=chunk_sum)],
            }
            cur_summary = refine_chain.run(refine_inputs)
            save_to_file(cur_summary, "cur_summary", num)

        # Final pass over the accumulated summary.
        final_summary = sum_chain.run([Document(page_content=cur_summary)])
        total_tokens = cb.total_tokens

    # The final file carries the index of the last processed chunk.
    save_to_file(final_summary, "final_summary", len(texts) - 1)
    return final_summary, total_tokens

In [89]:
# Run the pipeline on the first 60000 characters of the book only.
book_excerpt = book_text[:60000]
summary, total_tokens = refine(book_excerpt)

# Report the token total, then the resulting summary.
print(f"Total tokens: {total_tokens}")
print(summary)

The text was split into 5 chunks.
Processing chunk 0
Saved iteration 0 to file
Saved iteration 0 to file
Saved iteration 0 to file
Processing chunk 1
Saved iteration 1 to file
Saved iteration 1 to file
Saved iteration 1 to file
Processing chunk 2
Saved iteration 2 to file
Saved iteration 2 to file
Saved iteration 2 to file
Processing chunk 3
Saved iteration 3 to file
Saved iteration 3 to file
Saved iteration 3 to file
Processing chunk 4
Saved iteration 4 to file
Saved iteration 4 to file
Saved iteration 4 to file
Total tokens: 22004
In this passage from F. Scott Fitzgerald's "The Great Gatsby," the narrator, Nick Carraway, reflects on his father's advice about not criticizing others. He introduces himself as a member of a prominent family from the Middle West who has moved to the East and is living in a small house in West Egg. He mentions his cousin Daisy and her husband Tom Buchanan, whom he is going to visit. Nick arrives at their elaborate mansion and meets Daisy and her friend, Jo