### LangChain - Summarization

In [1]:
import os

from dotenv import load_dotenv
from langchain.chains.summarize import load_summarize_chain
from langchain_openai import ChatOpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [2]:
# Generation settings for the chat model shared by the rest of the notebook.
model_name = "gpt-3.5-turbo-0125"
temperature = 1.0
max_tokens = 512

# Pull variables from a local .env file into the process environment, then
# read the API key; a missing key raises KeyError right here.
load_dotenv()
openai_api_key = os.environ["OPENAI_API_KEY"]

llm = ChatOpenAI(
    openai_api_key=openai_api_key,
    model=model_name,
    max_tokens=max_tokens,
    temperature=temperature,
)

In [None]:
# Read the whole input document into memory. The encoding is given explicitly
# so decoding does not depend on the platform's locale default (which is not
# UTF-8 on every system).
with open("./good.txt", "r", encoding="utf-8") as file:
    text = file.read()

# Preview the first 200 characters as a quick sanity check.
print(text[:200])

In [None]:
# Ask the model wrapper how many tokens the full text amounts to.
number_of_tokens = llm.get_num_tokens(text)

# Report the count so it can be compared with the model's context size.
print(f"There are {number_of_tokens} tokens in the file!")

In [None]:
# Split the text into overlapping chunks of at most ~3000 characters.
# Separators are tried in list order, so they must go from coarsest to finest:
# paragraph breaks ("\n\n") first, then single newlines ("\n"). With "\n"
# listed first it always matches before "\n\n", so paragraph boundaries were
# never used as split points.
text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n"],
    chunk_size=3000,
    chunk_overlap=300,
)

# Wrap each chunk in a document object, as expected by the summarize chain.
documents = text_splitter.create_documents([text])

print(f"Totally {len(text)} character(s) are present in the file!")
print(f"Now, you have {len(documents)} documents instead of 1 piece of text!")

In [6]:
# Build a "map_reduce" summarization chain on top of the shared chat model;
# verbose=True asks the chain to log its intermediate steps while running.
chain = load_summarize_chain(llm, chain_type="map_reduce", verbose=True)

In [None]:
# Run the chain over the split documents. "input_documents" is the chain's
# single input key, so this is the explicit form of passing the list directly.
output = chain.invoke({"input_documents": documents})

In [None]:
# Show the raw result object returned by chain.invoke(...).
print(output)

In [None]:
# Print only the value stored under the "output_text" key of the result
# (presumably the final combined summary — confirm against the chain's output keys).
print(output["output_text"])