In [None]:
!pip3 install langchain


In [None]:
%load_ext dotenv
%dotenv

import os

API_KEY = os.environ.get("OPENAI_TOKEN")

In [None]:
from langchain import OpenAI

# temperature=0 asks for the least random completions the API will give.
llm = OpenAI(
    openai_api_key=API_KEY,
    temperature=0,
)

# The trailing backslashes inside the literal join the physical lines, so the
# sample paragraph reaches the model as one continuous line of text.
prompt = """
Please provide a summary of the following text

TEXT:

Philosophy (from Greek: φιλοσοφία, philosophia, 'love of wisdom') \
is the systematized study of general and fundamental questions, \
such as those about existence, reason, knowledge, values, mind, and language. \
Some sources claim the term was coined by Pythagoras (c. 570 – c. 495 BCE), \
although this theory is disputed by some. Philosophical methods include questioning, \
critical discussion, rational argument, and systematic presentation.
"""

# Report the prompt's size in tokens before it is sent to the model.
num_tokens = llm.get_num_tokens(prompt)
print(f"Our prompt has {num_tokens} tokens")

In [None]:
# Send the prompt to the model and show the completion it returns.
output = llm(prompt)
print(output)

OK, now we have basic summarization working. Next, let's use map-reduce to summarize a full board transcript.

In [None]:
from langchain.chains.summarize import load_summarize_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter

# The transcript path is relative to the notebook's working directory.
board_transcript = './private/2023_q2_board_transcript.txt'

# NOTE(review): no explicit encoding is passed, so decoding uses the platform
# default — confirm the file's encoding if this runs on more than one machine.
with open(board_transcript, 'r') as f:
    transcript = f.read()

# Print the size of the whole transcript. As a bare expression in the middle of
# the cell, the count was computed but never displayed (only a cell's last
# expression is echoed).
num_tokens_transcript = llm.get_num_tokens(transcript)
print(f"The full transcript has {num_tokens_transcript} tokens")

# Prefer splitting on blank lines, then on single newlines. chunk_size and
# chunk_overlap are measured by the splitter's length function (presumably
# characters by default — verify against the installed langchain version).
text_splitter = RecursiveCharacterTextSplitter(separators=["\n\n", "\n"], chunk_size=10000, chunk_overlap=500)

# One Document per chunk; this list is the input to the summarization chain.
docs = text_splitter.create_documents([transcript])

In [None]:
# Sanity-check the split: how many chunks there are and how large the first is.
num_docs = len(docs)

first_chunk_text = docs[0].page_content
num_tokens_first_doc = llm.get_num_tokens(first_chunk_text)

print(f"Our first document has {num_tokens_first_doc} tokens and we have {num_docs} documents in total")

In [None]:
# Build a map_reduce summarization chain. No prompts are passed, so the
# library's default prompts are used.
summary_chain = load_summarize_chain(
    llm=llm,
    chain_type='map_reduce',
    verbose=True,
)

In [None]:
# Run the summarization chain over the transcript chunks and keep the result.
output = summary_chain.run(docs)

In [None]:
# Bare last expression: the notebook displays the value of `output`.
output

The output quality is not quite what I want, so next I'll customize the map and combine prompts.

In [None]:
from langchain import PromptTemplate

# Prompt for summarizing a piece of the transcript; {text} is the single
# placeholder that gets filled in.
map_prompt = """
Write a summary of the following board meeting transcript. Focus on key, specific points and keep it concise:
"{text}"
SUMMARY BROKEN UP IN SECTIONS:
"""
map_prompt_template = PromptTemplate(
    input_variables=['text'],
    template=map_prompt,
)

In [None]:
# Prompt for the final pass: asks for one bullet-point summary of the whole
# meeting, returned as Markdown. {text} is the single placeholder.
combine_prompt = """
Create a summary (in bullet points) that summarizes the entire contents of the meeting.
Return your response in Markdown.
```{text}```
BOARD MINUTES MARKDOWN:
"""
combine_prompt_template = PromptTemplate(
    input_variables=['text'],
    template=combine_prompt,
)

In [None]:
# Rebuild the map_reduce chain, this time supplying the custom map and
# combine prompt templates.
summary_chain = load_summarize_chain(
    llm=llm,
    chain_type='map_reduce',
    map_prompt=map_prompt_template,
    combine_prompt=combine_prompt_template,
    verbose=True,
)

In [None]:
# Run the summarization chain over the transcript chunks; this overwrites any
# earlier value of `output`.
output = summary_chain.run(docs)

In [None]:
# print() rather than a bare expression, so newlines in the result show as
# real line breaks instead of escaped "\n" in the string's repr.
print(output)