# YT video summary playground
Playground for testing summary generation for English-language YouTube videos.

In [78]:
import os
from dotenv import load_dotenv
from langchain_community.document_loaders import YoutubeLoader
from langchain_openai import ChatOpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.summarize import load_summarize_chain

# Load variables from a local .env file (if one exists) into the process
# environment, then read the OpenAI key from there.
load_dotenv()
openai_key = os.environ.get("OPENAI_API_KEY")

# Video to summarize; swap in the commented URL to try a different one.
url = "https://youtu.be/ThnVAgHzsLg?si=4s8wBcvXrfDPEiRn"
# url = "https://www.youtube.com/watch?v=Hkgz1ysv9Fk"

# Fetch the video as LangChain documents, asking the loader for video info too.
loader = YoutubeLoader.from_youtube_url(url, add_video_info=True)
results = loader.load()

# Chat model shared by the later cells (token counting and summarization).
llm = ChatOpenAI(
    model="gpt-3.5-turbo",
    temperature=0,
    openai_api_key=openai_key,
)

# text_splitter = RecursiveCharacterTextSplitter(chunk_size=5000, chunk_overlap=1000)
# texts = text_splitter.split_documents(results)

# chain = load_summarize_chain(llm=llm, chain_type="map_reduce", verbose=False)

# summary = chain.run(texts)

# print(summary)

In [79]:
# Gather the text of every loaded document into a single string.
# Looping and reassigning would keep only the last document and would leave
# `text_content` undefined (or stale from an earlier run) when nothing loaded.
if not results:
    raise ValueError("No documents were loaded from the video; nothing to summarize")

text_content = "\n\n".join(document.page_content for document in results)

# Token count of the full text, displayed as the cell's output.
llm.get_num_tokens(text_content)

2461

In [80]:
from langchain import OpenAI
from langchain.chains.summarize import load_summarize_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the text into chunks of at most 10,000 characters with a 500-character
# overlap. The separators are tried in order; " " and "" are kept as fallbacks
# so that text containing no newlines can still be split. With only newline
# separators such text would stay in one chunk larger than `chunk_size`.
text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", " ", ""],
    chunk_size=10000,
    chunk_overlap=500,
)

docs = text_splitter.create_documents([text_content])

# Fail with a clear message instead of an IndexError on `docs[0]` below.
if not docs:
    raise ValueError("The text splitter produced no documents; is the text empty?")

num_docs = len(docs)
num_tokens_first_doc = llm.get_num_tokens(docs[0].page_content)

print(f"Now we have {num_docs} documents and the first one has {num_tokens_first_doc} tokens")

Now we have 1 documents and the first one has 2461 tokens


In [81]:
# General-purpose summarization instructions asking for a structured summary
# with headings. Not referenced by the other cells shown in this notebook.
# The text previously ended with a stray closing curly quote (no opening
# partner), which would have been sent to the model verbatim; it is removed.
PROMPT_TEMPLATE = """
Please provide a concise and comprehensive summary of the given text.
The summary should capture the main points and key details of the text while conveying the author's intended meaning accurately.
Please ensure that the summary is well-organized and easy to read, with clear headings and subheadings to guide the reader through each section.
The length of the summary should be appropriate to capture the main points and key details of the text, without including unnecessary information or becoming overly long.
"""

In [82]:
from langchain import PromptTemplate

# Map step: prompt applied to each chunk on its own.
map_prompt = """
Write a concise summary of the following:
"{text}"
CONCISE SUMMARY:
"""

# Combine step: prompt applied to the joined per-chunk summaries. It asks for
# a prose summary followed by bullet points and includes one worked example.
summary_combine_prompt = """
Please provide a detailed and comprehensive summary of the given text.
The summary should capture the main points and key details of the text while conveying the author's intended meaning accurately.
Second part should be a bulletpoint summary of the text.
Example:
In the text, the speaker discusses creating a chatbot that reads AI news from different sources using Gradient AI's hosted retrieval solution. They demonstrate how to build the chatbot in a few lines of code and deploy it on Streamlit for easy access.

- The speaker created a chatbot that reads AI news from different sources
- The chatbot uses Gradient AI's hosted retrieval solution to answer user queries
- The chatbot was built in a few lines of code and deployed on Streamlit for easy access

```{text}```
SUMMARY:
"""

# Both templates take a single `text` placeholder.
map_prompt_template = PromptTemplate(
    input_variables=["text"],
    template=map_prompt,
)
summary_combine_prompt_template = PromptTemplate(
    input_variables=["text"],
    template=summary_combine_prompt,
)

In [83]:
# Assemble the map-reduce summarization chain: `map_prompt` is used for each
# chunk and `combine_prompt` for merging the per-chunk summaries.
summary_chain = load_summarize_chain(
    llm=llm,
    chain_type="map_reduce",
    map_prompt=map_prompt_template,
    combine_prompt=summary_combine_prompt_template,
    # verbose=True,
)

In [84]:
# Run the summarization chain over the chunked documents.
# `Chain.run` is deprecated in LangChain 0.1+; `invoke` takes the inputs as a
# dict and returns a dict, from which the summary text is read explicitly.
output = summary_chain.invoke({"input_documents": docs})["output_text"]


In [85]:
# Show the summary as plain text so its line breaks render as written.
print(output)

The Seiko Alpinist SPB121 is a modern iteration of the classic SARB 017, a watch known for its unique design and features. Originally introduced in 1959, the Alpinist collection gained popularity with the SARB 017 in 2006. The SPB121 boasts a 39.5mm case, dual crowns, an internal rotating compass bezel, and a 6R35 movement with a 70-hour power reserve. Despite its unconventional design, the SPB121 offers versatility, durability, and value for less than $1,000. It has become a favorite among watch enthusiasts and is highly regarded in its price range.

BULLETPONT SUMMARY:
- The Seiko Alpinist SPB121 is a successor to the SARB 017, known for its unique design and features
- The SPB121 features a 39.5mm case, dual crowns, internal rotating compass bezel, and a 6R35 movement with a 70-hour power reserve
- Despite its unconventional design, the SPB121 offers versatility, durability, and value for under $1,000
- The SPB121 has gained a strong following among watch enthusiasts and is consider