# YT video summary playground
Playground for testing summary generation for English-language YouTube videos.

In [168]:
import os
from dotenv import load_dotenv
from langchain_community.document_loaders import YoutubeLoader
from langchain_openai import ChatOpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.summarize import load_summarize_chain

# Load variables from a local .env file into the process environment, then
# read the OpenAI key back out (None if it is not set).
load_dotenv()
openai_key = os.environ.get("OPENAI_API_KEY")

# Video whose transcript gets summarised. Alternatives kept for quick swapping:
#   https://youtu.be/ThnVAgHzsLg?si=4s8wBcvXrfDPEiRn
#   https://www.youtube.com/watch?v=f9_BWhCI4Zo
#   https://www.youtube.com/watch?v=8OJC21T2SL4
url = "https://www.youtube.com/watch?v=Hkgz1ysv9Fk"

# Build the loader for that URL (also requesting video metadata) and load the
# resulting documents.
loader = YoutubeLoader.from_youtube_url(url, add_video_info=True)
results = loader.load()

# Chat model shared by the token-counting and summarisation cells below.
llm = ChatOpenAI(
    model="gpt-3.5-turbo",
    temperature=0,
    openai_api_key=openai_key,
)

In [169]:
# Fail with a clear message instead of a NameError further down when the
# loader produced nothing.
if not results:
    raise ValueError("YoutubeLoader returned no documents; there is no transcript to summarise")

# Concatenate the text of every loaded document. A plain loop assignment would
# keep only the last document's content and silently drop the rest.
text_content = "\n".join(document.page_content for document in results)

# Token count of the full transcript (shown as the cell output).
llm.get_num_tokens(text_content)

2308

In [170]:
from langchain import OpenAI
from langchain.chains.summarize import load_summarize_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the transcript into overlapping chunks for the map-reduce summary.
# NOTE(review): chunk_size/chunk_overlap are presumably measured in characters
# (the splitter's default length function), whereas the figure printed below
# is in tokens - confirm if the sizes are tuned against a token budget.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=10000,
    chunk_overlap=500,
)
docs = text_splitter.create_documents([text_content])

# Quick sanity check on how the transcript was chunked.
num_docs = len(docs)
num_tokens_first_doc = llm.get_num_tokens(docs[0].page_content)
print(f"Now we have {num_docs} documents and the first one has {num_tokens_first_doc} tokens")

Now we have 2 documents and the first one has 2169 tokens


In [171]:
from langchain import PromptTemplate

# Prompt applied to each transcript chunk individually (the "map" step).
map_prompt = """
Write a concise summary of the following:
"{text}"
CONCISE SUMMARY:
"""
map_prompt_template = PromptTemplate(template=map_prompt, input_variables=["text"])

# Prompt applied to the combined chunk summaries (the "reduce" step).
# The literal must open with exactly three quotes: a fourth one becomes a
# stray '"' character at the very start of the prompt sent to the model.
summary_combine_prompt = """
Write detailed and comprehensive summary of the video transcript text.
The summary should cover the main points and key details of the text.
Return your response in bullet points.
```{text}```
BULLET POINT SUMMARY:
"""
summary_combine_prompt_template = PromptTemplate(template=summary_combine_prompt, input_variables=["text"])

In [172]:
# Map-reduce summarisation chain: `map_prompt` is applied to each chunk and
# `combine_prompt` to the merged per-chunk summaries.
summary_chain = load_summarize_chain(
    llm=llm,
    chain_type="map_reduce",
    map_prompt=map_prompt_template,
    combine_prompt=summary_combine_prompt_template,
    # verbose=True,  # uncomment to trace the intermediate prompts and outputs
)

In [173]:
# Run the chain over all chunks. `Chain.run` is deprecated in current
# LangChain releases; `invoke` takes the documents under "input_documents"
# and returns a dict whose "output_text" entry is the final summary string.
output = summary_chain.invoke({"input_documents": docs})["output_text"]


In [None]:
# Show the generated summary; print() renders its line breaks rather than
# the escaped repr a bare expression would display.
print(output)