# Imports

In [None]:
# !pip install openai
# !pip install langchain
# !pip install pinecone-client
# !pip install tiktoken yt_dlp pydub librosa
# !pip install python-dotenv
# !pip install youtube-transcript-api

In [None]:
from dotenv import load_dotenv,find_dotenv
# Locate a .env file and load its variables into the process environment;
# a later cell reads PINECONE_API_KEY and PINECONE_ENV from it via os.getenv.
load_dotenv(find_dotenv())

True

In [None]:
import os
from urllib.parse import urlparse, parse_qs

# Getting the transcript text from a YouTube URL

## Parsing the URL

In [None]:
# Video to index, and a local working folder (save_dir is only referenced by
# the commented-out audio-download alternative further down).
YT_URL = "https://www.youtube.com/watch?v=tLS6t3FVOTI"
save_dir = 'downloads'

# isdir (not exists) so a stray *file* with this name is not mistaken for the
# folder. makedirs(exist_ok=True) also creates missing parent directories and
# does not fail if the directory appears between the check and the call.
if os.path.isdir(save_dir):
    print(f"Directory: {save_dir} exists")
else:
    os.makedirs(save_dir, exist_ok=True)

Directory: downloads exists


In [None]:
# Break the URL into its components; the video id lives in the query string.
parsed_url = urlparse(YT_URL)
parsed_url

ParseResult(scheme='https', netloc='www.youtube.com', path='/watch', params='', query='v=tLS6t3FVOTI', fragment='')

In [None]:
# parse_qs maps every query parameter to a *list* of values.
parsed_query = parse_qs(parsed_url.query)
parsed_query

{'v': ['tLS6t3FVOTI']}

In [None]:
# The first value of the "v" parameter is the video id.
yt_id = parsed_query['v'][0]
yt_id

'tLS6t3FVOTI'

In [None]:
def get_yt_id(url):
    """Extract the video id from a YouTube URL.

    Supports the standard watch form (``youtube.com/watch?v=<id>``) as well as
    ``youtu.be/<id>`` short links and the path-based forms ``/embed/<id>``,
    ``/shorts/<id>``, ``/live/<id>`` and ``/v/<id>``.

    Args:
        url: YouTube video URL as a string.

    Returns:
        The video id as a string.

    Raises:
        KeyError: if no video id can be found in ``url`` (the same exception
            type a failed ``query['v']`` lookup raises).
    """
    parsed_url = urlparse(url)
    query = parse_qs(parsed_url.query)
    # Standard watch URLs carry the id in the "v" query parameter.
    if query.get('v'):
        return query['v'][0]

    segments = [part for part in parsed_url.path.split('/') if part]
    host = (parsed_url.hostname or '').lower()
    # Short links: https://youtu.be/<id>
    if host in ('youtu.be', 'www.youtu.be') and segments:
        return segments[0]
    # Path-based forms: /embed/<id>, /shorts/<id>, /live/<id>, /v/<id>
    if len(segments) >= 2 and segments[0] in ('embed', 'shorts', 'live', 'v'):
        return segments[1]
    raise KeyError(f"no YouTube video id found in URL: {url}")

In [None]:
# Same result as the step-by-step cells, now obtained through the helper.
yt_id = get_yt_id(YT_URL)
yt_id

'tLS6t3FVOTI'

## Getting the transcript

In [None]:
from langchain.document_loaders import YoutubeLoader

# Load the video's transcript by video id; load() returns a list of Document
# objects whose page_content holds the transcript text.
loader = YoutubeLoader(yt_id)
docs = loader.load()
docs

[Document(page_content="welcome to the huberman Lab podcast where we discuss science and science-based tools for everyday life I'm Andrew huberman and I'm a professor of neurobiology and Ophthalmology at Stanford school of medicine today we are discussing supplements or more specifically a rational guide to supplementation now to be forthright I want to tell you that I am not a fan of the word supplements because it stems from this idea that all supplements are somehow food supplements or designed to compensate for what one could otherwise get from food and that's simply not the case many supplements are compounds that are extremely efficacious for instance for enhancing sleep or for enhancing hormone function or for enhancing focus and many of those compounds are simply not found in food or are not found in enough abundance in food to have the desired effect now that raises the issue as to whether or not these compounds are good to take safe to take and whether or not they are actuall

In [None]:
# Alternative: download the audio and transcribe it with OpenAI Whisper instead of loading the YouTube transcript

# from langchain.document_loaders.generic import GenericLoader
# from langchain.document_loaders.parsers import OpenAIWhisperParser
# from langchain.document_loaders.blob_loaders.youtube_audio import YoutubeAudioLoader

# loader_audio = GenericLoader(YoutubeAudioLoader([YT_URL], save_dir), OpenAIWhisperParser())
# docs_openai = loader_audio.load()
# docs_openai

# Splitting the document

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the transcript into chunks of at most 1000 units with 100 units of
# overlap between neighbours (presumably characters, the splitter's default
# length measure — confirm), then report how many chunks were produced.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
chunk_docs = text_splitter.split_documents(docs)
len(chunk_docs)

140

In [None]:
# Spot-check a single chunk.
chunk_docs[3]

Document(page_content="such as enhanced Focus over the next four to six hours of physical work or mental work so on and so forth plan to cover everything in between that as well and I promise to cover how supplements interact with other things such as behavioral tools prescription drugs when supplements might be a good alternative to prescription drugs when they might not be a good alternative to prescription drugs when supplements can serve as an augment to already excellent nutrition and prescription protocols and every feature of supplements as it relates to mental health physical health and performance by the end of today's episode you should be armed with a number of different questions as I mentioned before that will allow you to develop the most biologically effective and cost-effective supplement regimen for you and of course I want to acknowledge that for some people the total amount or dosages of a given supplement were supplements that you might need to take could be zero th

## Getting Embeddings

In [None]:
from langchain.embeddings import OpenAIEmbeddings

# Embedding model shared by this sanity check and the Pinecone upload.
embs = OpenAIEmbeddings()
doc_embs = embs.embed_query(chunk_docs[0].page_content)
# Show only the dimensionality and the first few values instead of dumping the
# whole vector; the length must match the `dimension` given to
# pinecone.create_index (1536).
len(doc_embs), doc_embs[:5]

[0.014610569464866665,
 0.013398664505071942,
 0.016871890598290296,
 -0.04660758154183375,
 -0.013506991293838422,
 0.0080297200741015,
 0.008957268028291493,
 -0.011150883870998144,
 -0.00566007215191139,
 -0.010920689794115352,
 0.002660775643131059,
 0.01752185133088917,
 -0.023601689207194042,
 0.01887593479348626,
 0.00595458569618175,
 -0.016926054458334838,
 0.05800897070440074,
 -0.006035830554925958,
 0.006946452652098008,
 0.0034698409683808946,
 -0.015138661978026623,
 0.005433263432696707,
 0.006536842069386252,
 -0.008483338443853471,
 -0.002708168555008731,
 0.010243648062816808,
 0.03493537494168926,
 -0.01949881452738547,
 -0.006584234748433272,
 0.0019651149103192163,
 0.016465664441924044,
 0.00042526710522714634,
 -0.006073068092291255,
 -0.014718896253633143,
 0.02060916341374993,
 -0.007366218376491488,
 -0.016438583443224377,
 -0.00017253995440986975,
 0.013974150162940225,
 -0.023886046212798773,
 0.01864574164792607,
 0.0019583444278136485,
 -0.0008640750590501

# Pinecone embeddings database

In [None]:
import pinecone
from langchain.vectorstores import Pinecone

# Credentials come from the environment (loaded from .env by load_dotenv).
# os.getenv returns None for a missing variable, so an unset key is passed on
# to pinecone.init as None rather than failing on this line.
pinecone.init(api_key=os.getenv('PINECONE_API_KEY'), environment=os.getenv('PINECONE_ENV'))

  from tqdm.autonotebook import tqdm


In [None]:
index_name="yt-knowledge-base"

# If the index doesn't exist, create it
if index_name not in pinecone.list_indexes():
    # Dimensions of OpenAI embeddings are 1536
    pinecone.create_index(name=index_name, metric='cosine', dimension=1536)

# Deterministic ids (video id + chunk position) make the upload idempotent:
# re-running this cell overwrites the same vectors instead of appending another
# copy of every chunk under freshly generated ids.
chunk_ids = [f"{yt_id}-{i}" for i in range(len(chunk_docs))]
search = Pinecone.from_documents(chunk_docs, embs, index_name=index_name, ids=chunk_ids)

In [None]:
# Sanity-check retrieval with a phrase taken verbatim from chunk_docs[3];
# that chunk is expected to come back as the top hit.
query = "I promise to cover how supplements interact"
similarity = search.similarity_search(query)

print(similarity)

[Document(page_content="such as enhanced Focus over the next four to six hours of physical work or mental work so on and so forth plan to cover everything in between that as well and I promise to cover how supplements interact with other things such as behavioral tools prescription drugs when supplements might be a good alternative to prescription drugs when they might not be a good alternative to prescription drugs when supplements can serve as an augment to already excellent nutrition and prescription protocols and every feature of supplements as it relates to mental health physical health and performance by the end of today's episode you should be armed with a number of different questions as I mentioned before that will allow you to develop the most biologically effective and cost-effective supplement regimen for you and of course I want to acknowledge that for some people the total amount or dosages of a given supplement were supplements that you might need to take could be zero t

# Language model & QA Chain

In [None]:
from langchain.chat_models import ChatOpenAI

# Chat model shared by the retrieval QA chain and the agent.
# NOTE(review): temperature=0.8 is fairly high for answering from retrieved text — confirm this is intended.
llm = ChatOpenAI(temperature=0.8, model_name='gpt-3.5-turbo')

In [None]:
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain, RetrievalQA

# Conversation history for the agent, stored under 'chat_history' as message
# objects (return_messages=True). ConversationBufferMemory keeps the raw
# messages and has no `llm` field, so no model is passed to it.
memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)

# Question answering over the Pinecone index: the retrieved chunks are
# "stuffed" into a single prompt for the LLM. No memory is attached here.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=search.as_retriever(),
    chain_type='stuff'
)
# Conversational alternative, currently disabled:
# chain =  ConversationalRetrievalChain.from_llm(llm, retriever=search.as_retriever(), memory=memory)

In [None]:
# Query the RetrievalQA chain directly. The name `chain` exists only in a
# commented-out line, so calling it fails with NameError on a fresh kernel.
qa_chain("What Andrew Huberman talks about in the video")

Retrying langchain.embeddings.openai.embed_with_retry.<locals>._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-K6IqOBIu4JLwqWHf3PvwDrBE on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..
Retrying langchain.embeddings.openai.embed_with_retry.<locals>._embed_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-text-embedding-ada-002 in organization org-K6IqOBIu4JLwqWHf3PvwDrBE on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/

{'query': 'What Andrew Huberman talks about in the video',
 'chat_history': [HumanMessage(content='What Andrew Huberman talks about in the video', additional_kwargs={}, example=False),
  AIMessage(content='Andrew Huberman talks about supplements and the benefits they can provide, as well as the partnership with momentous supplements. He also mentions ways to support the podcast, such as subscribing, leaving reviews, and checking out the sponsors mentioned in the episode.', additional_kwargs={}, example=False)],
 'result': 'Andrew Huberman talks about supplements and the benefits they can provide, as well as the partnership with momentous supplements. He also mentions ways to support the podcast, such as subscribing, leaving reviews, and checking out the sponsors mentioned in the episode.'}

In [None]:
# .run() returns only the answer string.
# NOTE(review): confirm this answer is grounded in the retrieved transcript chunks rather than the model's general knowledge.
qa_chain.run("What are benefits of cold exposure")

'The benefits of cold exposure can include improved immune function, increased metabolism and fat burning, enhanced mood and mental clarity, improved sleep quality, and increased resilience to stress. Cold exposure can also help improve circulation, reduce inflammation, and boost the production of endorphins, which can lead to a sense of well-being. It may also have potential benefits for athletic performance and recovery.'

In [None]:
from langchain.tools import Tool

# Description handed to the agent's LLM: it is the text the model reads when
# deciding whether to call this tool, so spelling and spacing matter here.
desc = ("""This tool is the video provided to you by the user. Use this tool to answer user questions
        using video by Andrew Huberman. If the user asks about the video or podcast use this tool to get the answer.
        This tool also can be used for follow up questions by the user. When user asks about Andrew Huberman use this tool.
        When user wants to know something about or from the video use this tool. This tool is the transcript
        of the video.""")

# Expose the retrieval QA chain as the single tool the agent can call.
tools = [Tool(
    name='Video DB',
    func=qa_chain.run,
    description=desc)]

In [None]:
from langchain.agents import initialize_agent

# Conversational ReAct agent: it can call the video tool and keeps the dialogue
# in `memory`, so follow-up questions see earlier turns.
agent = initialize_agent(
    agent='chat-conversational-react-description',
    tools=tools,
    llm=llm,
    max_iterations=3, # Limiting how many times the agent can loop through the tool
    early_stopping_method='generate',
    memory=memory,
    verbose=True  # keyword must be spelled exactly; a misspelled name does not enable verbose output
)

In [None]:
# Calling the agent directly returns a dict with the input, the accumulated
# chat_history and the output.
agent("Hi my name is Dominik, how are you")

{'input': 'Hi my name is Dominik, how are you',
 'chat_history': [HumanMessage(content='Hi my name is Dominik, how are you', additional_kwargs={}, example=False),
  AIMessage(content="Hello Dominik! I am an AI assistant and I'm here to help you. How can I assist you today?", additional_kwargs={}, example=False)],
 'output': "Hello Dominik! I am an AI assistant and I'm here to help you. How can I assist you today?"}

In [None]:
# Video-related question; per the tool description the agent is expected to
# answer it through the 'Video DB' tool.
agent("What is this video about")

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-gpt-3.5-turbo in organization org-K6IqOBIu4JLwqWHf3PvwDrBE on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-gpt-3.5-turbo in organization org-K6IqOBIu4JLwqWHf3PvwDrBE on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit ht

{'input': 'What is this video about',
 'chat_history': [HumanMessage(content='Hi my name is Dominik, how are you', additional_kwargs={}, example=False),
  AIMessage(content="Hello Dominik! I am an AI assistant and I'm here to help you. How can I assist you today?", additional_kwargs={}, example=False),
  HumanMessage(content='What is this video about', additional_kwargs={}, example=False),
  AIMessage(content="The video is about foundational supplements, which are a category of nutrients and micronutrients that include vitamins, minerals, digestive enzymes, and adaptogens. Adaptogens are substances that are believed to improve the body and brain's ability to handle stress. They are thought to reduce cortisol levels and may also have effects on hormone augmentation. Foundational supplements are designed to support overall health and well-being.", additional_kwargs={}, example=False)],
 'output': "The video is about foundational supplements, which are a category of nutrients and micronut

In [None]:
# agent.run(...) returns only the final output string.
agent.run("Can you describe this video in 5 bullet points")

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-gpt-3.5-turbo in organization org-K6IqOBIu4JLwqWHf3PvwDrBE on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-gpt-3.5-turbo in organization org-K6IqOBIu4JLwqWHf3PvwDrBE on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit ht

'In the video, Andrew Huberman provides a framework for approaching supplementation in various domains of mental and physical health and performance. He emphasizes the importance of maximizing the benefits of supplementation without spending excessive amounts of money and highlights that supplements are not just meant to compensate for deficiencies, but many of them are powerful non-prescription compounds.'

In [None]:
# NOTE(review): the saved run of this cell ended in RateLimitError (limit: 3 requests/min); re-run once the limit has reset.
agent.run("What Andrew says about the cold exposure")

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-gpt-3.5-turbo in organization org-K6IqOBIu4JLwqWHf3PvwDrBE on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-gpt-3.5-turbo in organization org-K6IqOBIu4JLwqWHf3PvwDrBE on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit ht

RateLimitError: Rate limit reached for default-gpt-3.5-turbo in organization org-K6IqOBIu4JLwqWHf3PvwDrBE on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.

In [None]:
# Inspect the prompt template used by the agent's LLM chain.
agent.agent.llm_chain.prompt

ChatPromptTemplate(input_variables=['input', 'chat_history', 'agent_scratchpad'], output_parser=None, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], output_parser=None, partial_variables={}, template='Assistant is a large language model trained by OpenAI.\n\nAssistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.\n\nAssistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, A

In [None]:
# Follow-up phrased with a pronoun: it can only be resolved from the conversation memory.
agent.run("What are the benefits of it")

In [None]:
# Another follow-up that relies on the earlier turns for context.
agent.run("What about heat exposure")

In [None]:
# Memory check: the name was given in the first message sent to the agent.
agent.run("What is my name")