# Imports

In [None]:
# !pip install openai
# !pip install langchain
# !pip install pinecone-client
# !pip install tiktoken yt_dlp pydub librosa
# !pip install python-dotenv
# !pip install youtube-transcript-api

In [None]:
# Load environment variables (e.g. PINECONE_API_KEY / PINECONE_ENV, read later via os.getenv) from a .env file.
from dotenv import load_dotenv,find_dotenv
# find_dotenv() locates the .env file and load_dotenv() loads it; the cell output shows the call's return value.
load_dotenv(find_dotenv())

True

In [None]:
import os
from urllib.parse import urlparse, parse_qs

# Getting YT url to text

## Parsing the URL

In [None]:
# Video to process and the local folder handed to the (optional) audio-download loader.
YT_URL = "https://www.youtube.com/watch?v=tLS6t3FVOTI"
save_dir = 'downloads'

# Check specifically for a *directory*: a regular file with the same name must not be
# reported as an existing directory.
if os.path.isdir(save_dir):
    print(f"Directory: {save_dir} exists")
else:
    # makedirs also creates missing parent folders, and exist_ok=True tolerates the
    # directory appearing between the check above and this call.
    os.makedirs(save_dir, exist_ok=True)

Directory: downloads exists


In [None]:
# Break the URL into its components; the video ID is carried in the query string.
parsed_url = urlparse(YT_URL)
parsed_url

ParseResult(scheme='https', netloc='www.youtube.com', path='/watch', params='', query='v=tLS6t3FVOTI', fragment='')

In [None]:
# parse_qs turns the query string into a dict mapping each parameter name to a list of values.
parsed_query = parse_qs(parsed_url.query)
parsed_query

{'v': ['tLS6t3FVOTI']}

In [None]:
# The video ID is the first (here: only) value of the "v" parameter.
yt_id = parsed_query['v'][0]
yt_id

'tLS6t3FVOTI'

In [None]:
def get_yt_id(url):
    """Extract the video ID from a YouTube URL.

    Supported URL shapes:
      * any URL carrying a ``v`` query parameter, e.g.
        ``https://www.youtube.com/watch?v=<id>``
      * short links: ``https://youtu.be/<id>``
      * path-based links on youtube.com / youtube-nocookie.com:
        ``/embed/<id>``, ``/shorts/<id>``, ``/live/<id>``, ``/v/<id>``

    Args:
        url: The video URL as a string.

    Returns:
        The video ID as a string.

    Raises:
        ValueError: If no video ID can be located in ``url``.
    """
    parsed_url = urlparse(url)

    # Standard watch URLs: first value of the "v" query parameter.
    v_values = parse_qs(parsed_url.query).get('v')
    if v_values:
        return v_values[0]

    # Normalise the host so "www.youtu.be" and "YOUTU.BE" are treated alike.
    host = (parsed_url.hostname or '').lower()
    if host.startswith('www.'):
        host = host[len('www.'):]
    segments = [segment for segment in parsed_url.path.split('/') if segment]

    # Short links carry the ID as the first path segment.
    if host == 'youtu.be' and segments:
        return segments[0]

    # Embed / shorts / live / legacy links carry the ID as the second path segment.
    # Only accept these on YouTube hosts to avoid matching unrelated sites.
    is_youtube_host = any(
        host == domain or host.endswith('.' + domain)
        for domain in ('youtube.com', 'youtube-nocookie.com')
    )
    if is_youtube_host and len(segments) >= 2 and segments[0] in ('embed', 'shorts', 'live', 'v'):
        return segments[1]

    raise ValueError(f"Could not find a YouTube video ID in URL: {url!r}")

In [None]:
# Re-derive the video ID through the helper; this rebinds the notebook-level `yt_id`.
yt_id = get_yt_id(YT_URL)
yt_id

'tLS6t3FVOTI'

## Getting the transcript

In [None]:
from langchain.document_loaders import YoutubeLoader

# Load the video's transcript, addressed by its video ID, as a list of LangChain Document objects.
loader = YoutubeLoader(yt_id)
docs = loader.load()
# NOTE(review): displaying `docs` prints the whole transcript into the cell output.
docs

[Document(page_content="welcome to the huberman Lab podcast where we discuss science and science-based tools for everyday life I'm Andrew huberman and I'm a professor of neurobiology and Ophthalmology at Stanford school of medicine today we are discussing supplements or more specifically a rational guide to supplementation now to be forthright I want to tell you that I am not a fan of the word supplements because it stems from this idea that all supplements are somehow food supplements or designed to compensate for what one could otherwise get from food and that's simply not the case many supplements are compounds that are extremely efficacious for instance for enhancing sleep or for enhancing hormone function or for enhancing focus and many of those compounds are simply not found in food or are not found in enough abundance in food to have the desired effect now that raises the issue as to whether or not these compounds are good to take safe to take and whether or not they are actuall

In [None]:
# Alternative approach: download the audio file first, then transcribe it with OpenAIWhisperParser

# from langchain.document_loaders.generic import GenericLoader
# from langchain.document_loaders.parsers import OpenAIWhisperParser
# from langchain.document_loaders.blob_loaders.youtube_audio import YoutubeAudioLoader

# loader_audio = GenericLoader(YoutubeAudioLoader([YT_URL], save_dir), OpenAIWhisperParser())
# docs_openai = loader_audio.load()
# docs_openai

# Splitting the document

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the transcript into overlapping chunks so each piece can be embedded and retrieved separately.
# NOTE(review): chunk_size / chunk_overlap are presumably measured in characters (the splitter's default) — confirm.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
chunk_docs = text_splitter.split_documents(docs)
# Number of chunks produced
len(chunk_docs)

140

In [None]:
chunk_docs[3]

Document(page_content="such as enhanced Focus over the next four to six hours of physical work or mental work so on and so forth plan to cover everything in between that as well and I promise to cover how supplements interact with other things such as behavioral tools prescription drugs when supplements might be a good alternative to prescription drugs when they might not be a good alternative to prescription drugs when supplements can serve as an augment to already excellent nutrition and prescription protocols and every feature of supplements as it relates to mental health physical health and performance by the end of today's episode you should be armed with a number of different questions as I mentioned before that will allow you to develop the most biologically effective and cost-effective supplement regimen for you and of course I want to acknowledge that for some people the total amount or dosages of a given supplement were supplements that you might need to take could be zero th

## Getting Embeddings

In [None]:
from langchain.embeddings import OpenAIEmbeddings

# Embedding client, reused later when the chunks are uploaded to the vector store.
# NOTE(review): presumably authenticates with an OpenAI key loaded from the .env file — confirm.
embs = OpenAIEmbeddings()
# Sanity check: embed the text of the first chunk and inspect the raw vector.
doc_embs = embs.embed_query(chunk_docs[0].page_content)
doc_embs

[0.014585338048331753,
 0.013407134861614945,
 0.016887574042553692,
 -0.04658640691274868,
 -0.013576416714574808,
 0.00803073892752574,
 0.0089651749421287,
 -0.011064271781476202,
 -0.005708189390345399,
 -0.010983016678319989,
 0.0026560347409451397,
 0.017496988713209195,
 -0.02363176902493927,
 0.018905414847422374,
 0.005891014070938831,
 -0.01687403074925882,
 0.05807050122859123,
 -0.006192336048604167,
 0.0070353604214023644,
 0.0034939806115884113,
 -0.015113497615831044,
 0.00535946821445452,
 0.006547827939819878,
 -0.008497956469165919,
 -0.0026966622925232464,
 0.010197548508057186,
 0.034939807978529316,
 -0.019501288087428213,
 -0.006541056758833744,
 0.0019501287621766913,
 0.016359413543731795,
 0.00035485739858849016,
 -0.006067067104884828,
 -0.01474784825464418,
 0.02066594798085015,
 -0.007407780497914062,
 -0.016454211008860277,
 -0.00018430578912265516,
 0.014070720842804729,
 -0.02390261998967505,
 0.018634563882686594,
 0.0019602857664865435,
 -0.000861645411

# Pinecone embeddings database

In [None]:
import pinecone
from langchain.vectorstores import Pinecone

# Configure the Pinecone client from environment variables.
# os.getenv yields None for an unset variable rather than raising, so a missing key is not detected here.
pinecone.init(api_key=os.getenv('PINECONE_API_KEY'), environment=os.getenv('PINECONE_ENV'))

  from tqdm.autonotebook import tqdm


In [None]:
index_name="yt-knowledge-base"

# If the index doesn't exist, create it
if index_name not in pinecone.list_indexes():
    # Dimensions of OpenAI embeddings are 1536
    pinecone.create_index(name=index_name, metric='cosine', dimension=1536)

# Embed every chunk and upload it to the index; `search` is the resulting vector store.
# NOTE(review): this runs on every execution of the cell, even when the index already existed,
# so re-running presumably uploads the same chunks again — confirm whether duplicates result.
search = Pinecone.from_documents(chunk_docs, embs, index_name=index_name)

In [None]:
# Sanity check of the vector store: the query is a phrase that appears verbatim in the transcript,
# so the chunk containing it should be among the returned documents.
query = "I promise to cover how supplements interact"
similarity = search.similarity_search(query)

print(similarity)

[Document(page_content="such as enhanced Focus over the next four to six hours of physical work or mental work so on and so forth plan to cover everything in between that as well and I promise to cover how supplements interact with other things such as behavioral tools prescription drugs when supplements might be a good alternative to prescription drugs when they might not be a good alternative to prescription drugs when supplements can serve as an augment to already excellent nutrition and prescription protocols and every feature of supplements as it relates to mental health physical health and performance by the end of today's episode you should be armed with a number of different questions as I mentioned before that will allow you to develop the most biologically effective and cost-effective supplement regimen for you and of course I want to acknowledge that for some people the total amount or dosages of a given supplement were supplements that you might need to take could be zero t

# Language model & QA Chain

In [None]:
from langchain.chat_models import ChatOpenAI

# Chat model shared by the retrieval QA chain and the conversational agent defined later in the notebook.
llm = ChatOpenAI(temperature=0.7, model_name='gpt-3.5-turbo')

In [None]:
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain, RetrievalQA

# Conversation history is stored under the 'chat_history' key; with return_messages=True it is
# kept as HumanMessage / AIMessage objects (visible in the outputs of the calls below).
# NOTE(review): llm=llm is passed to ConversationBufferMemory — check whether the class actually uses it.
memory = ConversationBufferMemory(llm=llm, memory_key='chat_history', return_messages=True)
# Single-turn alternative without memory, kept for reference (the only use of RetrievalQA in this cell):
# qa_chain = RetrievalQA.from_chain_type(
#     llm=llm,
#     retriever=search.as_retriever(),
#     chain_type='stuff'
# )
# Retrieval chain over the Pinecone vector store; each question/answer pair is appended to `memory`.
qa_chain =  ConversationalRetrievalChain.from_llm(llm, retriever=search.as_retriever(), memory=memory)

In [None]:
qa_chain.memory.chat_memory

ChatMessageHistory(messages=[])

In [None]:
qa_chain("What is this video about")

{'question': 'What is this video about',
 'chat_history': [HumanMessage(content='What is this video about', additional_kwargs={}, example=False),
  AIMessage(content='This video is about supplementation and how to develop a rational and effective supplementation protocol. It also mentions topics such as deliberate cold exposure and heat exposure.', additional_kwargs={}, example=False)],
 'answer': 'This video is about supplementation and how to develop a rational and effective supplementation protocol. It also mentions topics such as deliberate cold exposure and heat exposure.'}

In [None]:
qa_chain("What is the capital of Poland")

{'question': 'What is the capital of Poland',
 'chat_history': [HumanMessage(content='What is this video about', additional_kwargs={}, example=False),
  AIMessage(content='This video is about supplementation and how to develop a rational and effective supplementation protocol. It also mentions topics such as deliberate cold exposure and heat exposure.', additional_kwargs={}, example=False),
  HumanMessage(content='What is the capital of Poland', additional_kwargs={}, example=False),
  AIMessage(content="I don't know the answer to that question.", additional_kwargs={}, example=False)],
 'answer': "I don't know the answer to that question."}

In [None]:
qa_chain.run("What the host says about cold exposure")

"The host mentions that they will be discussing cold exposure as part of today's discussion on supplementation."

In [None]:
qa_chain.run("And heat exposure")

"The host mentions heat exposure as one of the topics that they discuss in their newsletters and in today's discussion. However, the specific details about heat exposure are not mentioned in the provided context."

In [None]:
qa_chain.run("What this video says about adaptogens")

"The video mentions that most people don't know what an adaptogen is and that many people talking about adaptogens don't define what they are or their purpose. It also states that adaptogens are used for other purposes as well. The video emphasizes the importance of learning to think about supplementation in the same way as nutrition or exercise and developing optimal protocols for oneself. Additionally, it suggests that the ideal dosage of a supplement for an individual may be zero milligrams, depending on factors like sleep quality and overall well-being."

In [None]:
qa_chain.run("Mention benefits of cold exposure")

'The benefits of cold exposure can include increased metabolism, improved immune function, enhanced circulation, increased production of brown fat, improved mood, and increased mental alertness. However, it is important to note that individual responses to cold exposure can vary, and it is recommended to consult with a healthcare professional before starting any new cold exposure practices.'

# Fully conversational model (not a part of the task)

In [None]:
from langchain.tools import Tool

# Providing description so the model knows when to use this tool.
# The text is passed to the agent as the tool's description, so spelling and spacing matter:
# fixed the missing space after "user." and the misspelled host name ("Aandrew").
desc = ("""This tool is the video of the podcast episode provided to you by the user. Use this tool to answer user questions
    using video of the podcast episode by Andrew Huberman. If the user asks about the video or podcast episode use this tool to get the answer.
    This tool also can be used for follow up questions by the user. When user asks about Andrew Huberman use this tool.
    This tool is the transcript of the video.""")

# Single tool that forwards the agent's question to the retrieval QA chain.
tools = [Tool(
    name='Knowledge Base',
    func=qa_chain.run,
    description=desc)]

In [None]:
from langchain.agents import initialize_agent

# Initializing conversational agent
# NOTE(review): `memory` is the same object already attached to qa_chain, so the tool's internal
# chain and the agent presumably both write to one shared chat history — confirm this is intended.
agent = initialize_agent(
    agent='chat-conversational-react-description',
    tools=tools,
    llm=llm,
    max_iterations=3, # Limiting how many times the agent can loop through the tool
    early_stopping_method='generate',
    memory=memory,
    verbose=True  # was misspelled "verobse", so verbose tracing was never actually enabled
)

In [None]:
agent.run("Hi my name is Dominik, how are you")

"Hello Dominik! I'm an AI language model, so I don't have feelings, but I'm here to help you. How can I assist you today?"

In [None]:
agent.run("What is this video about")

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-gpt-3.5-turbo in organization org-K6IqOBIu4JLwqWHf3PvwDrBE on requests per day. Limit: 2000 / day. Please try again in 43.2s. Contact us through our help center at help.openai.com if you continue to have issues..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-gpt-3.5-turbo in organization org-K6IqOBIu4JLwqWHf3PvwDrBE on requests per day. Limit: 2000 / day. Please try again in 43.2s. Contact us through our help center at help.openai.com if you continue to have issues..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-gpt-3.5-turbo in organization org-K6IqOB

RateLimitError: Rate limit reached for default-gpt-3.5-turbo in organization org-K6IqOBIu4JLwqWHf3PvwDrBE on requests per day. Limit: 2000 / day. Please try again in 43.2s. Contact us through our help center at help.openai.com if you continue to have issues.

In [None]:
agent.run("What is the capital of poland")

In [None]:
agent.run("What is this podcast episode about")

In [None]:
agent.run("What are the benefits of cold exposure")

In [None]:
agent.run("What about heat exposure")

In [None]:
agent.run("What is my name")