In [1]:
import openai
import os
from dotenv import load_dotenv

In [2]:
from langchain.document_loaders import YoutubeLoader

In [3]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chat_models import ChatOpenAI
from langchain.chains import LLMChain
from langchain.prompts.chat import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate

import textwrap


In [4]:
# Load environment variables from a dotenv file.
# The file location can be overridden by setting DOTENV_PATH in the
# environment; when it is unset, the original hard-coded path is used, so the
# notebook behaves exactly as before on the author's machine.
# NOTE(review): presumably this file holds the OpenAI API key - confirm.
load_dotenv(os.environ.get("DOTENV_PATH", '/home/tom/two/envapi/my-env'))

True

In [5]:
# Module-level embedding model. creating_db() reads this global and passes it
# to Chroma.from_documents, so this cell must run before creating_db is called.
# NOTE(review): presumably requires the API key loaded by load_dotenv - confirm.
embeddings= OpenAIEmbeddings()

In [6]:
#creating a db
def creating_db(video_url):
    '''
    Build a Chroma vector store from the transcript of a YouTube video.

    The transcript is loaded with YoutubeLoader, split with
    RecursiveCharacterTextSplitter (chunk_size=1000, chunk_overlap=100) and
    indexed using the module-level `embeddings` object.

    When a user asks a question, the returned store is what the similarity
    search runs against to pick the transcript chunks the answer is based on.

    Args:
        video_url: URL of the YouTube video whose transcript should be indexed.

    Returns:
        The Chroma store produced by Chroma.from_documents.
    '''
    transcript_docs = YoutubeLoader.from_youtube_url(video_url).load()

    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    chunks = splitter.split_documents(transcript_docs)

    return Chroma.from_documents(chunks, embedding=embeddings)

In [23]:
#get response
def get_response(db, query, k=5):
    '''
    Answer `query` from the transcript chunks stored in `db`.

    The `k` chunks most similar to `query` are retrieved, joined into one
    string and injected into the system prompt; the chat model is instructed
    to answer only from that transcript text.

    Context budget: the retrieved text, the prompt and the answer must all
    fit in the chat model's context window (the original author's note cites
    4097 tokens for gpt-3.5 turbo), so keep `k` times the chunk size used when
    building `db` within that budget.
    NOTE(review): creating_db uses chunk_size=1000, which the splitter measures
    with its length function (characters by default, not tokens) - confirm
    before tuning `k` against a token limit.

    Args:
        db: vector store exposing `similarity_search(query, k=...)`, e.g. the
            Chroma instance returned by creating_db.
        query: the user's question.
        k: number of chunks to retrieve. Defaults to 5.

    Returns:
        A `(response, docs)` tuple: the model's answer as a single line of
        text, and the list of retrieved documents the answer was based on.
    '''
    docs = db.similarity_search(query, k=k)

    docs_page_content = " ".join(d.page_content for d in docs)

    chat = ChatOpenAI(temperature=.4)

    # system prompt template; {docs} receives the retrieved transcript text
    template="""
    You are a helpful assistant who can answer question from Youtube videos based on the video's transcript: {docs}
    Only use the factual information from transcript to answer the question.
    Do not try to make up an answer if you dont have the corresponding datato answer. 
    If you feel like you don't have enough information to answer the question, say: "Sorry, I cannot answer that".
    Your answer should be verbose and detailed.
    """

    system_message_prompt = SystemMessagePromptTemplate.from_template(template)

    # human question prompt
    human_template='Answer the following question: {question}'

    human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)

    chat_prompt = ChatPromptTemplate.from_messages(
        [system_message_prompt, human_message_prompt]
    )

    # chaining
    chain = LLMChain(llm=chat, prompt=chat_prompt, verbose=True)
    response = chain.run(question=query, docs=docs_page_content)

    # Flatten the answer onto one line. Newlines are replaced by a single
    # space rather than deleted, so the last word of one paragraph is not
    # glued to the first word of the next ("text.The encoder").
    response = " ".join(response.split())

    return response, docs

In [8]:
# Inputs for the demo run: the video to index and the first question to ask.
video_url = "https://www.youtube.com/watch?v=H39Z_720T5s"
query = "what are encoders?"

In [9]:
# Index the video's transcript once; the query cells below and get_answer()
# all search this same store.
mydb = creating_db(video_url)

In [13]:
mydb

<langchain.vectorstores.chroma.Chroma at 0x7f408bc7b610>

In [14]:
# Ask the first question, requesting up to 5 chunks. In the recorded run the
# index held only 3 elements, so the store reduced the request to 3 (see the
# warning printed below).
response, docs = get_response(mydb, query, k=5)

Number of requested results 5 is greater than number of elements in index 3, updating n_results = 3


In [15]:
response

'Encoders are an essential component of the transformer architecture. They are responsible for accepting inputs, such as text, and converting them into numerical representations or embeddings. These embeddings capture the meaning and context of the input text.The encoder utilizes the self-attention mechanism as its main component. Self-attention allows the encoder to focus on different parts of the input text and assign varying levels of importance to each word or token. This mechanism helps capture the relationships and dependencies between different words in the input.The encoder also has a bi-directional property, meaning it considers both the left and right context of each word when generating the embeddings. This allows the encoder to capture a comprehensive understanding of the input text.The numerical representations generated by the encoder can also be referred to as features or embeddings. These embeddings are then passed on to the decoder for further processing and prediction

In [16]:
docs

[Document(page_content='as well. It differs from the encoder due to its\xa0\xa0 uni-directional property, and is traditionally\xa0\nused in an auto-regressive manner. Here too,\xa0\xa0 we recommend you check out the video on decoders\xa0\nespecially to understand how all of this works.\xa0\xa0 Combining the two parts results in what is known\xa0\nas an encoder-decoder, or a sequence-to-sequence\xa0\xa0 transformer. The encoder accepts inputs and\xa0\ncomputes a high-level representation of those\xa0\xa0 inputs. These outputs are then passed to the\xa0\ndecoder. The decoder uses the encoder\'s output\xa0\xa0 alongside other inputs, in order to generate\xa0\na prediction. It then predicts an output,\xa0\xa0 which it will re-use in future iterations,\xa0\nhence the term "auto-regressive".\xa0\xa0 Finally, to get an understanding\xa0\nof the encoder-decoders as a whole,\xa0\xa0 we recommend you check out\xa0\nthe video on encoder-decoders.', metadata={'source': 'H39Z_720T5s'}),
 Document(p

In [18]:
# Follow-up question with a length constraint, retrieving 3 chunks.
# Note: this rebinds `query`, `response` and `docs` from the earlier cells.
query="how does the encoder work, explain in 3 sentence only?"
response, docs = get_response(mydb, query, k=3)
response

'The encoder in a transformer network accepts textual inputs and converts them into numerical representations called embeddings or features. It utilizes the self-attention mechanism as its main component to capture the relationships between different words in the input. These embeddings are then used to compute a high-level representation of the inputs, which are passed on to the decoder for further processing and prediction.'

In [19]:
docs

[Document(page_content="into two parts. On the left we have the encoder,\xa0\xa0 and on the right, the decoder. These two can\xa0\nbe used together, but they can also be used\xa0\xa0 independently! Let's understand how these work:\xa0\nThe encoder accepts inputs that represent text.\xa0\xa0 It converts this text, these words, into numerical\xa0\nrepresentations. These numerical representations\xa0\xa0 can also be called embeddings, or features. We'll\xa0\nsee that it uses the self-attention mechanism as\xa0\xa0 its main component. We recommend you check out the\xa0\nvideo on encoders especially to understand what is\xa0\xa0 this numerical representation, as well as how it\xa0\nworks. We'll study the self-attention mechanism as\xa0\xa0 well as its bi-directional properties. The decoder\xa0\nis similar to the encoder: it can also accept\xa0\xa0 the same inputs as the encoder: inputs that\xa0\nrepresent text. It uses a similar mechanism as\xa0\xa0 the encoder, which is the masked self-att

In [21]:
def get_answer(query, k=3, db=None):
    '''
    Return only the answer text for `query`.

    Convenience wrapper around get_response that discards the retrieved
    documents and returns just the response string.

    Args:
        query: the user's question.
        k: number of transcript chunks to retrieve. Defaults to 3, the value
            that was previously hard-coded.
        db: vector store to search. When None (the default), the
            notebook-level `mydb` is used, matching the original behaviour.

    Returns:
        The response string produced by get_response.
    '''
    if db is None:
        # Fall back to the store built earlier in the notebook.
        db = mydb
    response, _ = get_response(db, query, k=k)
    return response

In [22]:
# print() shows the answer as plain text instead of the quoted string repr
# that a bare last-line expression would display.
print(get_answer("what is self attention, anser in 2 sentence only"))

Self-attention is a mechanism used in the transformer architecture that allows a model to weigh the importance of different words in a sentence when encoding or decoding. It calculates the attention score for each word by considering the relationships between all the words in the input sequence, enabling the model to focus on relevant information and capture long-range dependencies.
