# Building a RAG application from scratch using Langchain

# Setting up the model 


In [46]:
import os
from dotenv import load_dotenv

# Load variables from a .env file (if one is found) into the process
# environment before anything below reads them.
load_dotenv()

# The YouTube video this notebook works with.
YOUTUBE_VIDEO = "https://www.youtube.com/watch?v=cdiD-9MMpb0"

# API key handed to the chat model later on; os.getenv returns None when the
# variable is not set.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")


In [47]:
from langchain_openai.chat_models import ChatOpenAI

# Chat model shared by every chain in this notebook.
model = ChatOpenAI(
    model="gpt-3.5-turbo",
    openai_api_key=OPENAI_API_KEY,
)

In [48]:
# Checking if the model is working
# model.invoke("who won 2011 cricket world cup")

In [49]:
# Invoking the model directly returns an AIMessage instance that wraps the answer.
# Piping the model into a string output parser gives us the plain answer text.

from langchain_core.output_parsers import StrOutputParser

parser = StrOutputParser()

# `|` composes the two steps: the model's output is fed to the parser.
chain = model | parser

chain.invoke("who won 2015 cricket world cup")

'Australia won the 2015 Cricket World Cup by defeating New Zealand in the final.'

# Introducing prompt templates

We need to provide the model with both some context and the question. Prompt templates are a simple way to define and reuse prompts.

In [50]:
# ChatPromptTemplate is imported from langchain_core, the package this notebook
# already uses for the output parser and the runnables, rather than from the
# legacy `langchain.prompts` re-export.
from langchain_core.prompts import ChatPromptTemplate

# The template has two placeholders: {context} carries the supporting text and
# {question} carries what is being asked.
template = """
Answer the question based on the context below. If you can't 
answer the question, reply "I don't know".

Context: {context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

# Render the prompt with sample values to inspect the exact text sent to the model.
prompt.format(context="Mary's sister is Susana", question="Who is Mary's sister?")

'Human: \nAnswer the question based on the context below. If you can\'t \nanswer the question, reply "I don\'t know".\n\nContext: Mary\'s sister is Susana\n\nQuestion: Who is Mary\'s sister?\n'

In [51]:
# Question-answering chain: fill in the prompt, call the model, then parse the
# reply into a plain string.
chain = prompt | model | parser

# The input keys must match the placeholders used in the prompt template.
chain.invoke(
    dict(
        context="Mary's sister is Susana",
        question="Who is Mary's sister?",
    )
)

'Susana'

In [52]:
# Prompt that asks the model to translate {answer} into {language}.
translation_prompt = ChatPromptTemplate.from_template("Translate {answer} to {language}")

In [53]:
from operator import itemgetter

# Two-stage pipeline. The leading mapping builds the translation prompt's inputs:
#   - "answer" is produced by running `chain` (the question-answering chain as it
#     is bound when this cell runs) on the incoming dictionary;
#   - "language" is picked straight out of that same dictionary.
translation_chain = (
    {
        "answer": chain,
        "language": itemgetter("language"),
    }
    | translation_prompt
    | model
    | parser
)

# One input dictionary feeds both stages: context/question go to the
# question-answering chain, language goes to the translation prompt.
translation_chain.invoke(
    dict(
        context="Mary's sister is Susana. She doesn't have any more siblings.",
        question="How many sisters does Mary have?",
        language="Spanish",
    )
)

'María tiene una hermana, Susana.'

# Transcribing the YouTube Video

I had issues downloading ffmpeg on this computer, so the script below did not work. I downloaded the transcription txt file separately instead.

In [54]:
# !pip install openai-whisper

In [55]:
# import tempfile
# import whisper
# from pytube import YouTube


# # Let's do this only if we haven't created the transcription file yet.
# if not os.path.exists("transcription.txt"):
#     youtube = YouTube(YOUTUBE_VIDEO)
#     audio = youtube.streams.filter(only_audio=True).first()

#     # Let's load the base model. This is not the most accurate
#     # model but it's fast.
#     whisper_model = whisper.load_model("base")

#     with tempfile.TemporaryDirectory() as tmpdir:
#         file = audio.download(output_path=tmpdir, filename="audio.mp4")
#         transcription = whisper_model.transcribe(file, fp16=False)["text"].strip()

#         with open("transcription.txt", "w") as transcription_file:
#             transcription_file.write(transcription)

In [56]:
# Read the whole transcript into memory. The encoding is passed explicitly so
# the decoded text does not depend on the platform's default locale encoding.
with open("transcription.txt", encoding="utf-8") as file:
    transcription = file.read()

# Show the first 100 characters as a quick check that the file loaded.
transcription[:100]

"I think it's possible that physics has exploits and we should be trying to find them. arranging some"

# Using the entire transcription as context
If we try to invoke the chain using the transcription as context, the model will return an error because the context is too long.

Large Language Models support limited context sizes. The video we are using is too long for the model to handle, so we need to find a different solution.

In [57]:
# Demonstration: send the entire transcript as the context. The failure is the
# point of this cell, so the exception is caught and its message is printed
# instead of letting the cell raise.
try:
    chain.invoke(
        dict(
            context=transcription,
            question="Is reading papers a good idea?",
        )
    )
except Exception as error:
    print(error)

Error code: 400 - {'error': {'message': "This model's maximum context length is 16385 tokens. However, your messages resulted in 47047 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}


# Splitting the transcription
Since we can't use the entire transcription as the context for the model, a potential solution is to split the transcription into smaller chunks. We can then invoke the model using only the relevant chunks to answer a particular question:

In [58]:
# Let's start by loading the transcription in memory as LangChain documents.

from langchain_community.document_loaders import TextLoader

# Decode the file explicitly as UTF-8 instead of relying on the platform's
# default encoding, which differs between operating systems.
loader = TextLoader("transcription.txt", encoding="utf-8")
text_documents = loader.load()

In [59]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Break the transcript into chunks of size 1000 with an overlap of 20 between
# neighbouring chunks (sizes presumably counted in characters, the splitter's
# default length measure - confirm).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=20,
    chunk_size=1000,
)

# `documents` holds the resulting chunks and is what gets embedded later on.
documents = text_splitter.split_documents(text_documents)

# Finding the relevant chunks
Given a particular question, we need to find the relevant chunks from the transcription to send to the model. Here is where the idea of embeddings comes into play.

An embedding is a mathematical representation of the semantic meaning of a word, sentence, or document. It's a projection of a concept in a high-dimensional space. Embeddings have a simple characteristic: the projections of related concepts will be close to each other, while concepts with different meanings will lie far apart. You can use Cohere's Embed Playground to visualize embeddings in two dimensions.

To provide the model with the most relevant chunks, we can use the embeddings of the question and the chunks of the transcription to compute the similarity between them. We can then select the chunks with the highest similarity to the question and use them as the context for the model:

In [60]:
# !pip install "langchain[docarray]"
!pip install -U docarray



In [61]:
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import DocArrayInMemorySearch
from langchain_core.runnables import RunnableParallel, RunnablePassthrough

# Embedding client reused by both vector stores below. No key is passed here;
# presumably it is read from the OPENAI_API_KEY environment variable - confirm.
embeddings = OpenAIEmbeddings()

# Example of how embedding works: turn one question into a vector, then show
# its length and its first ten components.
embedded_query = embeddings.embed_query("Who is Mary's sister?")
print("Embedding length:", len(embedded_query))
print(embedded_query[:10])

Embedding length: 1536
[-0.0013594045786472944, -0.03437049808954927, -0.01142556447128598, 0.0012913952108823416, -0.02616560552048414, 0.009161713858426773, -0.015621817294155089, 0.001822962257550091, -0.01180078783066434, -0.03324482708009158]


# Setting up a Vector Store and Connecting the vector store to the chain
We need an efficient way to store document chunks, their embeddings, and perform similarity searches at scale. To do this, we'll use a vector store.

A vector store is a database of embeddings that specializes in fast similarity searches.

We can use the vector store to find the most relevant chunks from the transcription to send to the model. Here is how we can connect the vector store to the chain:

In [62]:
# Build an in-memory vector store from the transcript chunks, using
# `embeddings` to embed them.
vectorstore2 = DocArrayInMemorySearch.from_documents(
    documents,
    embeddings,
)

In [63]:
# Build the prompt inputs from a single question string:
#   - context: the question is sent to the retriever, which returns matching chunks;
#   - question: the question is passed through unchanged.
setup = RunnableParallel(
    context=vectorstore2.as_retriever(),
    question=RunnablePassthrough(),
)

# Retrieval -> prompt -> model -> plain-string answer.
chain = setup | prompt | model | parser

chain.invoke("What is synthetic intelligence?")

'Synthetic intelligence is described as the next stage of development that involves artificial intelligence systems that are capable of uncovering puzzles in the universe and solving them.'

# OR

In [64]:
# Same pipeline as the RunnableParallel version, written with a plain mapping
# as the first step: "context" comes from the retriever, "question" is passed
# through unchanged.
chain = (
    dict(
        context=vectorstore2.as_retriever(),
        question=RunnablePassthrough(),
    )
    | prompt
    | model
    | parser
)

chain.invoke("What is synthetic intelligence?")

'Synthetic intelligence is described as the next stage of development in the context provided. It is suggested that synthetic AIs will uncover the puzzle of the universe and solve it.'

# Storing vector store on local disk and then using it to query

In [65]:
!pip install faiss-cpu



In [66]:
# FAISS is imported from langchain_community, the package this notebook already
# uses for DocArrayInMemorySearch, rather than from the deprecated
# `langchain.vectorstores` path.
from langchain_community.vectorstores import FAISS

In [67]:
# Build a FAISS vector store from the transcript chunks, using `embeddings`
# to embed them.
vector_store = FAISS.from_documents(
    documents,
    embeddings,
)

In [68]:
# Location (relative to the working directory) where the index is persisted.
faiss_index_path = "local_faiss_index"

# Write the vector store to local disk so it can be reloaded later.
vector_store.save_local(faiss_index_path)

In [69]:
# Reload the index that was written to `faiss_index_path` by this notebook.
# NOTE(review): allow_dangerous_deserialization=True opts in to loading
# pickled data; keep it only for index files you created yourself, never for
# files obtained from an untrusted source.
loaded_vector_store = FAISS.load_local(
    faiss_index_path,
    embeddings,
    allow_dangerous_deserialization=True,
)

In [70]:
# Same retrieval pipeline as before, now backed by the vector store reloaded
# from disk: "context" comes from its retriever, "question" is passed through.
chain = (
    dict(
        context=loaded_vector_store.as_retriever(),
        question=RunnablePassthrough(),
    )
    | prompt
    | model
    | parser
)

chain.invoke("What is Hollywood going to start doing?")

'Hollywood is going to start using AI to generate scenes for movies.'

# OR

In [71]:
# RunnableParallel form of the pipeline over the reloaded vector store: the
# question is sent to the retriever for context and also passed through as-is.
setup = RunnableParallel(
    context=loaded_vector_store.as_retriever(),
    question=RunnablePassthrough(),
)

# Retrieval -> prompt -> model -> plain-string answer.
chain = setup | prompt | model | parser

chain.invoke("What is synthetic intelligence?")

'Synthetic intelligence is described as the next stage of development in the context provided. It refers to artificial intelligence systems that are created artificially by humans.'