### Simple GenAI app using LangChain

Load all the environment variables

In [1]:
import os
from dotenv import load_dotenv
# Load variables from a local .env file (if one is found) into the process
# environment so the os.getenv() lookups below can see them.
load_dotenv()

# Configure LangSmith tracing from the environment.
# Assigning os.getenv(...) straight back into os.environ raises
# "TypeError: str expected, not NoneType" for any variable that is not set,
# so each optional setting is copied only when it has a value and the missing
# ones are reported in a single readable message instead.
_langsmith_vars = ("LANGSMITH_API_KEY", "LANGSMITH_PROJECT", "LANGSMITH_ENDPOINT")
_missing_vars = []
for _name in _langsmith_vars:
    _value = os.getenv(_name)
    if _value is None:
        _missing_vars.append(_name)
    else:
        os.environ[_name] = _value
if _missing_vars:
    print("Warning: not set in the environment or .env: " + ", ".join(_missing_vars))

# Tracing is always switched on, exactly as before.
os.environ["LANGSMITH_TRACING"] = "true"

Scrape data from a website and load the local lore text file

In [2]:
import os

# langchain_community's web loader looks up USER_AGENT when it is first
# imported and logs "USER_AGENT environment variable not set" if it is missing,
# so a default must be in place *before* the import below. setdefault keeps any
# value already supplied by the shell or the .env file. If the loader was
# already imported in this kernel, restart the kernel for this to take effect.
os.environ.setdefault("USER_AGENT", "valorant-genai-notebook/0.1")

from langchain_community.document_loaders import WebBaseLoader, TextLoader
import bs4

class_label = "beebom-single-content-container"
web_url = "https://beebom.com/valorant-characters-agents-abilities"

# Restrict HTML parsing to elements carrying the `class_label` CSS class.
loader = WebBaseLoader(web_paths=[web_url],
                bs_kwargs={
                    "parse_only": bs4.SoupStrainer(class_=class_label),
                })
web_docs = loader.load()

text_loader = TextLoader("data/lore.txt")
text_docs = text_loader.load()

# Build the combined corpus as a new list (web pages first, then the local
# file) instead of mutating the loader's result in place.
docs = [*web_docs, *text_docs]

USER_AGENT environment variable not set, consider setting it to identify your requests.


Divide the loaded documents into chunks

In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Chunking parameters; sizes are counted with len(), i.e. in characters.
CHUNK_SIZE = 2000
CHUNK_OVERLAP = 300

text_splitter = RecursiveCharacterTextSplitter(
    length_function=len, chunk_overlap=CHUNK_OVERLAP, chunk_size=CHUNK_SIZE
)
# Split every loaded document into chunks for embedding.
splitted_docs = text_splitter.split_documents(docs)

Set up the embedding model for the documents

In [16]:
from langchain_huggingface import HuggingFaceEmbeddings
import torch

EMBEDDING_MODEL = "BAAI/bge-small-en-v1.5"

# Choose the device at run time: a hard-coded "cuda" makes this cell fail on
# machines without a usable GPU.
_gpu_count = torch.cuda.device_count() if torch.cuda.is_available() else 0
EMBEDDING_DEVICE = "cuda" if _gpu_count > 0 else "cpu"

embedder = HuggingFaceEmbeddings(
    model_name=EMBEDDING_MODEL,
    # multi_process spreads encode() over several GPUs; with a single GPU (or
    # none) it only adds worker start-up cost, so enable it only when it helps.
    multi_process=_gpu_count > 1,
    model_kwargs={
        "device": EMBEDDING_DEVICE,
    },
    encode_kwargs={
        "normalize_embeddings": True,
    }
)

Embed the chunks and store them in a FAISS vector store

In [None]:
from langchain_community.vectorstores import FAISS

# Build a FAISS vector store from the chunks, using `embedder` to embed them.
vectorstore_db = FAISS.from_documents(documents=splitted_docs, embedding=embedder)


Query the vectorstore DB

In [18]:
# Sample question, reused later when the full chain is invoked.
query = "How did the valorant agents get their powers?"
# Check retrieval on its own: fetch the 3 stored chunks most similar to the query.
result = vectorstore_db.similarity_search(query=query, k=3)

Add retrieval chain to the LLM

In [19]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain

from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq

# Template for context-grounded answering; the {context} and {input}
# placeholders are filled in when the prompt is formatted.
RAG_PROMPT_TEMPLATE = """
    You are a helpful assistant. Use the following pieces of context wrapped within <context>..</context> to answer the question. If you can't answer the question based on the given context, just say that you don't know. Do not try to make up an answer.
    <context>
    {context}
    </context>
    Question: {input}
    """
prompt = ChatPromptTemplate.from_template(RAG_PROMPT_TEMPLATE)

# Chat model (served through Groq) that writes the final answer.
llm = ChatGroq(temperature=0.7, model_name="llama-3.3-70b-versatile")

# Expose the vector store as a retriever: similarity search with k=5 per query.
retriever = vectorstore_db.as_retriever(
    search_kwargs={"k": 5}, search_type="similarity"
)

# Normal way of making a chain

# document_chain = create_stuff_documents_chain(
#     llm=llm,
#     prompt=prompt)


# retrieval_chain = create_retrieval_chain(retriever,document_chain)

# using LCEL
# import runnable
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser


def format_docs(documents):
    """Join the text of retrieved documents into one prompt-ready string.

    Args:
        documents: Iterable of objects exposing a ``page_content`` string
            (the Documents returned by the retriever).

    Returns:
        The page contents separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in documents)


# Pipe the retriever through format_docs. Passing the retriever on its own
# would put the repr of the Document list (metadata, escaped newlines) into
# the {context} slot of the prompt instead of the plain passage text.
context_info = RunnableParallel({
    "context": retriever | format_docs,
    "input": RunnablePassthrough()
})

# Retrieve context -> fill the prompt -> call the LLM -> return a plain string.
chain = context_info | prompt | llm | StrOutputParser()

In [20]:
# Run the whole chain on the sample question and keep the answer.
response = chain.invoke(input=query)

In [21]:
# Show the generated answer as the cell output.
response

'The Valorant agents got their powers from a cataclysmic event known as the First Light, which struck Earth around 2039. This event bathed the planet in a powerful substance called Radianite, unlocking extraordinary abilities in some individuals.'

In [22]:
# Persist the FAISS store to the local "valorant_agents_db" folder.
vectorstore_db.save_local(folder_path="valorant_agents_db")