## LOAD

In [1]:
## Data Ingestion

from langchain_community.document_loaders import TextLoader
loader=TextLoader("sample.txt")
text_documents=loader.load()
text_documents


[Document(metadata={'source': 'sample.txt'}, page_content="I am not on trial for being a dwarf. Oh yes I am. I've been on trial for that my entire life. Have you nothing to say in your defense? Nothing but this: I did not do it. I did not kill Joffrey, but I wish that I had. Watching your vicious bastard die gave me more relief than a thousand lying whores. I wish I was the monster you think I am! I wish I had enough poison for the whole pack of you! I would gladly give my life to watch you all swallow it")]

In [2]:
import os 
from dotenv import load_dotenv

load_dotenv()

os.environ['GOOGLE_API_KEY']=os.getenv("GOOGLE_API_KEY")

In [27]:
from langchain_community.document_loaders import WebBaseLoader
import bs4

# Load only the page itself (don't try to crawl extra links)
loader = WebBaseLoader(
    web_paths=["https://genius.com/Game-of-thrones-tyrions-courtroom-speech-from-the-laws-of-gods-and-men-annotated"],
    bs_kwargs=dict(parse_only=bs4.SoupStrainer(
        class_=("Lyrics__Container-sc-a49d8432-1 fBKwZw",)
    )),
)

# Load documents
text_documents = loader.load()

# Show the first chunk
text_documents


[Document(metadata={'source': 'https://genius.com/Game-of-thrones-tyrions-courtroom-speech-from-the-laws-of-gods-and-men-annotated'}, page_content='6 ContributorsTyrion’s Courtroom Speech from The Laws of Gods and Men LyricsTyrion’s trial forms the final scene of “The Laws of Gods and Men”, (season 4, episode 6 of Game of Thrones). More extravagant and conspicuous than the corresponding passage in the books, owing largely to the… Read More\xa0INT. RED KEEP - THRONE ROOM - DAYAs the crowd flies back in, Jaime approaches Tyrion in the dock.TYRION: Not going well, is it?JAIME: You\'re going to be found guilty.TYRION: Oh, you think so?JAIME: When you are, you need to enter a formal plea for mercy and ask to be sent to the Wall.Tyrion watches Jaime, unsure what games are being played now, waiting to hear more.JAIME: Father has agreed to it. He\'ll spare your life and allow you to join the Night\'s Watch.Tyrion eyes his brother skeptically.TYRION: Ned Stark was promised the same thing and we

In [10]:
## PDF Reader
from langchain_community.document_loaders import PyPDFLoader
loader=PyPDFLoader('Gdg Leads List.pdf')
text_documents=loader.load()
text_documents

[Document(metadata={'producer': 'Skia/PDF m141 Google Docs Renderer', 'creator': 'PyPDF', 'creationdate': '', 'title': 'Gdg Leads List', 'source': 'Gdg Leads List.pdf', 'total_pages': 1, 'page': 0, 'page_label': '1'}, page_content='Google  Developers  Group  2026  Heads  List   S.no  Head  Head  Details  \n1  GDG  Lead  Kavishnu  G  -  III  CG   \n2  Co  -  Lead  Prathisha  K  II  CG  3  Manager  &  PMO  Head  Megavarshnin  -  II  MSc  SS   4  Open  -  Source  and  GitHub  Head  \n \n5  Web/Full  Stack  Head  Abdul   -  MSc  CT   6  Cloud  and  DevOps  Head  Suman  N  -  II  DS  A  7  AI/ML,Gen  AI  Head  Monisha  R  -  III  CT  B   8  Automation  and  Agents  Head  \n \n9  IOT  Head  Gowtham  -  II  CSA  10  Social  Media  Head   \n11  Python  Head  Bharath  Raj  K  A  -  II  DS  B   \n12  Photograph  and  Editing  Head  \n \n13  Women  In  tech  Head  Thanu  Sree  -  II  CT  A   \n14  UI/UX  Head  Nithya  Sree  -  II  CS   \n15  Public  Relation  Head')]

## Transform

In [28]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Use the text_documents you got from WebBaseLoader
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200
)

documents = text_splitter.split_documents(text_documents)

# Preview first 5 chunks
# for i, doc in enumerate(documents[:5], 1):
#     print(f"--- Chunk {i} ---")
#     print(doc.page_content[:1000])  # preview first 300 chars
#     print()

print(documents)

[Document(metadata={'source': 'https://genius.com/Game-of-thrones-tyrions-courtroom-speech-from-the-laws-of-gods-and-men-annotated'}, page_content="6 ContributorsTyrion’s Courtroom Speech from The Laws of Gods and Men LyricsTyrion’s trial forms the final scene of “The Laws of Gods and Men”, (season 4, episode 6 of Game of Thrones). More extravagant and conspicuous than the corresponding passage in the books, owing largely to the… Read More\xa0INT. RED KEEP - THRONE ROOM - DAYAs the crowd flies back in, Jaime approaches Tyrion in the dock.TYRION: Not going well, is it?JAIME: You're going to be found guilty.TYRION: Oh, you think so?JAIME: When you are, you need to enter a formal plea for mercy and ask to be sent to the Wall.Tyrion watches Jaime, unsure what games are being played now, waiting to hear more.JAIME: Father has agreed to it. He'll spare your life and allow you to join the Night's Watch.Tyrion eyes his brother skeptically.TYRION: Ned Stark was promised the same thing and we bo

## Vector Embedding and Vector Store

In [29]:
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import Chroma
db=Chroma.from_documents(documents[:2],OllamaEmbeddings(model="gemma"))


In [32]:
## Gemini

from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Load API key from .env
load_dotenv()
os.environ["GOOGLE_API_KEY"] = os.getenv("GOOGLE_API_KEY")

# Initialize Gemini embeddings
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/gemini-embedding-001"   # ✅ Gemini embedding model
)

# Create Chroma vectorstore with Gemini embeddings
db2 = Chroma.from_documents(documents[:2], embeddings)


In [33]:

query="What is the document about"
result=db2.similarity_search(query)
result[0].page_content

"6 ContributorsTyrion’s Courtroom Speech from The Laws of Gods and Men LyricsTyrion’s trial forms the final scene of “The Laws of Gods and Men”, (season 4, episode 6 of Game of Thrones). More extravagant and conspicuous than the corresponding passage in the books, owing largely to the… Read More\xa0INT. RED KEEP - THRONE ROOM - DAYAs the crowd flies back in, Jaime approaches Tyrion in the dock.TYRION: Not going well, is it?JAIME: You're going to be found guilty.TYRION: Oh, you think so?JAIME: When you are, you need to enter a formal plea for mercy and ask to be sent to the Wall.Tyrion watches Jaime, unsure what games are being played now, waiting to hear more.JAIME: Father has agreed to it. He'll spare your life and allow you to join the Night's Watch.Tyrion eyes his brother skeptically.TYRION: Ned Stark was promised the same thing and we both know how that turned out.JAIME: Father is not Joffrey. He'll keep his word.TYRION: How do you know?JAIME: Do you trust me?Tyrion studies his old

In [37]:
query="Who is TYRION said at last?"
result=db.similarity_search(query)
result[0].page_content

"6 ContributorsTyrion’s Courtroom Speech from The Laws of Gods and Men LyricsTyrion’s trial forms the final scene of “The Laws of Gods and Men”, (season 4, episode 6 of Game of Thrones). More extravagant and conspicuous than the corresponding passage in the books, owing largely to the… Read More\xa0INT. RED KEEP - THRONE ROOM - DAYAs the crowd flies back in, Jaime approaches Tyrion in the dock.TYRION: Not going well, is it?JAIME: You're going to be found guilty.TYRION: Oh, you think so?JAIME: When you are, you need to enter a formal plea for mercy and ask to be sent to the Wall.Tyrion watches Jaime, unsure what games are being played now, waiting to hear more.JAIME: Father has agreed to it. He'll spare your life and allow you to join the Night's Watch.Tyrion eyes his brother skeptically.TYRION: Ned Stark was promised the same thing and we both know how that turned out.JAIME: Father is not Joffrey. He'll keep his word.TYRION: How do you know?JAIME: Do you trust me?Tyrion studies his old

In [35]:
### fiass DB

from langchain_community.vectorstores import FAISS
db1=FAISS.from_documents(documents,OllamaEmbeddings(model="gemma"))

In [36]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Load API key from .env
load_dotenv()
os.environ["GOOGLE_API_KEY"] = os.getenv("GOOGLE_API_KEY")

# Initialize Gemini embeddings
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/gemini-embedding-001"   # ✅ Gemini embedding model
)
db1=FAISS.from_documents(documents,embeddings)

In [40]:
query="What is TYRION said about ned stark"
result=db1.similarity_search(query)
result[0].page_content

"Ned Stark was promised the same thing and we both know how that turned out.JAIME: Father is not Joffrey. He'll keep his word.TYRION: How do you know?JAIME: Do you trust me?Tyrion studies his older brother for a long beat before nodding.JAIME: Keep your mouth shut. No more outbursts. This will all be over soon.Jaime walks back to his post.Tyrion looks after him. He's confused about what's happened, but it's plain from the look on his face he's relieved. His brother has come through for him again. He's not going to die.***Tyrion turns his gaze to his father, who has just retaken his seat on the Iron Throne. They lock eyes for a moment. Tywin, of course, betrays nothing.The crowd quiets down as the other judges take their seats.TYWIN: The Crown may call its next witness.Tyrion turns in the direction of the approaching footsteps. He stands in speechless shock when he sees the next witness enter the throne room:SHAE. Dressed in a modest frock, quite different from her sexy handmaiden duds,