1. Load document

In [21]:
from langchain_community.document_loaders import TextLoader

loader = TextLoader("info.txt")
document = loader.load()

2. Split into chunks

In [22]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500, chunk_overlap=100, add_start_index=True
)
all_splits = text_splitter.split_documents(document)

print(len(all_splits))

9


3. Create embedding (`Google gemini `)

In [23]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

embeddings = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-001")

In [24]:
# vector_1 = embeddings.embed_query(all_splits[0].page_content)
# vector_2 = embeddings.embed_query(all_splits[1].page_content)

# assert len(vector_1) == len(vector_2)
# print(f"Generated vectors of length {len(vector_1)}\n")
# print(vector_1[:10])

4. Vector store (`chromadb`)

In [25]:
from langchain_chroma import Chroma

vector_store = Chroma(
    collection_name="example_collection",
    embedding_function=embeddings,
    persist_directory="./chroma_langchain_db",
)

retriever = vector_store.as_retriever()

In [26]:
ids = vector_store.add_documents(documents=all_splits)

In [27]:
ids

['43a100a7-23ac-4a3f-86a4-cf8bd7e98937',
 '818f0b76-5576-4636-bf32-f7295db79cc0',
 '7e5e8baa-d65e-4c84-b665-87e3f1ac93e3',
 '27092b21-c208-4106-a6ca-bcd2373f56e7',
 '245ec5df-9c1e-464a-960b-3e85afa17553',
 '1ff0176d-e6f4-423d-a359-392d49f767c9',
 'dad610b2-05f9-4b3d-b4ab-0cdacf426f25',
 '617321fb-3f06-4c68-b59d-f33497a31722',
 '6e7a9686-4afd-4936-9f2a-e0f0dc0aadab']

* Similarity search

In [28]:
results = vector_store.similarity_search("Education Qualifications")

results[0].page_content

'--------------------------------------------------\n\nEducation:\n\nBachelor of Science (Honours) in Computer Science\nSpecialization: Artificial Intelligence\nUniversity: University of Kelaniya\nGPA: 3.7\nDuration: 2023 – Present\n\nG.C.E. Advanced Level (2019 – 2021)\nStream: Mathematics\nResults: A, B, B\n\nG.C.E. Ordinary Level (2013 – 2018)\nResults: 8 A passes and 1 B pass\n\n--------------------------------------------------\n\nTechnical Skills:'

5. RAG chain

In [29]:
from langchain_google_genai import ChatGoogleGenerativeAI

llm = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
    temperature=0.3
)

In [30]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# Create a prompt template
template = """You are an assistant answering questions about Udasri Hasindu's professional profile.
Use the following pieces of context to answer the question. 
If you don't know the answer, just say that you don't know.
Keep the answer concise and relevant.

Context: {context}

Question: {question}

Answer:"""

prompt = ChatPromptTemplate.from_template(template)

# Function to format documents
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

# Create the RAG chain
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Ask questions
while True:
    query = input("\nAsk a question (or type 'exit'): ")
    
    if query.lower() == "exit":
        break
    
    response = rag_chain.invoke(query)
    print("\nAnswer:\n", response)


Answer:
 Hello! How can I help you with Udasri Hasindu's professional profile?
