1. Load document

In [1]:
from langchain_community.document_loaders import TextLoader

loader = TextLoader("info.txt")
document = loader.load()

2. Split into chunks

In [2]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500, chunk_overlap=100, add_start_index=True
)
all_splits = text_splitter.split_documents(document)

print(len(all_splits))

9


3. Create embedding (`Google gemini `)

In [3]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

embeddings = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-001")

In [4]:
# vector_1 = embeddings.embed_query(all_splits[0].page_content)
# vector_2 = embeddings.embed_query(all_splits[1].page_content)

# assert len(vector_1) == len(vector_2)
# print(f"Generated vectors of length {len(vector_1)}\n")
# print(vector_1[:10])

4. Vector store (`chromadb`)

In [5]:
from langchain_chroma import Chroma

vector_store = Chroma(
    collection_name="example_collection",
    embedding_function=embeddings,
    persist_directory="./chroma_langchain_db",
)

retriever = vector_store.as_retriever()

In [6]:
ids = vector_store.add_documents(documents=all_splits)

In [7]:
ids

['8afca081-628f-4bb3-9aef-0a98e9b1f14f',
 'ecd8102d-df95-41d1-a03d-ce61ff21d49e',
 '055e5a5e-ff59-44d0-896c-7d8ecd3f3eb6',
 '00e80676-f506-4efe-8d84-eb26204e25a2',
 '2b0796a9-533f-4cc3-b9b6-5626bdb6c3a2',
 'e5b29bd2-c65c-4116-889d-17725fe92efc',
 '71ccea4b-cd39-48a7-9146-d49f217ad095',
 '564d42c3-4094-449b-9e3c-c966d1de89b2',
 '3c70cb37-45f6-47b9-a979-dd4654bdcf02']

* Similarity search

In [8]:
results = vector_store.similarity_search("Education Qualifications")

results[0].page_content

'--------------------------------------------------\n\nEducation:\n\nBachelor of Science (Honours) in Computer Science\nSpecialization: Artificial Intelligence\nUniversity: University of Kelaniya\nGPA: 3.7\nDuration: 2023 – Present\n\nG.C.E. Advanced Level (2019 – 2021)\nStream: Mathematics\nResults: A, B, B\n\nG.C.E. Ordinary Level (2013 – 2018)\nResults: 8 A passes and 1 B pass\n\n--------------------------------------------------\n\nTechnical Skills:'

5. RAG chain

In [9]:
from langchain_google_genai import ChatGoogleGenerativeAI

llm = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
    temperature=0.3
)

In [10]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# Create a prompt template
template = """You are an assistant answering questions about Udasri Hasindu's professional profile.
Use the following pieces of context to answer the question. 
If you don't know the answer, just say that you don't know.
Keep the answer concise and relevant.

Context: {context}

Question: {question}

Answer:"""

prompt = ChatPromptTemplate.from_template(template)

# Function to format documents
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

# Create the RAG chain
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Ask questions
while True:
    query = input("\nAsk a question (or type 'exit'): ")
    
    if query.lower() == "exit":
        break
    
    response = rag_chain.invoke(query)
    print("\nAnswer:\n", response)


Answer:
 Hello! Yes, I can help you with questions about Udasri Hasindu's professional profile based on the information provided. Please ask your question.

Answer:
 He lives in Dompe, Western Province, Sri Lanka.

Answer:
 W. Udasri Hasindu's address is 6/43, Sathmini Uyana, Palugama, Dompe.

Answer:
 Udasri Hasindu is 21 years old.

Answer:
 Udasri Hasindu's projects include:

*   **Parkinson's Disease Predictor:** An AI-based system that analyzes voice recordings to predict the likelihood of Parkinson's disease using UPDRS values. It uses Next.js for the frontend, FastAPI for the backend, and is hosted on Azure Services and DockerHub.

Based on the provided context, his skills include:

*   PHP
*   Python
*   Next.js
*   FastAPI
*   Azure Services
*   DockerHub
*   AI/Machine Learning (for predictive systems)
