In [1]:
#!pip install langchain langchain-community langchain-google-genai langchain-core langchain-text-splitters chromadb python-dotenv

In [2]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.documents import Document
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from dotenv import load_dotenv
import os


True

In [3]:

# Step 1: Load the laptop data
loader = TextLoader("laptops_info.txt")
raw_docs = loader.load()



In [None]:
# Step 2: Split the documents into chunks
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
docs = text_splitter.split_documents(raw_docs)
print("Total number of documents: ",len(docs))

print("printing one of the document........")
print(docs[2])



Total number of documents:  5
printing one of the document........
page_content='3. ASUS TUF Gaming F15
- Price: â‚¹79,990
- CPU: Intel i5 11400H
- GPU: NVIDIA RTX 3050 (4GB)
- RAM: 16GB DDR4
- Storage: 512GB SSD
- Good for: Deep learning models, parallel processing
- Comments: Rugged build, best performance for the price

4. Acer Aspire 7
- Price: â‚¹62,990
- CPU: AMD Ryzen 5 5500U
- GPU: NVIDIA GTX 1650
- RAM: 8GB
- Storage: 512GB SSD
- Good for: Intro ML, data science
- Comments: Value for money, limited by RAM/GPU' metadata={'source': 'laptops_info.txt'}


In [10]:
# Step 3: Initialize the embedding model
#Get an API key: 
# Head to https://ai.google.dev/gemini-api/docs/api-key to generate a Google AI API key. Paste in .env file
# Embedding models: https://python.langchain.com/v0.1/docs/integrations/text_embedding/

from dotenv import load_dotenv
load_dotenv() 

import os
from langchain_google_genai import GoogleGenerativeAIEmbeddings

embedding_model = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=os.getenv("GOOGLE_API_KEY")
)


vector = embedding_model.embed_query("hello, world!")

#vector
print("example embeddings........")

print(vector[:5])
len(vector)

example embeddings........
[0.05636945366859436, 0.004828543867915869, -0.07625909894704819, -0.023642510175704956, 0.053293220698833466]


768

In [12]:
# Step 4: Create Chroma vectorstore
vectorstore = Chroma.from_documents(documents=docs, embedding=embedding_model)

In [15]:
# Step 5: Convert to retriever
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 2})

In [16]:

# Step 6: Run a query (RAG-style)
query = "Which laptop is best for machine learning under ₹80,000?"
retrieved_docs = retriever.invoke(query)
print(len(retrieved_docs))


2


In [17]:
# Step 7: Print retrieved chunks
print("\nTop Retrieved Chunks:")
for i, doc in enumerate(retrieved_docs):
    print(f"\nChunk {i+1}:\n{doc.page_content}")


Top Retrieved Chunks:

Chunk 1:
# Laptop Buying Tips for AI/ML (2024)
- Prefer 16GB RAM or more
- Look for NVIDIA GPUs like GTX 1650, RTX 3050 or better
- Avoid integrated graphics for training models
- SSD preferred for fast data access
- Ryzen 5, i5 H-series or better recommended

Chunk 2:
# Laptop Buying Tips for AI/ML (2024)
- Prefer 16GB RAM or more
- Look for NVIDIA GPUs like GTX 1650, RTX 3050 or better
- Avoid integrated graphics for training models
- SSD preferred for fast data access
- Ryzen 5, i5 H-series or better recommended
