In [None]:
!pip install langchain chromadb sentence-transformers transformers torch accelerate

In [None]:
# Install required libraries first
# pip install langchain chromadb sentence-transformers transformers torch accelerate

import gc
import os

import torch
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFacePipeline
from langchain.vectorstores import Chroma
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

In [None]:
# Step 1: Initialize Embeddings
# Sentence-transformers checkpoint that turns text into vectors for the
# vector store built later in this notebook.
embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(
    model_name=embedding_model_name,
)

# Step 2: Initialize ChromaDB (Vector Database)
# Directory (relative to the notebook's working directory) where the Chroma
# collection is stored on disk.
persist_directory = "./chroma_rag_db"
# exist_ok=True makes this cell safe to re-run and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(persist_directory, exist_ok=True)

In [None]:
# Step 3: Add Documents (Knowledge Base)
# Tiny in-memory corpus; each entry becomes one document in the vector store.
texts = [
    "The capital of France is Paris.",
    "The Great Wall of China is visible from space.",
    "Mistral 7B is a powerful open-source language model.",
    "LangChain is a framework for building LLM-powered applications."
]
# One metadata dict per text, tagging it with a positional source label.
metadatas = [{"source": f"doc_{i}"} for i in range(len(texts))]
# Deterministic ids: without them every run of this cell inserts the same
# texts again under fresh random ids, so the persisted collection accumulates
# duplicates and the retriever can return the same document twice.
doc_ids = [meta["source"] for meta in metadatas]

db = Chroma.from_texts(
    texts,
    embeddings,
    metadatas=metadatas,
    ids=doc_ids,
    persist_directory=persist_directory,
)
# NOTE(review): with Chroma >= 0.4 data is written to persist_directory
# automatically and this call is deprecated; it is kept so the notebook still
# persists on older versions. Confirm against the installed version.
db.persist()

In [None]:


# Step 4: Load the Language Model
model_name = "mistralai/Mistral-7B-Instruct-v0.2"  # Or any Huggingface instruct model

# SECURITY: trust_remote_code is intentionally left at its default (False).
# Enabling it lets the model repository execute arbitrary Python on this
# machine. Only pass trust_remote_code=True for a model whose repository code
# you have reviewed and that actually requires it.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,  # load weights in half precision
    device_map="auto",          # automatic device placement of the weights
)

# Text-generation pipeline: sampling enabled, at most 300 newly generated
# tokens per call, sampling temperature 0.3.
llm_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=300,
    temperature=0.3,
    do_sample=True
)

# Wrap the transformers pipeline so LangChain chains can use it as an LLM.
llm = HuggingFacePipeline(pipeline=llm_pipeline)

# Step 5: Build the RAG Pipeline (Retriever + LLM)
# The retriever is asked for k=2 documents per query; the "stuff" chain type
# places all retrieved documents into the prompt context.
rag_chain = RetrievalQA.from_chain_type(
    chain_type="stuff",
    retriever=db.as_retriever(search_kwargs=dict(k=2)),
    llm=llm,
)

# Step 6: Ask a Question!
query = "What is LangChain and why is it useful?"
result = rag_chain.run(query)

# Banner, an empty line, then the answer, all emitted by a single print call.
print("\n=== Final Answer ===\n", result, sep="\n")

# Step 7: Cleanup
# Run the garbage collector first so that unreachable objects are released
# before the CUDA caching allocator is asked to return its unused blocks;
# calling empty_cache() first would leave that memory cached.
# Note: `model`, `llm_pipeline`, `llm` and `rag_chain` are still bound at this
# point, so the memory they hold is not reclaimed here; `del` them beforehand
# if the GPU memory must be released.
gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()
