## Dependencies

In [1]:
import torch
from langchain.chains import RetrievalQA
from langchain.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.llms import HuggingFacePipeline

from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

ImportError: cannot import name 'model_validator' from 'pydantic' (/home/stu5/s11/hc4293/miniconda3/envs/llama_rag/lib/python3.10/site-packages/pydantic/__init__.cpython-310-x86_64-linux-gnu.so)

## Load and prepare documents

In [None]:
# --------------------------------------------------------------------
# 1. Load & Prepare Documents
# --------------------------------------------------------------------
# Knobs for this stage, collected in one place so they are easy to tune.
DATA_DIR = "data"  # Update if your folder is named differently
CHUNK_SIZE = 1000  # chunk_size handed to the splitter
CHUNK_OVERLAP = 100  # chunk_overlap handed to the splitter

# Load whatever the directory loader finds under DATA_DIR.
loader = DirectoryLoader(DATA_DIR)
raw_docs = loader.load()

# Split the loaded documents into overlapping chunks; `docs` feeds the
# vector store built in the next step.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
docs = text_splitter.split_documents(raw_docs)


## Create an Embedding + Vector Store for Retrieval

In [None]:
# --------------------------------------------------------------------
# 2. Create an Embedding + Vector Store for Retrieval
# --------------------------------------------------------------------
# (All-MPNet-Base-V2 is a good default; you can change it)
EMBEDDING_MODEL_NAME = "sentence-transformers/all-mpnet-base-v2"
COLLECTION_NAME = "my_collection"
TOP_K = 3  # passed as "k" in the retriever's search_kwargs

embedding_function = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

# Build the Chroma collection from the chunked documents.
vectorstore = Chroma.from_documents(
    collection_name=COLLECTION_NAME,
    embedding=embedding_function,
    documents=docs,
)

# Expose the vector store through the retriever interface used by the QA chain.
retriever = vectorstore.as_retriever(search_kwargs=dict(k=TOP_K))


## Load a Hugging Face Pipeline with Llama 2 or Another Model

In [None]:
# --------------------------------------------------------------------
# 3. Load a Hugging Face Pipeline with Llama 2 or Another Model
# --------------------------------------------------------------------
model_id = "meta-llama/Llama-2-7b-chat-hf"  # or any other LLM on HF Hub

# Upper bound on the number of tokens generated per answer. This is
# deliberately a *new-token* budget: the previous `max_length=1024` setting
# counted the prompt as well, and the RAG prompt (retrieved chunks + template +
# question) could consume most or all of that budget, truncating the answer.
MAX_NEW_TOKENS = 512

# Make sure you have accepted the license for Llama 2 and are logged in (huggingface-cli login)
# NOTE(review): newer transformers releases prefer `token=` over
# `use_auth_token=` and `quantization_config=` over `load_in_8bit=`; kept as-is
# here -- confirm against the installed transformers version before changing.
tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    use_auth_token=True,
    device_map="auto",
    torch_dtype=torch.float16,
    load_in_8bit=True
)

# Create a text-generation pipeline
hf_pipeline = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=MAX_NEW_TOKENS,  # limits generated tokens only, independent of prompt length
    temperature=0.1,
    top_p=0.9,
    repetition_penalty=1.2,
    do_sample=True
)

# Wrap the pipeline in a LangChain LLM
llm = HuggingFacePipeline(pipeline=hf_pipeline)


## Build a Retrieval QA Chain

In [None]:
# --------------------------------------------------------------------
# 4. Build a Retrieval QA Chain
# --------------------------------------------------------------------
# Chain type to use: "stuff", "map_reduce", or "refine"
CHAIN_TYPE = "stuff"

# Combine the wrapped LLM and the retriever into a single question-answering chain.
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever, chain_type=CHAIN_TYPE)


## Run queries

In [None]:
# --------------------------------------------------------------------
# 5. Ask a Question
# --------------------------------------------------------------------
query = "What is 'Attention is All You Need'?"

# Run the question through the retrieval QA chain.
result = qa_chain.run(query)

# Print the banner and the answer on consecutive lines.
print("\n=== RAG Answer ===", result, sep="\n")
