In [8]:
from docx import Document as DocxDocument
from llama_index.core import VectorStoreIndex, Settings
from llama_index.core.node_parser import SemanticSplitterNodeParser, TokenTextSplitter
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.response_synthesizers import get_response_synthesizer
from llama_index.core.schema import Document
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.llms.ollama import Ollama as OllamaLLM

In [2]:
def load_docx(file_path):
    """Read a .docx file and return its paragraph text as one string.

    Paragraphs whose text is empty or whitespace-only are dropped; the
    remaining paragraph texts are joined with newline characters.

    Args:
        file_path: Location of the .docx file, passed straight to
            ``DocxDocument``.

    Returns:
        str: The kept paragraph texts separated by ``"\\n"``.
    """
    kept_paragraphs = []
    for paragraph in DocxDocument(file_path).paragraphs:
        content = paragraph.text
        # Skip blank paragraphs so the joined text has no empty lines
        if content.strip():
            kept_paragraphs.append(content)
    return "\n".join(kept_paragraphs)

# Source Word file for the RAG demo — change this to your own .docx file name
docx_path = "rag sample.docx"

# Extract the file's text and wrap it in a single Document for the splitters
text = load_docx(docx_path)
source_document = Document(text=text)
documents = [source_document]

In [3]:
# Ollama model tags: one model for embeddings, one for answer generation
EMBEDDING_MODEL = "bge-m3"
LANGUAGE_MODEL = "hf.co/bartowski/Llama-3.2-1B-Instruct-GGUF"

# Temperature is passed through the Ollama LLM's own `temperature` argument.
# The class defines no `model_kwargs` parameter, so wrapping the value in
# `model_kwargs={...}` would leave the sampling temperature at its default.
llm = OllamaLLM(model=LANGUAGE_MODEL, request_timeout=180.0, temperature=0.0)
embed_model = OllamaEmbedding(model_name=EMBEDDING_MODEL)

# Set models globally using the Settings API so later cells pick them up
Settings.llm = llm
Settings.embed_model = embed_model

In [None]:
# Semantic chunking: the splitter uses the embedding model to decide where to
# break the text. SemanticSplitterNodeParser has no `chunk_size` option, so
# none is passed here; per its API reference the knobs are `buffer_size` and
# `breakpoint_percentile_threshold`.
# NOTE: the token-based cell below reassigns `splitter` and `nodes`, so
# whichever of the two cells runs last determines what gets indexed.
splitter = SemanticSplitterNodeParser(embed_model=embed_model)
nodes = splitter.get_nodes_from_documents(documents)

print(f"Split into {len(nodes)} semantic chunks.")

In [5]:
# Fixed-size, token-based chunking with overlap between neighbouring chunks
splitter = TokenTextSplitter(
    chunk_size=300,      # upper bound on tokens per chunk
    chunk_overlap=50,    # tokens shared by consecutive chunks
    separator=" ",       # break the text on spaces
)

# Turn the loaded documents into chunk nodes
nodes = splitter.get_nodes_from_documents(documents)
chunk_total = len(nodes)

print(f"Split into {chunk_total} token-based chunks.")

Split into 9 token-based chunks.


In [None]:
# Print every chunk's text under a 1-based header for manual inspection
for chunk_number, chunk in enumerate(nodes, start=1):
    print(f"\n--- Chunk {chunk_number} ---")
    print(chunk.text)

In [10]:
# Indexing: build the vector index from the chunk nodes
index = VectorStoreIndex(nodes)

# Retrieval: return the 3 most similar chunks for each query
retriever = index.as_retriever(similarity_top_k=3)

# Synthesis: "compact" response mode (chosen to prevent crashes from long prompts)
synthesizer = get_response_synthesizer(response_mode="compact")

# Query engine: retrieve chunks first, then have the LLM answer from them
query_engine = RetrieverQueryEngine(
    retriever=retriever,
    response_synthesizer=synthesizer,
)

In [None]:
# Run one sample question through the retriever alone (no LLM call)
sample_question = "According to the context, how are tolerances used during intermediate safety stock planning?"
retrieved_nodes = retriever.retrieve(sample_question)

# Show each retrieved chunk with its similarity score, ranked from 1
for rank, hit in enumerate(retrieved_nodes, start=1):
    header = f"\n--- Chunk #{rank} ---"
    print(header)
    print(f"Similarity: {hit.score:.4f}")
    print(f"Content:\n{hit.node.get_content()}")

In [None]:
# Interactive question-answering loop over the indexed document.
# Runs until the user types 'exit' or 'quit', or the input stream ends.
while True:
    try:
        query = input("Enter your question (type 'exit' to quit): ").strip()
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or an interrupt at the prompt: leave the loop cleanly
        # instead of surfacing a traceback.
        print("\nExiting chat.")
        break

    if query.lower() in {"exit", "quit"}:
        print("Exiting chat.")
        break

    if not query:
        # Nothing was typed: re-prompt rather than sending an empty query
        continue

    try:
        response = query_engine.query(query)
        print("Chatbot response:", response, "\n")
    except Exception as e:
        # Best-effort: report the failure and keep the chat session alive
        print(f"Error: {e}\n")
