In [None]:
 pip install langchain-huggingface

In [None]:
pip install chromadb

In [7]:
import re
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_core.documents import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
import requests


In [8]:
embedding_model  = HuggingFaceEmbeddings(model_name="BAAI/bge-large-en-v1.5")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
file_path = "docs/policies.txt"

In [3]:
def read_docs():
    with open(file_path, 'r', encoding='utf-8') as file:
        text = file.read()
    return text

In [4]:
def create_initial_chunks():
    text = read_docs()
    # Split text by periods but keep the periods with the preceding text
    paragraphs = [p + '.' for p in re.split(r'\.', text)[:-1]]
    # Add the last chunk without adding an extra period
    if text and not text.endswith('.'):
        paragraphs.append(re.split(r'\.', text)[-1])
    print(f"Total paragraphs: {len(paragraphs)}\n")
    return paragraphs

In [5]:
def create_semantic_chunks():
    paragraphs = create_initial_chunks()
    # Fixed this line to embed each paragraph individually
    para_embeddings = [np.array(embedding_model.embed_query(paragraph)).reshape(1,-1) for paragraph in paragraphs]

    semantic_chunks = []
    for i in range(len(paragraphs)):
        if i == 0:
            semantic_chunks.append([paragraphs[i]])
        else:
            similarity = cosine_similarity(para_embeddings[i-1], para_embeddings[i])
            if similarity[0][0] > 0.5:
                semantic_chunks[-1].append(paragraphs[i])
            else:
                semantic_chunks.append([paragraphs[i]])

    return semantic_chunks

In [9]:
persist_directory = "chroma_store"
collection_name = "semantic_chunks"

vectorstore = Chroma(
    collection_name=collection_name,
    embedding_function=embedding_model,
    persist_directory=persist_directory
)

  vectorstore = Chroma(


In [10]:
def store_chunks_in_chroma():
    semantic_chunks = create_semantic_chunks()
    docs = []

    for idx, chunk_group in enumerate(semantic_chunks):
        combined_text = ' '.join(chunk_group).strip()
        doc = Document(page_content=combined_text, metadata={"chunk_id": idx})
        docs.append(doc)

    vectorstore.add_documents(docs)
    vectorstore.persist()
    print(f"Stored {len(docs)} semantic chunks in Chroma.")

In [11]:
def query_chunks(question: str, top_k: int = 3):
    print(f"\nQuery: {question}")
    results = vectorstore.similarity_search(question, k=top_k)
    for i, doc in enumerate(results):
        print(f"\n--- Result {i+1} ---")
        print(doc.page_content)
    return results

In [None]:
def query_ollama(question: str):
    """Send query to local Ollama instance with improved prompt"""
    results = query_chunks(question)
    context = "\n".join([doc.page_content for doc in results])
    try:
        prompt = f""""Use the following context to answer the question.
            
        Rules:
        1. If the context contains relevant information, provide a concise answer based solely on that.
        2. If the question asks about something NOT in the context, respond ONLY with: 'I don't have this information in my knowledge base. Please contact hr@ayatacommerce.com for assistance.'
        3. Never infer or make up information not explicitly stated in the context.
        4. If the question is ambiguous or unclear, ask for clarification.
            
        Context: {context}
            
        Question: {question}
            
        Answer:"""
            
        response = requests.post(
            "http://localhost:11434/api/generate",
            json={
                "model": "llama3.2:3b",
                "prompt": prompt,
                "stream": False,
                "options": {
                    "temperature": 0.7,
                    "top_k": 50,
                    "top_p": 0.9
                }
            },
            timeout=60
        )
            
        if response.status_code == 200:
            response_data = response.json()
            answer = response_data.get('response', 'No response generated')
                
            # Post-process answer to ensure compliance with rules
            if "I don't know" in answer or "I'm not sure" in answer:
                return "I don't have this information in my knowledge base. Please contact hr@greenways.com for assistance."
            return answer
        else:
            return f"Error: {response.status_code} - {response.text}"
    except Exception as e:
        return f"Error querying Ollama: {str(e)}"
  

In [14]:

store_chunks_in_chroma()

Total paragraphs: 226

Stored 56 semantic chunks in Chroma.


  vectorstore.persist()


In [21]:
# === Then you can run: ===
# query_ollama("What is a full day work at AyataCommerce in terms of hours?")
query_ollama("Is it possible to apply for Sick Leave and Earned Leave on consecutive days?")  


Query: Is it possible to apply for Sick Leave and Earned Leave on consecutive days?

--- Result 1 ---
&procedures
Working Arrangements
HOURS
Unless specified otherwise in your contract you are required to work 8hrs per day.  We trust you to determine what 
works for you with regards to when your working day begins and ends.  
ATTENDANCE
If you are unable to work because of sickness or an emergency, you must ensure your manager is informed as early 
as possible on the first day of absence.  
HOLIDAYS
If you work full-time on the India payroll you are entitled to 12 days of Earned Leaves each holiday year, 12 days of 
Casual/Sick Leaves.  The holiday year runs from 1 January to 31 December.  The timing of all holidays should be 
agreed upon with your manager as early as possible (for details please refer to the detailed Leave Guidelines).  We 
would prefer staff to take their full holiday entitlement in the leave year to which it relates.  However, it is 
recognized that unused holidays

"I don't have this information in my knowledge base. Please contact hr@greenways.com for assistance."