In [10]:
# Markdown Documents to MiniLM QnA System

## Setup and Installation

# Install necessary packages - make sure the Pinecone version is compatible
!pip install langchain langchain-community langchain-pinecone pinecone-client==2.2.4 tqdm python-dotenv sentence-transformers

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
^C
Traceback (most recent call last):
  File "/home/sangnik/.pyenv/versions/3.12.2/bin/pip", line 8, in <module>
    sys.exit(main())
             ^^^^^^
  File "/home/sangnik/.pyenv/versions/3.12.2/lib/python3.12/site-packages/pip/_internal/cli/main.py", line 79, in main
    return command.main(cmd_args)
           ^^^^^^^^^^^^^^^^^^^^^^
  File "/home/sangnik/.pyenv/versions/3.12.2/lib/python3.12/site-packages/pip/_internal/cli/base_command.py", line 101, in main
    return self._main(args)
           ^^^^^^^^^^^^^^^^
  File "/home/sangnik/.pyenv/versions/3.12.2/lib/python3.12/site-packages/pip/_internal/cli/base_command.py", line 236, in _main
    self.handle_pip_version_check(options)
  File "/home/sangnik/.pyenv/versions/3.12.2/lib/python3.12/site-packages/pip/_internal/cli/req_command.py", line 188, in handle_pip_version_check
    pip_self_version_check(session, options)
  File "/home/sangnik/.pyenv/versions/

In [11]:
# Import required libraries
import os
import glob
from tqdm import tqdm
from dotenv import load_dotenv
import pinecone
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader, DirectoryLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFaceTextGenInference
from langchain_pinecone import PineconeVectorStore
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

# Load environment variables
load_dotenv()


True

In [12]:
## Data Loading

# Load markdown files from the 'data' folder
def load_markdown_files(data_dir="./data", encoding="utf-8"):
    """
    Load every markdown file found (recursively) under ``data_dir``.

    Args:
        data_dir: Directory that is searched for ``*.md`` files.
        encoding: Text encoding handed to ``TextLoader``. Defaults to UTF-8 so
            the outcome does not depend on the platform's locale encoding.

    Returns:
        A list holding everything returned by ``TextLoader.load()`` for each
        file, visited in sorted path order. The list is empty when
        ``data_dir`` is not a directory. Files that fail to load are reported
        and skipped rather than aborting the whole run.
    """
    print(f"Loading markdown files from {data_dir}...")

    # A path that exists but is a regular file cannot contain markdown files,
    # so require an actual directory.
    if not os.path.isdir(data_dir):
        print(f"Directory {data_dir} not found!")
        return []

    # glob yields matches in arbitrary order; sort so reruns are reproducible.
    md_files = sorted(glob.glob(os.path.join(data_dir, "**/*.md"), recursive=True))
    print(f"Found {len(md_files)} markdown files")

    documents = []
    failed_files = []
    for file_path in tqdm(md_files):
        try:
            loader = TextLoader(file_path, encoding=encoding)
            documents.extend(loader.load())
        except Exception as e:
            # Best effort: one unreadable file should not stop the ingestion.
            print(f"Error loading {file_path}: {e}")
            failed_files.append(file_path)

    print(f"Loaded {len(documents)} documents")
    if failed_files:
        # Make skipped files visible even when the per-file messages scroll away.
        print(f"Skipped {len(failed_files)} files that could not be loaded")
    return documents

# Load all markdown documents
documents = load_markdown_files()

Loading markdown files from ./data...
Found 1262 markdown files


100%|██████████| 1262/1262 [00:00<00:00, 4913.44it/s]

Loaded 1262 documents





In [13]:
## Text Chunking

# Split documents into chunks
def chunk_documents(documents, chunk_size=1000, chunk_overlap=200):
    """
    Break the given documents into overlapping chunks.

    Args:
        documents: Documents to hand to the splitter.
        chunk_size: Maximum chunk length, measured with ``len``.
        chunk_overlap: Overlap between neighbouring chunks, measured with ``len``.

    Returns:
        Whatever ``RecursiveCharacterTextSplitter.split_documents`` returns for
        ``documents``.
    """
    print("Chunking documents...")

    # Separator candidates: paragraph break, line break, space, empty string.
    splitter_options = {
        "chunk_size": chunk_size,
        "chunk_overlap": chunk_overlap,
        "length_function": len,
        "separators": ["\n\n", "\n", " ", ""],
    }
    pieces = RecursiveCharacterTextSplitter(**splitter_options).split_documents(documents)

    print(f"Created {len(pieces)} chunks")
    return pieces

# Create chunks from documents
chunks = chunk_documents(documents)

Chunking documents...
Created 20624 chunks


In [14]:

## Text Chunking

# Split documents into chunks
def chunk_documents(documents, chunk_size=1000, chunk_overlap=200):
    """
    Split ``documents`` into chunks of at most ``chunk_size`` characters that
    overlap by ``chunk_overlap`` characters, and return the resulting chunks.
    """
    # NOTE(review): a function with this exact name and signature is also
    # defined in the preceding cell; this later definition shadows it. Consider
    # keeping only one of the two cells.
    print("Chunking documents...")

    fallback_separators = ["\n\n", "\n", " ", ""]
    splitter = RecursiveCharacterTextSplitter(
        separators=fallback_separators,
        length_function=len,
        chunk_overlap=chunk_overlap,
        chunk_size=chunk_size,
    )

    result = splitter.split_documents(documents)
    print(f"Created {len(result)} chunks")
    return result

# Create chunks from documents
chunks = chunk_documents(documents)

Chunking documents...
Created 20624 chunks


In [15]:
## Embedding Generation

# Initialize embedding model
def initialize_embeddings(model_name="all-MiniLM-L6-v2", device="cpu"):
    """
    Initialize the embedding model.

    Args:
        model_name: Checkpoint name passed to ``HuggingFaceEmbeddings``. The
            default is the lightweight model this notebook was written for.
        device: Device string forwarded through ``model_kwargs`` (for example
            ``"cpu"`` or ``"cuda"``). Defaults to ``"cpu"``.

    Returns:
        The configured ``HuggingFaceEmbeddings`` instance.
    """
    print("Initializing embedding model...")
    # NOTE(review): the Pinecone index dimension used elsewhere in this notebook
    # has to match the output size of whichever model is selected here.
    embeddings = HuggingFaceEmbeddings(
        model_name=model_name,
        model_kwargs={"device": device},
    )
    return embeddings

# Initialize embeddings
embeddings = initialize_embeddings()

Initializing embedding model...


In [20]:
from pinecone import Pinecone, ServerlessSpec
## Pinecone Setup and Vector Storage

# Initialize Pinecone and create index
def setup_pinecone(dimension=384, metric="cosine", cloud="aws"):
    """
    Make sure the Pinecone index used by this notebook exists.

    Configuration is read from the environment:
      * ``PINECONE_API_KEY``     - required.
      * ``PINECONE_ENVIRONMENT`` - required; passed as the serverless region
        when a new index is created (for example ``"us-west-2"``).
      * ``PINECONE_INDEX_NAME``  - optional, defaults to ``"markdown-docs"``.

    Args:
        dimension: Vector dimension of a newly created index. The default of
            384 is the size noted in this notebook for all-MiniLM-L6-v2.
        metric: Similarity metric of a newly created index.
        cloud: Cloud provider for the serverless spec (``"aws"`` or ``"gcp"``
            depending on your plan).

    Returns:
        The name of the index, whether it already existed or was just created.

    Raises:
        ValueError: If the API key or the environment variable is missing.
    """
    print("Setting up Pinecone...")

    # Get API key and environment from .env file
    api_key = os.getenv("PINECONE_API_KEY")
    environment = os.getenv("PINECONE_ENVIRONMENT")
    index_name = os.getenv("PINECONE_INDEX_NAME", "markdown-docs")

    if not api_key or not environment:
        raise ValueError("Pinecone API key and environment must be set in .env file")

    # Class-based Pinecone client; only the API key is needed to connect.
    pc = Pinecone(api_key=api_key)

    existing_names = {index.name for index in pc.list_indexes()}
    if index_name in existing_names:
        # The dimension/metric/cloud arguments are not applied to an existing index.
        print(f"Using existing index: {index_name}")
        return index_name

    print(f"Creating new Pinecone index: {index_name}")
    pc.create_index(
        name=index_name,
        dimension=dimension,
        metric=metric,
        spec=ServerlessSpec(
            cloud=cloud,
            region=environment,
        ),
    )
    print(f"Successfully created index: {index_name}")
    return index_name

In [21]:

# Setup Pinecone
index_name = setup_pinecone()


## Store Document Chunks in Pinecone

# Create vector store and add documents
def store_embeddings(chunks, embeddings, index_name):
    """
    Embed the document chunks and store them in the named Pinecone index.

    Args:
        chunks: Document chunks to store.
        embeddings: Embedding model handed to the vector store.
        index_name: Name of the target Pinecone index.

    Returns:
        The vector store returned by ``PineconeVectorStore.from_documents``.

    Raises:
        Exception: Any failure is reported on stdout and then re-raised.
    """
    print("Creating vector store and storing embeddings...")

    try:
        # Build the vector store directly from the chunks.
        store = PineconeVectorStore.from_documents(
            documents=chunks,
            embedding=embeddings,
            index_name=index_name,
        )
        print(f"Successfully stored {len(chunks)} document chunks in Pinecone")
        return store
    except Exception as err:
        # Report, then let the caller see the original exception.
        print(f"Error storing embeddings: {err}")
        raise

Setting up Pinecone...
Creating new Pinecone index: etq1
Successfully created index: etq1


In [23]:





# Store embeddings in Pinecone
vector_store = store_embeddings(chunks, embeddings, index_name)

## Initialize MiniLM Model for Generation

# Initialize MiniLM model for Q&A
def initialize_minilm_model(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    inference_server_url=None,
    generation_model_name="google/flan-t5-small",
):
    """
    Initialize the language model used to generate answers.

    Args:
        model_name: Kept for backward compatibility; it is not used to build
            the generator. Despite the function name, local generation is done
            by the seq2seq checkpoint named in ``generation_model_name``.
        inference_server_url: When set, a ``HuggingFaceTextGenInference``
            client pointing at this URL is returned and nothing is loaded
            locally.
        generation_model_name: Seq2seq checkpoint loaded for local generation.

    Returns:
        One of:
          * a ``HuggingFaceTextGenInference`` client (hosted inference),
          * a ``HuggingFacePipeline`` wrapping a local text2text pipeline,
          * a canned-response placeholder LLM when the required libraries
            cannot be imported.
    """
    print("Initializing MiniLM model for generation...")

    # Hosted inference API: no local weights are needed.
    if inference_server_url:
        return HuggingFaceTextGenInference(
            inference_server_url=inference_server_url,
            max_new_tokens=512,
            temperature=0.1,
            repetition_penalty=1.1,
        )

    try:
        # These imports live inside the try block so that a missing or broken
        # installation reaches the ImportError fallback below.
        from langchain_community.llms import HuggingFacePipeline
        from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline

        def build_local_llm(**pipeline_options):
            # Load the checkpoint and wrap a text2text pipeline for LangChain.
            seq2seq_model = AutoModelForSeq2SeqLM.from_pretrained(generation_model_name)
            tokenizer = AutoTokenizer.from_pretrained(generation_model_name)
            text_pipeline = pipeline(
                "text2text-generation",
                model=seq2seq_model,
                tokenizer=tokenizer,
                max_length=512,
                **pipeline_options,
            )
            return HuggingFacePipeline(pipeline=text_pipeline)

        try:
            # NOTE(review): temperature presumably only matters when sampling
            # is enabled on the pipeline - confirm against the transformers docs.
            llm = build_local_llm(temperature=0.1)
        except Exception as e:
            print(f"Could not load as seq2seq model: {e}")
            # Retry once without the temperature option.
            llm = build_local_llm()

    except ImportError as e:
        print(f"Error importing transformers components: {e}")
        print("Using a simple model interface for demonstration")

        # Create a simple model interface
        from langchain.llms.base import LLM

        class SimpleResponseModel(LLM):
            def _call(self, prompt, stop=None, run_manager=None, **kwargs):
                # Placeholder: returns a fixed sentence depending only on
                # whether the prompt contains the context/question markers.
                if "Context:" in prompt and "Question:" in prompt:
                    return "Based on the provided context, here's what I found in your documents."
                return "I don't have enough context to answer this question."

            @property
            def _identifying_params(self):
                return {"name": "SimpleResponseModel"}

            @property
            def _llm_type(self):
                return "simple"

        llm = SimpleResponseModel()

    return llm

# Initialize model
inference_server_url = os.getenv("INFERENCE_SERVER_URL", None)  # Optional: URL to hosted inference API
model = initialize_minilm_model(inference_server_url=inference_server_url)

Creating vector store and storing embeddings...
Successfully stored 20624 document chunks in Pinecone
Initializing MiniLM model for generation...


Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Device set to use cuda:0
  model = HuggingFacePipeline(pipeline=pipe)


In [24]:

## Create QA Chain

# Create QA chain with retrieved context
def create_qa_chain(model, vector_store, k=5):
    """
    Create a retrieval QA chain from the model and the vector store.

    Args:
        model: LLM passed to the chain as ``llm``.
        vector_store: Vector store whose ``as_retriever`` supplies the context.
        k: Value passed to the retriever as ``search_kwargs={"k": k}``.
            Defaults to 5.

    Returns:
        The chain built by ``RetrievalQA.from_chain_type`` with the "stuff"
        chain type, configured to also return the source documents.
    """
    print("Creating QA chain...")

    # Prompt that injects the retrieved context ahead of the question.
    prompt_template = """
    You are a helpful AI assistant. Use the following pieces of context to answer the question at the end.
    If you don't know the answer or if the answer is not in the given context, say "I don't have enough information to answer this question."
    
    Context:
    {context}
    
    Question: {question}
    
    Answer:
    """

    PROMPT = PromptTemplate(
        template=prompt_template,
        input_variables=["context", "question"]
    )

    # NOTE(review): with the "stuff" chain type all retrieved chunks go into
    # one prompt; presumably a large k can exceed a small model's input
    # limit - confirm for the model in use.
    qa_chain = RetrievalQA.from_chain_type(
        llm=model,
        chain_type="stuff",
        retriever=vector_store.as_retriever(search_kwargs={"k": k}),
        return_source_documents=True,
        chain_type_kwargs={"prompt": PROMPT}
    )

    return qa_chain

# Create the QA chain
qa_chain = create_qa_chain(model, vector_store)

Creating QA chain...


In [29]:
## Query Interface

# Function to query the system
def query_documents(qa_chain, query):
    """
    Ask the QA system a question and print the answer with its sources.

    Args:
        qa_chain: Chain to query. Its ``invoke`` method is used when present;
            otherwise the object is called directly, so plain callables work.
        query: Question text, sent under the ``"query"`` key.

    Returns:
        A ``(answer, sources)`` tuple: the value under ``"result"`` and the
        list under ``"source_documents"`` (an empty list when the chain does
        not return that key).
    """
    print(f"Query: {query}")

    # Prefer invoke(); calling a chain object directly is the legacy style.
    invoke = getattr(qa_chain, "invoke", None)
    if callable(invoke):
        result = invoke({"query": query})
    else:
        result = qa_chain({"query": query})

    answer = result["result"]
    # The key is absent when the chain was built without source documents.
    sources = result.get("source_documents", [])

    print("\nAnswer:")
    print(answer)

    print("\nSources:")
    for i, doc in enumerate(sources):
        print(f"Source {i+1}:")
        # Only a short preview of each source chunk is printed.
        print(f"  Content: {doc.page_content[:150]}...")
        print(f"  Source: {doc.metadata.get('source', 'Unknown')}")
        print()

    return answer, sources

# Example query
query = """xample: ABC Group’s Loyalty Program

Consider a company ABC Group that want to manage a common loyalty program across its various brands, such as:

- GroceryMart:A grocery and essentials chain.

- TechStore:A store specializing in gadgets and electronics.

- FashionStores:A clothing and accessories brand.

They can use the Standard Organisation framework to meet their requirements.
what are benefits? """
answer, sources = query_documents(qa_chain, query)



Query: xample: ABC Group’s Loyalty Program

Consider a company ABC Group that want to manage a common loyalty program across its various brands, such as:

- GroceryMart:A grocery and essentials chain.

- TechStore:A store specializing in gadgets and electronics.

- FashionStores:A clothing and accessories brand.

They can use the Standard Organisation framework to meet their requirements.
what are benefits? 





Answer:
Unified Point System: Customers earn points on purchases made at any brand under ABC Group. - Flexible Redemption: Points can be redeemed across all participating brands.

Sources:
Source 1:
  Content: #### Example: ABC Jewellers Loyalty Program

- Single loyalty program across all outlets

- Centralised customer database

- Unified points system

- ...
  Source: ./data/ADMIN_CONTROLS_Organization.md

Source 2:
  Content: Benefits:

- Unified Point System: Customers earn points on purchases made at any brand under ABC Group.

- Flexible Redemption: Points can be redeeme...
  Source: ./data/ADMIN_CONTROLS_Organization.md

Source 3:
  Content: - Corporate Loyalty Programs: Employees making transactions and rewards are awarded and redeemed at a company level.

### About User Group Loyalty

Gr...
  Source: ./data/Announcements_April-May-June_2021.md

Source 4:
  Content: Currently, the usability of Loyalty+ for any business or a brand is limited to benefit its end customers. The c