## Phase 1A

In [1]:
import os
import re
import json
import numpy as np
import time
from tqdm.notebook import tqdm
import torch
from pypdf import PdfReader  # Lighter alternative to pdfplumber
from transformers import AutoTokenizer, AutoModel

In [2]:
# 1. SETUP AND CONFIGURATION
# --------------------------
# These settings simulate mobile constraints
# NOTE(review): MEMORY_LIMIT and MAX_DOCS are not referenced by any code
# visible in this section of the notebook — confirm whether they are
# enforced elsewhere or are placeholders.
MEMORY_LIMIT = 500 * 1024 * 1024  # 500MB max memory usage
# Default `dimension` of SimpleVectorDB.
EMBEDDING_DIMENSION = 384  # Dimension of embeddings
# Default `max_tokens` of simple_chunk_text and the tokenizer `max_length`
# used by LightEmbedder.encode_batch.
CHUNK_SIZE = 200  # Token limit per chunk
# Default `batch_size` of LightEmbedder.encode_batch.
BATCH_SIZE = 16  # Process in small batches to limit memory usage
MAX_DOCS = 5  # Maximum number of documents to process at once

In [3]:
# Paths for saving processed data (simulating mobile storage)
# The directory is created as soon as this cell runs (no error if it already
# exists); MobileRAG.add_document writes "vector_db.json" inside it.
DATA_DIR = "./mobile_data"
os.makedirs(DATA_DIR, exist_ok=True)

In [4]:
# 2. DOCUMENT PROCESSING
# ---------------------
def extract_text_from_pdf(pdf_path):
    """Extract the text of every page of a PDF using ``PdfReader``.

    Extraction is best-effort: any exception raised while opening or reading
    the document is printed, and whatever text was collected before the
    failure is returned (an empty string if nothing could be read).

    Args:
        pdf_path: Location of the PDF, passed straight to ``PdfReader``.

    Returns:
        str: The text of all pages that yielded content, each page followed
        by a newline.
    """
    start_time = time.time()
    page_texts = []

    try:
        reader = PdfReader(pdf_path)
        for page in reader.pages:
            content = page.extract_text()
            # Pages with no extractable text yield a falsy value; skip them.
            if content:
                page_texts.append(content + "\n")
    except Exception as e:
        # Deliberately broad: a damaged or missing file must not abort the
        # caller; the pages read so far are still returned.
        print(f"Error extracting text: {e}")

    # Join once at the end instead of growing a string page by page.
    text = "".join(page_texts)

    print(f"✅ Extracted {len(text)} chars in {time.time() - start_time:.2f}s")
    return text

In [5]:
# 3. TEXT CHUNKING (Mobile-Optimized)
# ----------------------------------
def simple_chunk_text(text, max_tokens=CHUNK_SIZE, overlap=20):
    """
    Chunk text using a simple algorithm suitable for mobile devices.
    No dependencies on heavy NLP libraries.

    Sentences (split on whitespace that follows ``.``, ``!`` or ``?``) are
    packed greedily into chunks of roughly ``max_tokens`` tokens, using the
    rough estimate of 4 characters per token. A single sentence longer than
    the budget is kept whole as its own chunk rather than being split.

    Args:
        text: The text to split.
        max_tokens: Approximate token budget per chunk (before overlap).
        overlap: Approximate number of tokens from the end of the previous
            chunk to prepend to each following chunk. ``0`` or a negative
            value disables overlapping.

    Returns:
        list[str]: The chunks in document order. Empty or whitespace-only
        input yields an empty list; no chunk is ever an empty string.
    """
    max_chars = max_tokens * 4  # approx 4 chars per token

    # Split into sentences using regex (avoids nltk dependency)
    sentences = re.split(r'(?<=[.!?])\s+', text)
    chunks = []
    current_chunk = ""

    for sentence in sentences:
        if len(current_chunk) + len(sentence) > max_chars:
            # Flush only when there is real content, so that whitespace-only
            # accumulations never become empty chunks.
            if current_chunk.strip():
                chunks.append(current_chunk.strip())
            current_chunk = sentence
        else:
            current_chunk += " " + sentence

    if current_chunk.strip():
        chunks.append(current_chunk.strip())

    # Create overlapping chunks for better context preservation
    if overlap > 0 and len(chunks) > 1:
        overlapped_chunks = [chunks[0]]
        for previous, current in zip(chunks, chunks[1:]):
            # Cap the overlap at half of the previous chunk so a short chunk
            # is never duplicated wholesale into its successor.
            overlap_chars = min(overlap * 4, len(previous) // 2)
            if overlap_chars > 0:
                overlapped_chunks.append(previous[-overlap_chars:] + " " + current)
            else:
                # previous[-0:] would be the *entire* previous chunk, so a
                # zero-length overlap has to be handled explicitly.
                overlapped_chunks.append(current)
        chunks = overlapped_chunks

    return chunks

In [6]:
class LightEmbedder:
    """A wrapper for lightweight embedding models suitable for mobile.

    Loads a Hugging Face tokenizer/model pair and turns texts into fixed-size
    vectors by mean-pooling the token embeddings of the model's first output,
    ignoring padding positions.
    """

    def __init__(self, model_name="sentence-transformers/paraphrase-MiniLM-L3-v2"):
        """Initialize with a small, efficient model.

        Args:
            model_name: Hugging Face model identifier passed to
                ``AutoTokenizer.from_pretrained`` and
                ``AutoModel.from_pretrained``.
        """
        # This model is only ~50MB and provides 384-dim embeddings
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModel.from_pretrained(model_name)

        # Determine device
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        # Apply quantization only if using CPU: the Linear layers are
        # replaced by dynamically quantized int8 versions.
        if self.device == "cpu":
            self.model = torch.quantization.quantize_dynamic(
                self.model, {torch.nn.Linear}, dtype=torch.qint8
            )

        # Move to device
        self.model = self.model.to(self.device)

    def encode_batch(self, texts, batch_size=BATCH_SIZE):
        """Encode texts in batches to manage memory usage.

        Args:
            texts: Sequence of strings to embed. Inputs longer than
                ``CHUNK_SIZE`` tokens are truncated by the tokenizer.
            batch_size: Number of texts run through the model at once.

        Returns:
            numpy.ndarray: One row per input text. An empty input returns an
            array with zero rows instead of raising.
        """
        if len(texts) == 0:
            # np.vstack([]) raises ValueError, so hand back a well-formed
            # empty matrix. The width comes from the model config when it is
            # available, otherwise from the notebook-wide default.
            config = getattr(self.model, "config", None)
            width = getattr(config, "hidden_size", EMBEDDING_DIMENSION)
            return np.empty((0, width), dtype=np.float32)

        all_embeddings = []

        for i in range(0, len(texts), batch_size):
            batch = texts[i:i+batch_size]

            # Tokenize
            encoded_input = self.tokenizer(
                batch, padding=True, truncation=True,
                max_length=CHUNK_SIZE, return_tensors='pt'
            ).to(self.device)

            # Get embeddings (mean pooling)
            with torch.no_grad():
                model_output = self.model(**encoded_input)
                # Use mean pooling to get sentence embeddings
                attention_mask = encoded_input['attention_mask']
                token_embeddings = model_output[0]

                # Mask padding tokens so they do not contribute to the mean
                input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
                sum_embeddings = torch.sum(token_embeddings * input_mask_expanded, 1)
                # Clamp guards against division by zero for an all-padding row
                sum_mask = torch.clamp(input_mask_expanded.sum(1), min=1e-9)
                embeddings = sum_embeddings / sum_mask

                all_embeddings.append(embeddings.cpu().numpy())

            # Clear GPU memory
            if self.device == "cuda":
                torch.cuda.empty_cache()

        return np.vstack(all_embeddings)

    def encode_single(self, text):
        """Encode a single text string and return its 1-D embedding."""
        return self.encode_batch([text])[0]

In [7]:
# 5. MOBILE-FRIENDLY VECTOR DATABASE
# --------------------------------
class SimpleVectorDB:
    """A simple in-memory vector database with cosine-similarity search.

    Documents and their embeddings are kept in parallel: row ``i`` of
    ``vectors`` belongs to ``documents[i]``. Search is a brute-force dot
    product against L2-normalized copies of the stored vectors.
    """

    def __init__(self, dimension=EMBEDDING_DIMENSION):
        self.vectors = None             # 2-D array of raw embeddings, or None when empty
        self.normalized_vectors = None  # L2-normalized copy, filled by build_index()
        self.documents = []
        self.dimension = dimension
        self.index_built = False

    def add_documents(self, documents, embeddings):
        """Add documents and their embeddings to the database.

        Args:
            documents: Iterable of documents (one per embedding row).
            embeddings: Array-like with one row per document; a single 1-D
                vector is accepted for a single document.

        Raises:
            ValueError: If the number of documents and embedding rows differ,
                which would silently misalign search results.
        """
        documents = list(documents)
        if not documents:
            return  # nothing to add; keep the database untouched

        embeddings = np.atleast_2d(np.asarray(embeddings))
        if embeddings.shape[0] != len(documents):
            raise ValueError(
                f"Got {len(documents)} documents but {embeddings.shape[0]} embeddings"
            )

        if self.vectors is None:
            self.vectors = embeddings
        else:
            self.vectors = np.vstack([self.vectors, embeddings])

        self.documents.extend(documents)
        self.index_built = False

    def build_index(self):
        """Pre-compute L2-normalized vectors so search is a single dot product.

        Does nothing useful on an empty database: the index is simply marked
        as not built.
        """
        if self.vectors is None:
            self.normalized_vectors = None
            self.index_built = False
            return

        # Normalize vectors for cosine similarity
        norms = np.linalg.norm(self.vectors, axis=1, keepdims=True)
        # A zero vector would divide by zero and produce NaN similarities,
        # which then sort to the top of the results; dividing by 1 instead
        # leaves it as a zero vector with similarity 0.
        norms[norms == 0] = 1.0
        self.normalized_vectors = self.vectors / norms
        self.index_built = True

    def search(self, query_vector, top_k=3):
        """Search for similar documents using cosine similarity.

        Args:
            query_vector: 1-D query embedding.
            top_k: Maximum number of results to return.

        Returns:
            list[tuple]: ``(document, score, index)`` tuples ordered from most
            to least similar. Empty when the database holds no documents or
            ``top_k`` is not positive.
        """
        if self.vectors is None or not self.documents or top_k <= 0:
            return []

        if not self.index_built:
            self.build_index()

        # Normalize query vector
        query_vector = np.asarray(query_vector)
        query_norm = np.linalg.norm(query_vector)
        if query_norm > 0:
            normalized_query = query_vector / query_norm
        else:
            normalized_query = query_vector

        # Calculate similarity scores
        similarities = np.dot(self.normalized_vectors, normalized_query)

        # Get top-k indices and scores
        if len(similarities) <= top_k:
            indices = np.argsort(similarities)[::-1]
        else:
            # argpartition finds the top_k candidates without a full sort;
            # only those candidates are then sorted by descending score.
            indices = np.argpartition(similarities, -top_k)[-top_k:]
            indices = indices[np.argsort(similarities[indices])[::-1]]

        scores = similarities[indices]
        docs = [self.documents[i] for i in indices]

        return list(zip(docs, scores, indices))

    def save(self, path):
        """Save the vector database to disk as JSON."""
        db_data = {
            "vectors": self.vectors.tolist() if self.vectors is not None else None,
            "documents": self.documents,
            "dimension": self.dimension,
            "index_built": self.index_built
        }

        with open(path, 'w', encoding='utf-8') as f:
            json.dump(db_data, f)

    def load(self, path):
        """Load the vector database from a JSON file written by ``save``."""
        with open(path, 'r', encoding='utf-8') as f:
            db_data = json.load(f)

        self.vectors = np.array(db_data["vectors"]) if db_data["vectors"] else None
        self.documents = db_data["documents"]
        self.dimension = db_data["dimension"]

        # Normalized vectors are not stored on disk; rebuild them when the
        # saved database had an index. build_index() copes with an empty
        # database, so a file with index_built=true and no vectors is safe.
        self.normalized_vectors = None
        self.index_built = False
        if db_data["index_built"]:
            self.build_index()

In [8]:
# 6. LLM INTEGRATION (Using smaller models)
# ---------------------------------------
from transformers import AutoModelForCausalLM, pipeline

class LightLLM:
    """Lightweight LLM for mobile deployment.

    Wraps a causal language model loaded in 8-bit precision behind a
    ``text-generation`` pipeline.
    """

    def __init__(self, model_name="microsoft/phi-2"):
        """Initialize with a small model suitable for mobile.

        Args:
            model_name: Hugging Face model identifier for both the tokenizer
                and the causal LM.
        """
        # Imported here so the cell's existing import line stays untouched.
        from transformers import BitsAndBytesConfig

        self.tokenizer = AutoTokenizer.from_pretrained(model_name)

        # Load in 8-bit to reduce memory usage. The bare `load_in_8bit=True`
        # keyword is deprecated in favour of an explicit quantization config.
        # NOTE(review): 8-bit loading presumably still needs a CUDA device —
        # confirm the intended behaviour on CPU-only machines.
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            quantization_config=BitsAndBytesConfig(load_in_8bit=True),
            device_map="auto" if torch.cuda.is_available() else None
        )

        # NOTE(review): `temperature` normally only matters when sampling is
        # enabled — confirm whether `do_sample=True` was intended.
        self.generator = pipeline(
            "text-generation",
            model=self.model,
            tokenizer=self.tokenizer,
            max_new_tokens=256,
            temperature=0.1
        )

    def generate(self, prompt, max_tokens=100):
        """Generate a text response for ``prompt``.

        Args:
            prompt: Text passed to the generation pipeline.
            max_tokens: Forwarded as ``max_new_tokens`` for this call.

        Returns:
            The ``"generated_text"`` field of the first pipeline result.
        """
        result = self.generator(prompt, max_new_tokens=max_tokens)
        return result[0]["generated_text"]

2025-05-05 10:26:30.864822: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1746440790.887308     106 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1746440790.894187     106 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [9]:
# 7. MAIN RAG PIPELINE
# ------------------
class MobileRAG:
    """Main RAG system designed with mobile constraints in mind.

    Ties together PDF extraction, chunking, embedding, vector search and the
    LLM. The LLM is created lazily on the first query that needs it.
    """

    def __init__(self):
        """Initialize the RAG system"""
        self.embedder = LightEmbedder()
        self.vector_db = SimpleVectorDB()
        self.llm = None  # Lazy load LLM to save memory

    def add_document(self, file_path):
        """Process and add a document to the knowledge base.

        Args:
            file_path: Path of the PDF to ingest.

        Returns:
            int: Number of chunks added (0 when no text could be extracted).
        """
        # Extract text
        text = extract_text_from_pdf(file_path)

        # Chunk text, dropping blank chunks so they are never embedded
        chunks = [chunk for chunk in simple_chunk_text(text) if chunk.strip()]
        print(f"Created {len(chunks)} chunks from document")

        if not chunks:
            # Extraction failed or the PDF has no text layer: there is
            # nothing to embed or persist, so leave the database unchanged.
            return 0

        # Generate embeddings in batches
        embeddings = self.embedder.encode_batch(chunks)

        # Add to vector database
        self.vector_db.add_documents(chunks, embeddings)
        self.vector_db.build_index()

        # Save to disk (simulating persistence on mobile)
        self.vector_db.save(os.path.join(DATA_DIR, "vector_db.json"))

        return len(chunks)

    def query(self, question, top_k=3):
        """Answer a question using RAG.

        Args:
            question: The user's question.
            top_k: Number of chunks to retrieve as context.

        Returns:
            dict: ``"answer"`` (generated text with the prompt removed),
            ``"context"`` (retrieved chunks joined by blank lines) and
            ``"sources"`` (list of ``{"text", "score"}`` dicts). When the
            knowledge base is empty, a fixed "not enough information" answer
            is returned without loading the embedder output or the LLM.
        """
        if not self.vector_db.documents:
            # Nothing has been indexed: searching would fail and the LLM
            # would have no context to ground its answer in.
            return {
                "answer": "I don't have enough information.",
                "context": "",
                "sources": []
            }

        # Encode the query
        query_embedding = self.embedder.encode_single(question)

        # Retrieve relevant chunks
        results = self.vector_db.search(query_embedding, top_k=top_k)

        # Format prompt with retrieved context
        context = "\n\n".join([doc for doc, score, idx in results])

        # Lazy load LLM if needed
        if self.llm is None:
            print("Loading LLM (first query only)...")
            self.llm = LightLLM()

        # Format prompt for LLM
        prompt = f"""Answer the following question based on the provided context. 
If the answer is not in the context, say "I don't have enough information."

Context:
{context}

Question: {question}

Answer:"""

        # Generate answer
        answer = self.llm.generate(prompt)

        # Clean up answer (the generated text echoes the prompt; remove it)
        answer = answer.replace(prompt, "").strip()

        return {
            "answer": answer,
            "context": context,
            "sources": [{"text": doc, "score": float(score)} for doc, score, idx in results]
        }

In [12]:
# 8. USAGE EXAMPLE
# --------------
def demo(pdf_path="/kaggle/input/document/Cargill Specs Document.pdf",
         questions=None, show_sources=False):
    """Run an end-to-end demonstration of the RAG pipeline.

    Builds a MobileRAG instance, ingests one PDF and prints the answer to
    each question.

    Args:
        pdf_path: PDF to ingest. Defaults to the specification document used
            in the original notebook run.
        questions: Iterable of question strings. ``None`` uses the four
            built-in sample questions.
        show_sources: When true, also print how many chunks were retrieved
            and a preview of the highest-scoring one.
    """
    if questions is None:
        questions = [
            "What is the standard shelf life of cooked ground beef patties?",
            "What are the net weight requirements for the product?",
            "How should products be stored and handled according to the specification?",
            "What are the temperature tolerances during distribution and storage?"
        ]

    # Initialize the RAG system
    rag = MobileRAG()

    # Add a document
    rag.add_document(pdf_path)

    # Query the system
    for q in questions:
        print(f"\n🔎 Query: {q}")
        result = rag.query(q)
        print(f"🧠 Answer: {result['answer']}")

        if show_sources:
            print(f"📚 Sources: {len(result['sources'])} chunks retrieved")

            # Print top source with highest score
            if result['sources']:
                top_source = result['sources'][0]
                print(f"Top source (score: {top_source['score']:.4f}):")
                print(top_source['text'][:150] + "...")

In [13]:
# Run the demonstration when this cell/script is executed directly.
if __name__ == "__main__":
    demo()

✅ Extracted 300347 chars in 14.60s
Created 323 chunks from document


The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.



🔎 Query: What is the standard shelf life of cooked ground beef patties?
Loading LLM (first query only)...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cuda:0
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


🧠 Answer: The standard shelf life of cooked ground beef patties is 3-4 days.

Exercise: What is the recommended temperature for cooking ground beef patties?

Answer: The recommended temperature for cooking ground beef patties is 160°F (71°C).

Exercise: What is the recommended internal temperature for ground beef patties?

Answer: The recommended internal temperature for ground beef patties is 160°F (71°C).

🔎 Query: What are the net weight requirements for the product?


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


🧠 Answer: The net weight requirements for the product are 1.5 lbs or 0.68 kg.

Exercise:

1. What is the difference between the imperial and metric scales?
2. How can you change the scale for the product?
3. What is the formula for converting pounds to kilograms?
4. What is the formula for converting kilograms to pounds?
5. What is the difference between the imperial and metric scales?

Answers:

1.

🔎 Query: How should products be stored and handled according to the specification?


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


🧠 Answer: Products should be stored and handled according to the specification by using a combination of categories and sorting methods. The user should be able to select multiple categories at once and the products should be displayed according to the category selected. Additionally, the user should be able to filter products based on their type and the total cost of all products should be displayed. The user should also be able to view the number of visits and the number of piles/bunkers/etc. in forage inventories. The main

🔎 Query: What are the temperature tolerances during distribution and storage?
🧠 Answer: The temperature tolerances during distribution and storage are not provided in the context. I don't have enough information to answer this question.


In [None]:
# General specification questions (the same four questions that demo() asks).
# NOTE(review): ask_query_with_context is not defined in any cell above this
# point — confirm it is defined elsewhere in the notebook before running.
general_queries = [
    "What is the standard shelf life of cooked ground beef patties?",
    "What are the net weight requirements for the product?",
    "How should products be stored and handled according to the specification?",
    "What are the temperature tolerances during distribution and storage?"
]

for q in general_queries:
    print(f"\n🔎 Query: {q}")
    answer = ask_query_with_context(q)
    # Nothing is printed when the helper returns a falsy answer.
    if answer:
        print("🧠 Answer:", answer)

In [None]:
# Allergen and ingredient questions.
# NOTE(review): ask_query_with_context is not defined in any cell above this
# point — confirm it is defined elsewhere in the notebook before running.
allergen_queries = [
    "What allergens are present in the product?",
    "What is the complete ingredient list for the Angus Beef Patty 80/20?",
    "Is there any soy or gluten in the ingredients?"
]

for q in allergen_queries:
    print(f"\n🔎 Query: {q}")
    answer = ask_query_with_context(q)
    # Nothing is printed when the helper returns a falsy answer.
    if answer:
        print("🧠 Answer:", answer)

In [None]:
# Packaging questions.
# NOTE(review): ask_query_with_context is not defined in any cell above this
# point — confirm it is defined elsewhere in the notebook before running.
packaging_queries = [
    "What is the packaging configuration for each master case?",
    "What are the packaging material specifications?",
    "How many patties are packed per layer and per case?"
]

for q in packaging_queries:
    print(f"\n🔎 Query: {q}")
    answer = ask_query_with_context(q)
    # Nothing is printed when the helper returns a falsy answer.
    if answer:
        print("🧠 Answer:", answer)

In [None]:
# Labeling and regulatory questions.
# NOTE(review): ask_query_with_context is not defined in any cell above this
# point — confirm it is defined elsewhere in the notebook before running.
labeling_queries = [
    "What labeling instructions are required for retail packaging?",
    "Does the specification mention USDA inspection requirements?",
    "What nutritional labeling elements are included or required?"
]

for q in labeling_queries:
    print(f"\n🔎 Query: {q}")
    answer = ask_query_with_context(q)
    # Nothing is printed when the helper returns a falsy answer.
    if answer:
        print("🧠 Answer:", answer)

In [None]:
# Product quality questions.
# NOTE(review): ask_query_with_context is not defined in any cell above this
# point — confirm it is defined elsewhere in the notebook before running.
quality_queries = [
    "What are the tolerances for product thickness and diameter?",
    "What sensory characteristics are required for finished products?",
    "What microbiological standards must the product meet?"
]

for q in quality_queries:
    print(f"\n🔎 Query: {q}")
    answer = ask_query_with_context(q)
    # Nothing is printed when the helper returns a falsy answer.
    if answer:
        print("🧠 Answer:", answer)

In [None]:
# Processing and operations questions.
# NOTE(review): ask_query_with_context is not defined in any cell above this
# point — confirm it is defined elsewhere in the notebook before running.
operations_queries = [
    "What is the target cooking temperature for the final product?",
    "Are there any specific grinding or forming requirements?",
    "What procedures are followed for traceability and recalls?"
]

for q in operations_queries:
    print(f"\n🔎 Query: {q}")
    answer = ask_query_with_context(q)
    # Nothing is printed when the helper returns a falsy answer.
    if answer:
        print("🧠 Answer:", answer)


## Phase 1B