# Cell 1: Install dependencies

In [1]:

!pip -q install --upgrade pip
!pip -q install "transformers>=4.44" "accelerate>=0.33" "bitsandbytes>=0.43.1" \
                 "flask>=3.0" "pyngrok>=7.2" "uvicorn>=0.30" "nest-asyncio>=1.6.0" \
                 "pyyaml>=6.0" "requests>=2.31"


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.8 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m85.5 MB/s[0m eta [36m0:00:00[0m
[?25h

# Cell 2: Import libraries and setup

In [2]:
# Cell 2: Import libraries and setup
import os, json, time, re, math, threading, yaml
from typing import List, Dict, Any, Tuple

import torch
from flask import Flask, request, jsonify
from pyngrok import ngrok
import nest_asyncio

from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Seed and compute device are chosen once here; later cells reuse DEVICE
# (for example in the /health payload).
SEED = 42
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print("Device:", DEVICE)
if DEVICE == "cuda":
    gpu_props = torch.cuda.get_device_properties(0)
    print("GPU:", torch.cuda.get_device_name(0))
    print("VRAM (GB):", round(gpu_props.total_memory / 1e9, 2))
# Trailing semicolon: manual_seed() returns the Generator object, whose repr
# would otherwise be echoed as this cell's output.
torch.manual_seed(SEED);

Device: cuda
GPU: Tesla T4
VRAM (GB): 15.83


<torch._C.Generator at 0x7c4023b36830>

# Cell 3: Load Configuration (16 Policies + Assistant Settings)

In [10]:
# Cell 3: Load Configuration (hardcoded)

# Ground Truth Policies
# Hardcoded knowledge base: a list of policy records. Every record below carries
# the same five string keys: "id", "question", "answer", "category", "lastUpdated".
# find_relevant_policies() filters these records by "category", and
# build_system_prompt() renders "id", "question" and "answer" into the LLM prompt.
# NOTE(review): the first record's id is "Policy3.1" while the other records in the
# "returns" category are "Returns3.2"/"Returns3.3". Confirm whether that is intended
# before renaming — the system prompt uses [Policy3.1] as its citation example.
GROUND_TRUTH = [
    {
        "id": "Policy3.1",
        "question": "What is your return policy?",
        "answer": "Items can be returned within 30 days of purchase with original receipt. All items must be in original condition with tags attached. Refunds are processed within 5-7 business days.",
        "category": "returns",
        "lastUpdated": "2025-10-01T00:00:00Z"
    },
    {
        "id": "Shipping2.1",
        "question": "What are your shipping options?",
        "answer": "We offer Standard (5-7 days, $5.99), Express (2-3 days, $12.99), and Overnight ($24.99) shipping. Free shipping on orders over $50.",
        "category": "shipping",
        "lastUpdated": "2025-10-01T00:00:00Z"
    },
    {
        "id": "Shipping2.2",
        "question": "How can I track my order?",
        "answer": "Track your order from the 'My Orders' dashboard. You'll receive tracking updates via email, SMS, or push notification. You can also drop your orderID in the support chat to get order status. Tracking numbers are provided within 24 hours of dispatch.",
        "category": "shipping",
        "lastUpdated": "2025-10-01T00:00:00Z"
    },
    {
        "id": "Shipping2.3",
        "question": "What happens if my delivery is delayed?",
        "answer": "You'll receive immediate notification of any delays with revised delivery estimates. Delays due to customs, weather, or carrier issues are communicated promptly.",
        "category": "shipping",
        "lastUpdated": "2025-10-01T00:00:00Z"
    },
    {
        "id": "Returns3.2",
        "question": "Which items cannot be returned?",
        "answer": "Digital goods, perishable food, hygiene-sensitive products (cosmetics), and personalized/custom items cannot be returned. These restrictions are displayed at checkout.",
        "category": "returns",
        "lastUpdated": "2025-10-01T00:00:00Z"
    },
    {
        "id": "Returns3.3",
        "question": "How do I initiate a return?",
        "answer": "Select the item in your account dashboard, state the reason, and receive a return authorization number (RAN). Include the RAN with your package and ship using a trackable method.",
        "category": "returns",
        "lastUpdated": "2025-10-01T00:00:00Z"
    },
    {
        "id": "Payment4.1",
        "question": "What payment methods do you accept?",
        "answer": "We accept credit/debit cards, PayPal, Apple Pay, Google Pay, and regional methods like bank transfers or cash-on-delivery depending on location.",
        "category": "payments",
        "lastUpdated": "2025-10-01T00:00:00Z"
    },
    {
        "id": "Payment4.2",
        "question": "Are my payment details secure?",
        "answer": "All transactions use PCI-DSS compliant gateways with encryption. Payment details are tokenized and never exposed to sellers. Two-factor authentication is available for added security.",
        "category": "payments",
        "lastUpdated": "2025-10-01T00:00:00Z"
    },
    {
        "id": "Payment4.3",
        "question": "How long do refunds take?",
        "answer": "Refunds are processed to the original payment method within 5-7 business days after approval. You'll receive confirmation when the refund is initiated.",
        "category": "payments",
        "lastUpdated": "2025-10-01T00:00:00Z"
    },
    {
        "id": "Account1.1",
        "question": "How do I create an account?",
        "answer": "Register at the signup page with your email, password, and profile details. Click the verification link sent to your email within 24 hours to activate your account.",
        "category": "account",
        "lastUpdated": "2025-10-01T00:00:00Z"
    },
    {
        "id": "Account1.2",
        "question": "What security features are available?",
        "answer": "Password requirements include upper/lowercase letters, numbers, and special characters. Two-factor authentication (2FA) via SMS or authenticator apps is available. Security alerts are sent for unusual login activity.",
        "category": "account",
        "lastUpdated": "2025-10-01T00:00:00Z"
    },
    {
        "id": "Account1.3",
        "question": "How do I recover my account?",
        "answer": "Account recovery requires email confirmation and security question answers. Accounts inactive for 12+ months are flagged for re-verification.",
        "category": "account",
        "lastUpdated": "2025-10-01T00:00:00Z"
    },
    {
        "id": "Warranty5.1",
        "question": "What is covered under warranty?",
        "answer": "Manufacturer defects and malfunctions during the warranty period are covered. Warranty terms vary by product and are displayed on product pages.",
        "category": "warranty",
        "lastUpdated": "2025-10-01T00:00:00Z"
    },
    {
        "id": "Warranty5.2",
        "question": "How do I make a warranty claim?",
        "answer": "Contact support with your order number and proof of purchase. Include photos or descriptions of the defect. Approved claims receive replacement or repair instructions.",
        "category": "warranty",
        "lastUpdated": "2025-10-01T00:00:00Z"
    },
    {
        "id": "Privacy6.1",
        "question": "How is my personal data protected?",
        "answer": "We comply with GDPR, CCPA, and regional data protection laws. All data is encrypted in transit and at rest. You can download or delete your personal data anytime.",
        "category": "privacy",
        "lastUpdated": "2025-10-01T00:00:00Z"
    },
    {
        "id": "Privacy6.2",
        "question": "Can I control how my data is used?",
        "answer": "Yes. You can opt out of marketing, control cookie preferences, and manage third-party data sharing in your privacy settings.",
        "category": "privacy",
        "lastUpdated": "2025-10-01T00:00:00Z"
    }
]

# Configuration from prompts.yaml
# NOTE(review): the values are hardcoded in this cell; no visible cell reads a YAML
# file, so this dict has to be kept in sync with prompts.yaml by hand (if that file
# exists — confirm).
CONFIG = {
    # Persona. build_system_prompt() interpolates 'name' and 'role' into the system
    # prompt; the summary print below shows the first three 'personality' entries.
    # 'never_say' and 'identity_response' are not referenced by any function visible
    # in this notebook view.
    'assistant': {
        'name': 'Shoppy',
        'role': 'Shoplite Support Specialist',
        'personality': [
            'Professional yet friendly',
            'Solution-oriented',
            'Patient and helpful',
            'Clear and concise'
        ],
        'never_say': [
            "I'm an AI",
            "I'm ChatGPT",
            "I'm Claude",
            "I'm Llama",
            "As an artificial intelligence",
            "I'm a language model",
            "I'm a bot"
        ],
        'identity_response': "I'm Shoppy, a Shoplite support specialist. I'm here to help you with orders, returns, shipping, and account questions."
    },
    # Intent catalogue. The four keys match the keys of INTENT_PROMPTS; the visible
    # code only counts these entries (summary print) and does not read their fields.
    # NOTE(review): 'order_status' mentions getOrderStatus, whereas the function
    # registered with the registry is named trackOrder.
    'intents': {
        'policy_question': {
            'description': 'Questions about policies, shipping, returns, warranties, privacy',
            'behavior': 'Search knowledge base, cite policy IDs, provide accurate information',
            'tone': 'Professional, informative'
        },
        'order_status': {
            'description': 'Tracking orders, checking status',
            'behavior': 'Call getOrderStatus function with order ID',
            'tone': 'Helpful, reassuring'
        },
        'product_search': {
            'description': 'Finding products, searching inventory',
            'behavior': 'Call searchProducts function',
            'tone': 'Enthusiastic, helpful'
        },
        'complaint': {
            'description': 'Customer complaints, issues, frustrations',
            'behavior': 'Acknowledge empathetically, offer solutions, escalate if needed',
            'tone': 'Empathetic, solution-focused'
        }
    },
    # Response limits. NOTE(review): none of these values is read by the visible
    # code — e.g. the backend HTTP calls pass timeout=5 literally and
    # generate_llm_response() passes max_new_tokens=200 — so editing them here has
    # no effect unless other code consumes them.
    'response_guidelines': {
        'max_response_length': 300,
        'use_citations': True,
        'citation_format': '[PolicyID]',
        'always_ground_policies': True,
        'max_function_calls': 2,
        'timeout_seconds': 5
    }
}

# Print a short summary of the policy list and assistant configuration defined above.
assistant_cfg = CONFIG['assistant']
sample_policy_ids = [policy['id'] for policy in GROUND_TRUTH[:3]]
personality_preview = ', '.join(assistant_cfg['personality'][:3])

print(
    f"✅ Loaded {len(GROUND_TRUTH)} policy documents",
    f"   Sample policies: {sample_policy_ids}",
    "✅ Loaded configuration",
    f"   Assistant: {assistant_cfg['name']} - {assistant_cfg['role']}",
    f"   Personality: {personality_preview}",
    f"   Available intents: {len(CONFIG['intents'])} defined",
    "\n🎯 Configuration loaded successfully!",
    sep="\n",
)

✅ Loaded 16 policy documents
   Sample policies: ['Policy3.1', 'Shipping2.1', 'Shipping2.2']
✅ Loaded configuration
   Assistant: Shoppy - Shoplite Support Specialist
   Personality: Professional yet friendly, Solution-oriented, Patient and helpful
   Available intents: 4 defined

🎯 Configuration loaded successfully!


# Cell 4: System Prompts for LLM

In [4]:
# Cell 4: System Prompts for LLM

# Build system prompt using the loaded CONFIG
def build_system_prompt(intent: str, policies: list) -> str:
    """Compose the system message that grounds the model in the given policies.

    Args:
        intent: Accepted for interface symmetry; the prompt text does not depend on it.
        policies: Policy records; each must provide 'id', 'question' and 'answer'.

    Returns:
        The full system prompt: persona line, instructions, and one
        "**id: question**" heading followed by the answer for every policy.
    """
    persona = CONFIG['assistant']
    assistant_name, assistant_role = persona['name'], persona['role']

    # One block per policy, blocks separated by a blank line.
    policy_blocks = []
    for entry in policies:
        policy_blocks.append(f"**{entry['id']}: {entry['question']}**\n{entry['answer']}")
    policy_context = "\n\n".join(policy_blocks)

    return f"""You are {assistant_name}, a helpful {assistant_role} at Shoplite.

Your job is to answer customer questions using the policy information provided below. Always cite relevant policies using [PolicyID] format.

Keep responses natural, helpful, and concise (2-3 sentences maximum).

**Available Policies:**
{policy_context}

Instructions:
- Answer using ONLY the policy information above
- Include policy citations like [Policy3.1] when relevant
- If the policies don't cover the question, say you don't have that information
- Be conversational and helpful, not robotic
- Do not mention that you are an AI or language model"""


# Intent-specific prompt templates
# Each value is the lead-in that generate_llm_response() places in front of the
# customer's message; intents missing from this table get a generic lead-in there.
INTENT_PROMPTS = dict(
    policy_question='Answer this customer question about our policies:',
    order_status='Help the customer with their order inquiry:',
    product_search='Help the customer find products:',
    complaint='Address this customer concern professionally:',
)


def generate_llm_response(intent: str, message: str, relevant_policies: list) -> dict:
    """Generate a policy-grounded reply with the chat model.

    Args:
        intent: Intent label; selects the lead-in from INTENT_PROMPTS
            (unknown intents use 'Help this customer:').
        message: The customer's message.
        relevant_policies: Policy records placed into the system prompt; each
            must provide an 'id' (plus whatever build_system_prompt reads).

    Returns:
        dict with
          'response'     - the model's reply text,
          'citations'    - policy IDs cited as [Id] in the reply, de-duplicated,
                           in order of first appearance,
          'policiesUsed' - IDs of the policies that were supplied to the model.
    """
    system_prompt = build_system_prompt(intent, relevant_policies)
    intent_instruction = INTENT_PROMPTS.get(intent, 'Help this customer:')

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": f"{intent_instruction} {message}"}
    ]

    # Generate response
    response_text = chat_complete(messages, max_new_tokens=200, temperature=0.3)

    # Extract citations such as [Shipping2.1]. dict.fromkeys de-duplicates while
    # keeping first-seen order; list(set(...)) would reorder them unpredictably
    # between runs because string hashing is randomized per process.
    cited_ids = re.findall(r'\[([A-Za-z]+\d+\.\d+)\]', response_text)
    citations = list(dict.fromkeys(cited_ids))

    return {
        'response': response_text,
        'citations': citations,
        'policiesUsed': [p['id'] for p in relevant_policies]
    }


# Confirmation banner for this cell: assistant name and the intents with a lead-in.
print(
    "\n".join([
        "✅ System prompts configured",
        f"   Assistant: {CONFIG['assistant']['name']}",
        f"   Supported intents: {list(INTENT_PROMPTS)}",
    ])
)

✅ System prompts configured
   Assistant: Shoppy
   Supported intents: ['policy_question', 'order_status', 'product_search', 'complaint']


# Cell 5: LLM loading and setup

In [5]:
MODEL_NAME = "Qwen/Qwen2.5-7B-Instruct"

# One dtype for both the 4-bit compute path and the non-quantized modules.
COMPUTE_DTYPE = torch.bfloat16 if torch.cuda.is_available() else torch.float16

# 4-bit NF4 quantization with double quantization (bitsandbytes).
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=COMPUTE_DTYPE,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True
)

# trust_remote_code is deliberately NOT enabled: it lets the model repository run
# arbitrary Python on this machine, and Qwen2.5 uses the Qwen2 architecture that
# ships with transformers itself, so no repository-supplied code is needed.
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_NAME,
    use_fast=True
)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    # Newer transformers releases warn that `torch_dtype` is deprecated in favour
    # of `dtype`; it is kept because this notebook allows transformers>=4.44.
    torch_dtype=COMPUTE_DTYPE,
    device_map="auto",
    quantization_config=bnb_config
)
model.eval()

# Ensure pad/eos tokens are set to avoid warnings
if tokenizer.pad_token is None and tokenizer.eos_token is not None:
    tokenizer.pad_token = tokenizer.eos_token

def chat_complete(messages: List[Dict[str,str]], max_new_tokens=512, temperature=0.1) -> str:
    """
    Run one chat completion and return ONLY the assistant's new text.

    Args:
        messages: [{"role":"system"/"user"/"assistant", "content":"..."}]
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature. A positive value enables sampling;
            0 (or None / a non-positive value) selects greedy decoding.

    Returns:
        The decoded completion with special tokens removed and surrounding
        whitespace stripped.
    """
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    generation_kwargs = dict(
        max_new_tokens=max_new_tokens,
        repetition_penalty=1.05,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.pad_token_id,
    )
    if temperature and temperature > 0:
        generation_kwargs.update(do_sample=True, temperature=temperature)
    else:
        # Greedy decoding: temperature is a sampling-only knob, so it is not
        # forwarded (a value of 0 is not a valid sampling temperature).
        generation_kwargs["do_sample"] = False

    with torch.no_grad():
        outputs = model.generate(**inputs, **generation_kwargs)

    # generate() returns prompt + completion; keep only the newly generated tokens.
    prompt_len = inputs['input_ids'].shape[1]
    response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()

    # Remove a leaked chat-role header, but only when the first line is nothing
    # except the role label (optionally followed by a colon). A plain prefix test
    # would also delete the first line of a real answer such as
    # "users can return items ..." and blank out single-line answers.
    response = re.sub(r'^(?:system|user|assistant)[ \t]*:?[ \t]*\n', '', response, count=1).strip()

    return response

def force_json(output_text: str) -> str:
    """
    Extract the first JSON object from arbitrary model output.

    Handles a leading code fence and surrounding commentary. The first
    brace-balanced {...} span is returned, so nested objects and braces inside
    string values stay intact. If the braces never balance (e.g. truncated
    output), the shortest {...} span is returned as a best effort; if there is
    no such span, the cleaned text is returned unchanged for the caller to
    validate.
    """
    txt = output_text.strip()

    # Strip code fences if present
    if txt.startswith("```"):
        # remove the opening fence line (which may carry a language tag)
        newline_at = txt.find("\n")
        if newline_at != -1:
            txt = txt[newline_at + 1:]
        # remove a trailing fence
        if txt.endswith("```"):
            txt = txt[:-3].strip()

    # Balanced-brace scan. Quotes are tracked only inside an object so that a
    # stray double quote in the surrounding prose cannot hide the JSON.
    depth = 0
    start = -1
    in_str = False
    esc = False
    for i, ch in enumerate(txt):
        if in_str:
            if esc:
                esc = False          # character after a backslash is literal
            elif ch == '\\':
                esc = True
            elif ch == '"':
                in_str = False
            continue
        if ch == '{':
            if depth == 0:
                start = i
            depth += 1
        elif depth == 0:
            continue                 # prose outside any object
        elif ch == '"':
            in_str = True
        elif ch == '}':
            depth -= 1
            if depth == 0:
                return txt[start:i + 1].strip()

    # No balanced object: fall back to the shortest {...} span, if any.
    m = re.search(r"\{[\s\S]*?\}", txt)
    if m:
        return m.group(0).strip()

    # Nothing found; return raw (validator will handle)
    return txt


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/663 [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/3.86G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/3.56G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/3.86G [00:00<?, ?B/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/3.95G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/243 [00:00<?, ?B/s]

# Cell 6: Policy Matching and Flask API

In [11]:
# Cell 6: Policy Matching, Function Registry, and Flask API

from flask import Flask, request, jsonify
import requests
import traceback

app = Flask(__name__)

# Backend API base URL. Set the BACKEND_URL environment variable to point at a
# different deployment; the Render.com instance below is the default. A trailing
# slash is dropped because callers append paths such as "/orders/<id>".
BACKEND_URL = (os.environ.get("BACKEND_URL") or "https://api-backend-hxfy.onrender.com/api").rstrip("/")
ENABLE_FUNCTIONS = True  # Backend is publicly accessible from Google Colab

# Function Registry - Functions that can call backend APIs
class FunctionRegistry:
    """Name-indexed table of callables and the schemas that describe them."""

    def __init__(self):
        # name -> {'function': <callable>, 'schema': <dict>}
        self.functions = {}

    def register(self, name: str, func: callable, schema: dict):
        """Store `func` and its `schema` under `name` (an existing entry is replaced)."""
        entry = {'function': func, 'schema': schema}
        self.functions[name] = entry

    def get_all_schemas(self) -> list:
        """Return the schemas of all registered functions, in registration order."""
        schemas = []
        for entry in self.functions.values():
            schemas.append(entry['schema'])
        return schemas

    def execute(self, name: str, **kwargs) -> dict:
        """Call the function registered as `name` with `kwargs`.

        Returns the function's own result. An unknown name, or any exception
        raised by the function, yields {'success': False, 'error': ...}; the
        exception case also carries the formatted traceback under 'trace'.
        """
        entry = self.functions.get(name)
        if entry is None:
            return {'success': False, 'error': f"Function '{name}' not registered"}

        handler = entry['function']
        try:
            return handler(**kwargs)
        except Exception as exc:
            # Traceback is included to ease debugging from the Colab logs.
            return {
                'success': False,
                'error': str(exc),
                'trace': traceback.format_exc(),
            }

registry = FunctionRegistry()

# Function 1: Track Order Status
def track_order(order_id: str) -> dict:
    """Look up an order's status through the backend API.

    Args:
        order_id: Order identifier; it is URL-quoted before being placed in
            the request path.

    Returns:
        On success: {'success': True, 'order_id', 'status', 'carrier',
        'tracking_number', 'estimated_delivery'}.
        On failure: {'success': False, 'error': <message>} plus 'status_code'
        when the backend answered with an HTTP error. Only a 404 is reported as
        "not found"; other HTTP errors are reported as backend errors.
    """
    from urllib.parse import quote  # local import keeps this block self-contained

    try:
        # safe='' also quotes "/", so the id cannot alter the request path.
        url = f"{BACKEND_URL}/orders/{quote(str(order_id), safe='')}"
        response = requests.get(url, timeout=5)
    except Exception as e:
        return {'success': False, 'error': f"Could not connect to order system: {str(e)}"}

    if response.status_code == 404:
        return {'success': False, 'error': f"Order {order_id} not found", 'status_code': response.status_code}
    if not response.ok:
        # e.g. 5xx while the backend is unavailable: not the same as a missing order.
        return {
            'success': False,
            'error': f"Order system returned an error (HTTP {response.status_code})",
            'status_code': response.status_code
        }

    try:
        data = response.json()
    except ValueError as e:
        return {'success': False, 'error': f"Invalid response from order system: {str(e)}"}
    if not isinstance(data, dict):
        return {'success': False, 'error': "Invalid response from order system: expected a JSON object"}

    return {
        'success': True,
        'order_id': order_id,
        'status': data.get('status', 'UNKNOWN'),
        'carrier': data.get('carrier'),
        'tracking_number': data.get('trackingNumber'),
        'estimated_delivery': data.get('estimatedDelivery')
    }

# Expose track_order to the assistant as 'trackOrder' (one required string parameter).
registry.register(
    name='trackOrder',
    func=track_order,
    schema={
        'name': 'trackOrder',
        'description': 'Get the current status and tracking information for an order',
        'parameters': {
            'order_id': {
                'type': 'string',
                'description': 'Order ID (format: ORD-XXXXXX)',
                'required': True,
            },
        },
    },
)

# Function 2: Submit Complaint
def submit_complaint(user_id: str, order_id: str = None, complaint_text: str = '', category: str = 'general') -> dict:
    """Submit a customer complaint to the backend.

    Args:
        user_id: Customer identifier.
        order_id: Related order ID, or None when the complaint is not tied to an
            order (the registered schema declares this parameter optional).
        complaint_text: Description of the complaint; must not be blank.
        category: Complaint category, 'general' by default.

    Returns:
        On success: {'success': True, 'complaint_id', 'message'}. When the
        backend does not supply an id, a time-based 'COMP-<epoch>' id is used.
        On failure: {'success': False, 'error': <message>} plus 'status_code'
        when the backend answered with an HTTP error.
    """
    if not complaint_text or not complaint_text.strip():
        return {'success': False, 'error': 'Complaint text is required'}

    # Complaints mentioning any of these words are escalated to high priority.
    lowered = complaint_text.lower()
    is_urgent = any(word in lowered for word in ['damaged', 'broken', 'wrong', 'missing'])
    payload = {
        'userId': user_id,
        'orderId': order_id,
        'complaint': complaint_text,
        'category': category,
        'source': 'ai_assistant',
        'priority': 'high' if is_urgent else 'medium'
    }

    try:
        response = requests.post(f"{BACKEND_URL}/complaints", json=payload, timeout=5)
    except Exception as e:
        return {'success': False, 'error': f"Could not connect to complaint system: {str(e)}"}

    if not response.ok:
        return {'success': False, 'error': 'Could not submit complaint', 'status_code': response.status_code}

    # A 2xx answer means the complaint was accepted even if the body is empty or
    # not a JSON object; reporting a failure here would invite duplicate submissions.
    try:
        data = response.json()
    except ValueError:
        data = {}
    if not isinstance(data, dict):
        data = {}

    return {
        'success': True,
        'complaint_id': data.get('id', 'COMP-' + str(int(time.time()))),
        'message': 'Complaint submitted successfully. Our team will contact you within 24 hours.'
    }

# Expose submit_complaint to the assistant as 'submitComplaint'.
# user_id and complaint_text are declared required; order_id and category optional.
registry.register(
    name='submitComplaint',
    func=submit_complaint,
    schema={
        'name': 'submitComplaint',
        'description': 'Submit a customer complaint about an order or service issue',
        'parameters': {
            'user_id': {
                'type': 'string',
                'description': 'Customer user ID',
                'required': True,
            },
            'order_id': {
                'type': 'string',
                'description': 'Related order ID if applicable',
                'required': False,
            },
            'complaint_text': {
                'type': 'string',
                'description': 'Description of the complaint',
                'required': True,
            },
            'category': {
                'type': 'string',
                'description': 'Category: damaged_item, wrong_item, late_delivery, poor_service, general',
                'required': False,
            },
        },
    },
)

# Function 3: Search Products (robust to different backend response shapes)
def _extract_product_list(body) -> list:
    """Return the list of product records contained in a decoded JSON body."""
    if isinstance(body, list):
        return body
    if not isinstance(body, dict):
        return []

    # Common pagination shapes: {products: [...]}, {data: [...]}, {items: [...]}
    for key in ('products', 'data', 'items'):
        if isinstance(body.get(key), list):
            return body[key]

    # Otherwise use the first list-valued field, if it has any entries.
    first_list = next((value for value in body.values() if isinstance(value, list)), None)
    if first_list:
        return first_list

    # Heuristic: a dict with 'name' or 'price' is treated as a single product.
    if 'name' in body or 'price' in body:
        return [body]
    return []


def search_products(query: str, limit: int = 5) -> dict:
    """Search the product catalog and normalise the backend's response.

    Args:
        query: Search text (product name or keywords).
        limit: Maximum number of products to return. Values that cannot be
            converted to int fall back to 5; negative values are treated as 0.

    Returns:
        On success: {'success': True, 'count': <number of products the backend
        returned, before truncation>, 'products': [{'name', 'price',
        'in_stock'}, ...]} with at most `limit` products.
        On failure: {'success': False, 'error': <message>} plus 'status_code'
        when the backend answered with an HTTP error.
    """
    # Sanitise the limit first so the backend and the local slice agree on it.
    try:
        limit_n = int(limit)
    except (TypeError, ValueError, OverflowError):
        limit_n = 5
    limit_n = max(limit_n, 0)  # a negative value would otherwise slice from the end

    try:
        params = {'search': query, 'limit': limit_n}
        response = requests.get(f"{BACKEND_URL}/products", params=params, timeout=5)
    except Exception as e:
        # Return repr for clearer debugging in Colab logs
        return {'success': False, 'error': f"Could not connect to product catalog: {repr(e)}"}

    if not response.ok:
        return {'success': False, 'error': 'Could not search products', 'status_code': response.status_code}

    try:
        body = response.json()
    except Exception as e:
        return {'success': False, 'error': f'Invalid JSON from product catalog: {str(e)}'}

    products_list = _extract_product_list(body)

    products_out = []
    for p in products_list[:limit_n]:
        if isinstance(p, dict):
            products_out.append({
                'name': p.get('name') or p.get('title') or p.get('productName'),
                'price': p.get('price'),
                'in_stock': bool(p.get('stock', 0))
            })
        else:
            # Non-dict entries (e.g. bare names) are kept as name-only products.
            products_out.append({'name': str(p), 'price': None, 'in_stock': False})

    return {
        'success': True,
        'count': len(products_list),
        'products': products_out
    }

# Expose search_products to the assistant as 'searchProducts'
# (required query string, optional integer limit).
registry.register(
    name='searchProducts',
    func=search_products,
    schema={
        'name': 'searchProducts',
        'description': 'Search for products by name or keyword',
        'parameters': {
            'query': {
                'type': 'string',
                'description': 'Search query (product name or keywords)',
                'required': True,
            },
            'limit': {
                'type': 'integer',
                'description': 'Maximum number of results (default: 5)',
                'required': False,
            },
        },
    },
)

# Report which backend functions are registered and whether calling them is enabled.
registered_function_names = list(registry.functions)
print(f"✅ Function registry initialized: {registered_function_names}")
print(f"⚠️  Function calling enabled: {ENABLE_FUNCTIONS}")
if not ENABLE_FUNCTIONS:
    # Only shown when function calling has been switched off.
    print("   To enable functions, set ENABLE_FUNCTIONS = True and configure BACKEND_URL")


# Keyword-based policy matching (NO embeddings)
def find_relevant_policies(message: str, intent: str, max_results: int = 3) -> list:
    """Select policies whose category is triggered by keywords in the message.

    Args:
        message: Customer message; matching is case-insensitive substring search.
        intent: Accepted for interface symmetry; it does not influence the result.
        max_results: Maximum number of policies to return.

    Returns:
        The first `max_results` policies, in GROUND_TRUTH order, whose category
        was triggered. When no keyword matches, every category counts as
        triggered.
    """
    text = message.lower()

    # category -> substrings that trigger it
    keyword_table = {
        'returns': ['return', 'refund', 'exchange', 'money back', 'send back'],
        'shipping': ['ship', 'delivery', 'track', 'carrier', 'how long', 'transit', 'arrive'],
        'payments': ['payment', 'pay', 'credit card', 'secure', 'transaction', 'billing'],
        'warranty': ['warranty', 'guarantee', 'defect', 'malfunction', 'broken'],
        'privacy': ['privacy', 'data', 'personal information', 'gdpr', 'security'],
        'account': ['account', 'register', 'login', 'password', 'profile'],
        'orders': ['order', 'purchase', 'checkout', 'cart', 'buy'],
        'products': ['product', 'item', 'search', 'find', 'available', 'stock'],
    }

    wanted_categories = {
        category
        for category, triggers in keyword_table.items()
        if any(trigger in text for trigger in triggers)
    }
    # No keyword hit: consider every category (let the LLM decide).
    if not wanted_categories:
        wanted_categories = set(keyword_table)

    matches = [
        policy
        for policy in GROUND_TRUTH
        if policy.get('category', '').lower() in wanted_categories
    ]
    return matches[:max_results]


# Detect if function should be called based on intent and message
# Order references look like ORD-123456, ORD_123456, ORDER-ABC123 or ORD123456.
# "ORDER" is tried before "ORD" so that ORDER123456 yields 123456 rather than
# ER123456, and a reference written without a separator must contain a digit so
# that ordinary words ("ordinarily", "orderliness") are not mistaken for IDs.
_ORDER_ID_RE = re.compile(
    r'\b(?:ORDER|ORD)(?:[-_]([A-Z0-9]{6,})|(?=[A-Z0-9]*\d)([A-Z0-9]{6,}))\b',
    re.IGNORECASE,
)


def _extract_order_id(message: str):
    """Return the first order reference in `message` as 'ORD-XXXXXX', or None."""
    match = _ORDER_ID_RE.search(message)
    if not match:
        return None
    return f"ORD-{(match.group(1) or match.group(2)).upper()}"


def should_call_function(intent: str, message: str) -> tuple:
    """Decide which registered backend function, if any, to call.

    Args:
        intent: Intent label ('order_status', 'complaint', 'product_search', ...).
        message: The customer's message.

    Returns:
        (function_name, kwargs) for the function to execute, or (None, None)
        when function calling is disabled, the intent has no function, or an
        order-status message contains no order ID.
    """
    # Skip function calling if disabled
    if not ENABLE_FUNCTIONS:
        return (None, None)

    message_lower = message.lower()

    # Order tracking: only possible when the message carries an order ID
    if intent == 'order_status':
        order_id = _extract_order_id(message)
        if order_id:
            return ('trackOrder', {'order_id': order_id})

    # Complaint submission: always filed; the order ID is attached when present
    if intent == 'complaint':
        order_id = _extract_order_id(message)

        # Determine category (first matching group wins)
        category = 'general'
        if any(word in message_lower for word in ['damaged', 'broken', 'defective']):
            category = 'damaged_item'
        elif any(word in message_lower for word in ['wrong', 'incorrect', 'different']):
            category = 'wrong_item'
        elif any(word in message_lower for word in ['late', 'delayed', 'slow']):
            category = 'late_delivery'
        elif any(word in message_lower for word in ['rude', 'unprofessional', 'service']):
            category = 'poor_service'

        return ('submitComplaint', {
            'user_id': 'TEMP_USER',  # Backend will provide this
            'order_id': order_id,
            'complaint_text': message,
            'category': category
        })

    # Product search: the whole message is used as the query
    if intent == 'product_search':
        return ('searchProducts', {'query': message, 'limit': 5})

    return (None, None)


# Flask endpoints
@app.route("/health", methods=["GET"])
def health():
    """Liveness/status endpoint.

    Returns a JSON object with the compute device, the loaded model's name, the
    number of policies, the registered function names (empty when function
    calling is disabled), the assistant's name and the current Unix time.
    """
    return jsonify({
        "status": "ok",
        "device": DEVICE,
        # Report the model that was actually loaded instead of a second
        # hard-coded copy of its name that could drift from MODEL_NAME.
        "model": MODEL_NAME,
        "policies_loaded": len(GROUND_TRUTH),
        "functions_available": list(registry.functions.keys()) if ENABLE_FUNCTIONS else [],
        "functions_enabled": ENABLE_FUNCTIONS,
        "assistant": CONFIG['assistant']['name'],
        "time": time.time()
    })


@app.route("/chat", methods=["POST"])
def chat():
    """Main endpoint called by backend with masked message and intent.

    Reads a JSON body with 'message' (required), 'intent' (defaults to
    'policy_question') and 'userId' (defaults to 'TEMP_USER'). A missing or
    blank message is answered with HTTP 400 and a JSON error payload.
    """

    # force=True parses the body regardless of Content-Type; silent=True yields
    # None on malformed JSON, which the `or {}` turns into an empty payload.
    payload = request.get_json(force=True, silent=True) or {}

    # Extract request data
    message = (payload.get('message') or '').strip()
    intent = payload.get('intent', 'policy_question')
    user_id = payload.get('userId', 'TEMP_USER')

    if not message:
        return jsonify({
            "text": "No message provided",
            "citations": [],
            "functionsCalled": [],
            "responseTime": 0
        }), 400

    start_time = time.time()
    functions_called = []
    # NOTE(review): the visible body stops here. As shown, a request with a
    # non-empty message reaches the end of the function without returning a
    # response, and intent, user_id, start_time and functions_called are never
    # used. The rest of the handler appears to be missing from this view —
    # confirm against the original notebook.



✅ Function registry initialized: ['trackOrder', 'submitComplaint', 'searchProducts']
⚠️  Function calling enabled: True


# Cell 7: Start ngrok tunnel (for Colab LLM endpoint)

**Note**: Your backend is already hosted on Render.com at `https://api-backend-hxfy.onrender.com`
This ngrok tunnel is only for exposing the Colab LLM server to your local machine.

In [7]:
# Cell 7: Start ngrok tunnel (for Colab LLM endpoint)

from getpass import getpass

# Read the token from the NGROK_AUTHTOKEN environment variable when it is set;
# otherwise prompt with getpass so the secret is not echoed into the notebook's
# saved output (input() would print it next to the prompt).
ngrok_token = (os.environ.get("NGROK_AUTHTOKEN") or getpass("Enter your ngrok authtoken: ")).strip()
if not ngrok_token:
    raise ValueError("❌ ngrok token is required")

ngrok.set_auth_token(ngrok_token)

# Close any existing tunnels so re-running this cell does not accumulate them
for tunnel in ngrok.get_tunnels():
    ngrok.disconnect(tunnel.public_url)

# Create new tunnel on port 5000 (for the LLM server)
public_url = ngrok.connect(5000, bind_tls=True).public_url

print("=" * 80)
print("✅ ngrok tunnel created successfully!")
print("=" * 80)
print(f"\n🌐 Colab LLM Public URL: {public_url}")
print(f"\n📋 UPDATE YOUR LOCAL .env FILE:")
print(f"   LLM_ENDPOINT={public_url}/chat")
print("\n✅ Backend Configuration:")
# Show the URL the function calls actually use rather than a hard-coded copy.
print(f"   Backend URL: {BACKEND_URL}")
print("   Functions: ENABLED (Colab can reach Render.com)")
print("   No backend tunnel needed - already hosted!")
print("\n🔄 Architecture:")
print("   Local → Render.com Backend → Colab LLM (this tunnel)")
print("              ↓")
print("   Colab calls Render.com APIs for function calling")
print("\n" + "=" * 80)

Enter your ngrok authtoken: [REDACTED — the token that was echoed here should be treated as exposed and rotated]
✅ ngrok tunnel created successfully!

🌐 Colab LLM Public URL: https://c6aae9f34a6a.ngrok-free.app

📋 UPDATE YOUR LOCAL .env FILE:
   LLM_ENDPOINT=https://c6aae9f34a6a.ngrok-free.app/chat

✅ Backend Configuration:
   Backend URL: https://api-backend-hxfy.onrender.com/api
   Functions: ENABLED (Colab can reach Render.com)
   No backend tunnel needed - already hosted!

🔄 Architecture:
   Local → Render.com Backend → Colab LLM (this tunnel)
              ↓
   Colab calls Render.com APIs for function calling



# Cell 8: Start Flask server and test

In [8]:
# Cell 8: Start Flask server and test
#
# Launches the Flask LLM server in a background daemon thread, then smoke-tests
# the /health and /chat endpoints through the public ngrok URL from Cell 7.

import requests
import threading

# Start Flask in background thread. If an earlier run of this cell already
# started one that is still alive, reuse it: a second server could not bind
# port 5000 again.
_running_thread = globals().get("server_thread")
if _running_thread is not None and _running_thread.is_alive():
    print("ℹ️ Flask server thread already running - reusing it")
else:
    server_thread = threading.Thread(target=run_flask, daemon=True)
    server_thread.start()

    print("🚀 Starting Flask server on port 5000...")
    time.sleep(3)  # Give server time to start

# Test the server
server_healthy = False
try:
    # Test health endpoint
    health_resp = requests.get(f"{public_url}/health", timeout=5)
    if health_resp.ok:
        server_healthy = True
        print("✅ Health check passed")
        print(f"   {health_resp.json()}")
    else:
        # Previously a non-2xx health response was silently ignored.
        print(f"⚠️ Health check failed: {health_resp.status_code}")

    # Test chat endpoint
    print("\n🧪 Testing chat endpoint...")
    test_payload = {
        "message": "How long does shipping take?",
        "intent": "policy_question"
    }

    # The chat call waits for model generation, which takes several seconds,
    # so it gets far more headroom than the health check.
    chat_resp = requests.post(f"{public_url}/chat", json=test_payload, timeout=60)
    if chat_resp.ok:
        data = chat_resp.json()
        print("✅ Chat endpoint working")
        # .get() keeps a missing field from masking an otherwise good response.
        print(f"\n   Response: {str(data.get('text', ''))[:100]}...")
        print(f"   Citations: {data.get('citations')}")
        print(f"   Response time: {data.get('responseTime')}s")
    else:
        print(f"⚠️ Chat test failed: {chat_resp.status_code}")

except Exception as e:
    # Best-effort smoke test: report the problem but let the cell finish.
    print(f"⚠️ Test failed: {e}")

print("\n" + "=" * 80)
if server_healthy:
    print("✅ Server is running!")
else:
    print("⚠️ Server did not pass the health check - see messages above")
print("=" * 80)
print(f"\n🌐 Your backend should call: {public_url}/chat")
print("\nExpected request format:")
print('  {"message": "user question here", "intent": "policy_question"}')
# The cell itself returns; the server lives in a daemon thread of this kernel.
print("\n💡 Keep this Colab runtime connected - the server runs in a background thread")
print("=" * 80)

🚀 Starting Flask server on port 5000...
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://172.28.0.12:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [23/Oct/2025 04:53:22] "GET /health HTTP/1.1" 200 -


✅ Health check passed
   {'assistant': 'Shoppy', 'device': 'cuda', 'functions_available': ['trackOrder', 'submitComplaint', 'searchProducts'], 'functions_enabled': True, 'model': 'Qwen/Qwen2.5-7B-Instruct', 'policies_loaded': 16, 'status': 'ok', 'time': 1761195202.4669783}

🧪 Testing chat endpoint...


INFO:werkzeug:127.0.0.1 - - [23/Oct/2025 04:53:30] "POST /chat HTTP/1.1" 200 -


✅ Chat endpoint working

   Response: Shipping times depend on the option you choose. For Standard shipping, expect delivery in 5-7 days [...
   Citations: []
   Response time: 7.29s

✅ Server is running!

🌐 Your backend should call: https://c6aae9f34a6a.ngrok-free.app/chat

Expected request format:
  {"message": "user question here", "intent": "policy_question"}

💡 Keep this cell running - the server will stay active


## ✅ Setup Complete!

Your LLM endpoint is now running with **Function Calling** to your **Render.com backend**!

### Configuration Summary:

✅ **Backend**: `https://api-backend-hxfy.onrender.com/api` (Render.com - publicly accessible)
✅ **LLM Server**: Running on Google Colab (this notebook)
✅ **Function Calling**: ENABLED - Colab can reach your Render.com backend

### Next Steps:

1. **Copy the ngrok URL** from Cell 7 output (looks like `https://abc123.ngrok-free.app`)

2. **Update your LOCAL .env file** with:
   ```
   LLM_ENDPOINT=https://your-colab-ngrok-url.ngrok-free.app/chat
   ```

3. **Test the full system** with:
   ```bash
   node test-assistant-local.js
   ```

### Function Calling Features:

✨ **Three Functions Available:**

1. **trackOrder** - Get real-time order status
   - Triggered on: `order_status` intent
   - Calls: `GET https://api-backend-hxfy.onrender.com/api/orders/{orderId}`
   - Example: "Track my order ORD-123456"

2. **submitComplaint** - File customer complaints
   - Triggered on: `complaint` intent
   - Calls: `POST https://api-backend-hxfy.onrender.com/api/complaints`
   - Auto-categorizes: damaged_item, wrong_item, late_delivery, poor_service, general
   - Example: "My package arrived damaged for order ORD-123456"

3. **searchProducts** - Search product catalog
   - Triggered on: `product_search` intent
   - Calls: `GET https://api-backend-hxfy.onrender.com/api/products?search={query}`
   - Example: "Do you have wireless headphones?"

### How It Works:

- **Local Test** → **Render.com Backend** → **Google Colab LLM**
- **Colab** → Detects if function needed → calls **Render.com APIs** → formats response
- **Render.com** ← Receives grounded response with function results

### Architecture Flow:

```
User Message (Local)
    ↓
Render.com Backend (https://api-backend-hxfy.onrender.com)
    ├─ PII Masking
    ├─ Intent Classification
    ├─ Fast-path (chitchat/violations)
    └─ → Google Colab LLM (via ngrok)
           ↓
        Colab (This Notebook)
           ├─ Function Detection
           ├─ Render.com API Call ← (trackOrder, submitComplaint, searchProducts)
           ├─ Policy Grounding
           └─ LLM Response
                ↓
        Render.com Backend
           └─ Response to User
```

### Perfect Production Setup:

🎯 **No Local Dependencies** - Everything runs in the cloud
🎯 **Scalable** - Both Render.com and Google Colab handle traffic
🎯 **Reliable** - No backend tunnel or port forwarding needed; only the Colab LLM server is exposed through an ngrok tunnel

Keep this Colab runtime connected after running Cell 8 (do not restart or disconnect it) to keep the server active! 🚀