In [None]:
# Cell 1: Install dependencies
# Uses the notebook shell escape (`!`) to pip-install the packages imported in
# the next cell: transformers, torch, sentence-transformers, faiss-cpu
# (imported as `faiss`), flask and pyngrok.
# NOTE(review): the next cell also imports `yaml`, which is not installed
# here -- presumably PyYAML is preinstalled in the target runtime; confirm.
!pip install transformers torch sentence-transformers faiss-cpu flask pyngrok


In [None]:
# Cell 2: Import libraries and setup
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from sentence_transformers import SentenceTransformer
import faiss
from flask import Flask, request, jsonify
from pyngrok import ngrok
import yaml

# Report GPU availability up front so a CPU-only runtime is noticed before
# any model is loaded.
has_cuda = torch.cuda.is_available()
print("CUDA available:", has_cuda)
if has_cuda:
    # Index 0 is the first CUDA device visible to this process.
    print("Device:", torch.cuda.get_device_name(0))


In [None]:
KNOWLEDGE_BASE = [
    {"title": "Shoplite User Registration Process", "content": """To create a Shoplite account, buyers visit the registration page and provide their full name, email address, and a secure password. Users must verify their email within 24 hours via a verification link.

After authentication, buyers can enjoy all shopping functionalities, such as product browsing, product addition to the shopping cart, order placement and order history. Two-factor authentication is optional but recommended in terms of security and it can be enabled by users with the help of an authenticator application or SMS code.

The registration process of sellers has to be conducted in a more thorough way, they have to provide business documents such as a tax ID, bank account information, and a business registration. Shoplite staff checks through these documents and verification usually takes 2-3 business days. Authenticated sellers have the ability to make a listing, maintain inventory, sales analytics cards, and access dashboards. In the process of verification, sellers receive emails informing about documents that are lacking or not fully provided to them.

The password recovery feature of Shoplite is also enabled with a 30 minute reset link. When there are issues with several failed attempts to gain access, then the accounts are disabled. The users can keep track of logins, purchases, and profile modification via activity logs to keep their profile secure.""", "id": "doc1"},


     {"title": "Shoplite Product Search", "content": """The search system on Shoplite is highly effective, enabling users to quickly find items using keywords, categories, price ranges, brands, sellers, and ratings. The search bar features autocomplete, which dynamically updates as the user types, enhancing search accuracy. Users can filter results using multiple parameters simultaneously, such as availability, shipping options, or customer reviews, and sort results by relevance, price, or popularity.

To improve performance and reduce server load, the search system uses a five-minute cache. Users can access their recent searches, save favorite searches, and receive alerts when new products matching their criteria are added. For mobile users, Shoplite provides responsive search results with optimized data requests for fast loading and smooth browsing.

Advanced search is available for power users, allowing complex queries that combine category filters, price ranges, and seller ratings. This feature is particularly useful for buyers searching for niche products or comparing multiple sellers. Developers can access the Shoplite API to integrate search functionality into third-party applications, providing programmatic access to product data.

Search results display product rankings, images, availability, and shipping estimates. Out-of-stock products are clearly indicated, and related products or recommendations are shown when searches are vague or return few results, improving user experience.""", "id": "doc2"},


     {"title": "Shoplite Shopping Cart", "content": """The Shoplite shopping cart will enable buyers to add various items from sellers and manage quantities and save the products to purchase in the future. The products in the cart are stored in between sessions in case of a logged-in user, and the shopping experience is continuous on both desktop and mobile platforms. The user can see an overview of the items displayed, in terms of images, prices and estimated shipping and taxes. Discounts, promotional codes, and loyalty points can be applied directly within the cart before checkout.

Users can edit quantities, remove items, or move products to a wishlist. Cart updates are reflected in real-time, showing updated totals including shipping and taxes. The system prevents adding more items than are currently in stock and provides warnings if an item becomes unavailable after being added to the cart. Shoplite also stores a limited history of recently removed items to allow users to restore them easily if desired.

To the sellers, the shopping cart communicates with the inventory management system enabling automatic updating of stock quantities upon completion of an order. Sellers are notified when the products in the cart are low in stock or when they are temporarily out of stock. Shoplite is used to make sure that multiple buyers can buy simultaneously without overselling at the cost of real time stock verification and atomic transactions.

The cart integrates with Shoplite’s payment system, allowing users to select their preferred payment method by credit card, mobile wallets or cash on delivery and review order summaries before confirming the purchase.The users receive confirmation mail on the items, totals, shipping and approximate delivery dates.""", "id": "doc3"},


     {"title": "Shoplite Checkout Process", "content": """The Shoplite checkout process allows users to securely complete purchases for items in their shopping cart. After reviewing their cart contents, users proceed to the checkout page, where they confirm shipping addresses, select delivery options, and choose a payment method. Available payment methods include credit/debit cards, mobile wallets, and cash on delivery (COD). The system automatically computes the total cost, which includes item prices, shipping charges, taxes, and any discounts or promotion codes that might be applied.

Users can store multiple shipping addresses and select a preferred address at checkout. The system provides approximate delivery dates based on shipping speed and location. First-time buyers are prompted to confirm billing information to avoid errors. Gift messages can be added, loyalty points applied, and final order details reviewed before payment is confirmed. The interface highlights any missing or incomplete information to prevent failed transactions.

Shoplite implements several security measures at checkout. Credit/debit card and mobile wallet payment details are encrypted using industry-standard protocols. High-value orders may require two-factor authentication. Once payment is successfully processed, confirmation emails are sent, including the order summary, payment details, shipping information, and estimated delivery dates.

For sellers, completed orders automatically update inventory levels and record sales data. Shoplite ensures that concurrent purchases are handled correctly, preventing overselling through real-time stock checks and atomic transactions. Refunds and cancellations are supported within defined timeframes, and all transactions are logged for auditing purposes.**Overlap:** This document links to **Document 3: Shoplite Shopping Cart**, since checkout relies on items in the cart. It also connects to **Document 16: Shoplite Promo Codes**, because discounts applied at checkout depend on active codes, and to **Document 2: Shoplite Product Search**, as users must first locate items to purchase.""", "id": "doc4"},


     {"title": "Shoplite Payment Methods", "content": """Shoplite offers various secure methods of payment in order to finalize the purchases done within the shopping cart and check out processes. Users have the opportunity to pay by credit or debit cards issued by the local banks, which is transacted safely through encrypted connection. For buyers who do not want to use cards online, cash on delivery (COD) is accepted, where they can pay on delivery. Furthermore, Shoplite accepts mobile wallets including OMT, so a user can directly pay with a smartphone.

The system will compute the overall cost of a payment, which includes item prices, shipping costs and taxes, along with any discounts or promotions. Before paying, a summarized version of the payment is made visible to the users. Shoplite also allows saving preferred payment methods for faster checkout in future orders, while still requiring user confirmation for security purposes.

To protect financial information, all transactions use industry-standard encryption protocols, preventing unauthorized access during transmission. Large payments might have to undergo further authentication procedures like two-factor authentication. Users are also sent confirmation emails when their payment is successful and these emails include the details of the transaction, order summary, and approximate delivery dates.

To sellers, the successful payments will automatically update the inventory and save the sales information. Orders of cash-on-delivery are set as pending till the delivery is confirmed, so that the completed and pending payments can be tracked correctly.""", "id": "doc5"},


    {"title": "Shoplite Order Tracking", "content": """Shoplite will have a detailed order tracking and delivery system that will keep buyers updated on purchases up until delivery. Once an order is placed, the user is given a tracking number by which that particular order is associated. This number enables them to track the progress of their order in real-time and see the processing, shipment, transit and delivery phases in real time.

Buyers will be able to get tracking details either on their account dashboard or by receiving email and SMS notifications. Shoplite provides approximate delivery time depending on the shipping method and destination. The system has standard and express delivery options, and it will automatically adjust the expected delivery dates in case of any delays that could be caused by weather, stock shortages, or carrier-related problems.

The platform allows the use of various delivery providers to be reliable and flexible. Delivery notifications include detailed information about the package location, expected arrival, and any special instructions. With mobile users, Shoplite sends push notifications to inform its users that the package has been delivered or is about to be delivered.

To sellers, Shoplite incorporates inventory management with order fulfillment. Shipment labels are printable on-site and sellers are notified when any packages are delayed or returned. Orders that are indicated as delivered will update inventory and sales reports automatically. Cancellations and returns are also done effectively through the system whereby, tracking data is updated and notifications sent to the buyers and sellers.""", "id": "doc6"},


     {"title": "Shoplite Return and Refund Policies", "content": """Shoplite provides a clear and structured return and refund policy to ensure buyer satisfaction. Buyers can return products within 14 days of receipt, provided the items are unused, in their original packaging, and accompanied by a receipt or proof of purchase. Certain items, such as digital downloads, perishable goods, and personalized products, are non-returnable.

To initiate a return, users access their account dashboard and select the order they wish to return. They can specify the reason for the return using predefined categories, such as defective items, incorrect products, or dissatisfaction. Once a return request is submitted, Shoplite generates a return authorization and shipping label for physical products. Users can track the status of their return, including label generation, item receipt, and refund processing.

Refunds are typically processed within 5–7 business days after the returned item is received and inspected. Refunds are issued to the original payment method or, upon request, as store credit. Shoplite sends email notifications at each stage to ensure transparency.

For sellers, Shoplite provides guidance for inspecting returned items and updating inventory. Refunds are automatically deducted from the seller’s account balance. Records of returns and refunds are maintained for auditing and reporting purposes.

Shoplite encourages buyers to contact customer support for issues such as missing shipping labels or delayed refunds. A transparent return policy helps maintain buyer confidence and minimizes conflicts between buyers and sellers.""", "id": "doc7"},


    {"title": "Shoplite Product Reviews", "content": """Shoplite gives buyers the option to leave comprehensive reviews and ratings on products purchased so that other users may make informed choices. Users are also allowed to rate products on a 1 to 5 stars scale after making a purchase followed by the text review of the experience. The commentary in reviews can involve comments on quality of products, speed of shipping and responsiveness of the seller. The purchasers have the option of adding pictures to their reviews as well.

To guarantee quality of reviews, Shoplite will only allow the user to leave a review after buying the product. Reviews are moderated automatically using AI to detect inappropriate language, spam, or false claims. Marked reviews are forwarded to the moderation team where they are manually verified. Verified reviews are displayed with the reviewer’s name, purchase date, and optional profile information, ensuring transparency and trustworthiness.

For the sellers, Shoplite offers a dashboard where they can track the reviews and rating of their products. The ratings are compiled into an average score, which is presented on the product page, and the trends over time are analyzed to give sellers insight into customer satisfaction. Sellers can respond publicly to reviews to address complaints or provide clarifications, improving customer relations and service quality.

Shoplite also assists in filtering and sorting of reviews to the buyers. The reviews can be filtered by the star rating, date, or verified purchase status, and sorted by most helpful, most recent, or highest rating. The system features the top reviews that have images or helpful ones that other users marked. This ensures that relevant and reliable feedback is easily accessible.""", "id": "doc8"},


     {"title": "Shoplite Seller Account Setup", "content": """The user has to create a separate seller account with Shoplite to become a seller. The process begins with providing basic personal information, including full name, email, and contact number, along with business details such as company name, tax ID, and legal registration documents. The users should also include a legitimate bank account in settlement of payment.

Shoplite verifies all submitted documents to ensure compliance with legal and financial regulations. The verification process typically requires 2-3 business days and email notification is sent to the seller concerning any information that is missing or not provided. Once verified, sellers gain access to the seller dashboard, where they can list products, run their inventory, track orders, and access analytics to monitor sales and customer interaction.

The seller dashboard provides a user-friendly interface for bulk product uploads, pricing updates, and promotional campaign management. Sellers can categorize products, set stock levels, and define shipping options for each item. Real-time alerts notify sellers of low stock, pending orders, or customer queries. Shoplite is also compatible with multiple seller accounts in one business entity, allowing teams to operate together and have individual user permissions and access control.

Seller account management is based on security and compliance. All sensitive documents are kept in a safe place, passwords are encrypted, two-factor authentication is encouraged, and passwords are stored in a safe place. Sellers are required to follow Shoplite’s policies regarding prohibited items, product descriptions, and quality standards. Violations may result in temporary suspension or account deactivation, with detailed instructions provided to remedy issues.""", "id": "doc9"},


    {"title": "Shoplite Inventory Management", "content": """Shoplite also offers sellers with a powerful inventory management system that enables them to monitor and control their products effectively. The sellers are able to add the new products one by one or in bulk, change the description of their products, price, stock and assign categories. The products have a description of their SKU, weight, dimensions, shipping choice, photos, and optional promotion tags to increase product visibility. The platform allows product variants that can be in terms of size, color, model among others and this enables sellers to handle the complex inventory with minimal difficulty. Also, sellers are able to arrange products in collections or bundles to maximize sales tactics and generate a more attractive shopping experience to buyers.

The system automatically updates inventory when a purchase is made, ensuring that stock levels remain accurate across the platform. Sellers receive real-time notifications when stock is low or out of stock, allowing them to restock promptly. Inventory reports provide insights into product performance, helping sellers forecast demand and manage supply effectively. For seasonal promotions or limited-time offers, sellers can schedule stock adjustments, pre-order availability, and timed product launches in advance.Bulk import/export is also supported by Shoplite, using a CSV file or Excel file which allows sellers to update large inventory in a short period of time and ensure consistency in multiple listings.

The site provides more sophisticated features such as automated reorder notifications, low-stock insights and past purchasing records to help make purchasing decisions. Sellers can set thresholds for automatic notifications and receive actionable suggestions for inventory replacement based on sales velocity, seasonal trends, and historical demand patterns. Combination with order fulfillment and shipping system will make sure the inventory is updated with the running orders and reduce overselling and mistakes. Compliance with Shoplite policies is enforced at all times, and attempts to list prohibited, duplicate, or miscategorized items are flagged for review. In addition, sellers are able to create inventory reports and analytics dashboards to track tendencies, the performance of best-selling products, and make accurate decisions to maximize profitability.""", "id": "doc10"},


    {"title": "Shoplite Commission and Fees", "content": """The sellers are charged on a commission-based model where the shoplite receives a commission on every completed sale on the platform. The standard commission rate varies by product category, reflecting market norms and operational costs. Sellers can view the applicable rates for each category in their account dashboard, along with historical commission reports for transparency. Once a payment is made, the seller is automatically charged commissions before they can get the rest.

Besides commissions, Shoplite can use transaction fees to process payments. These charges include credit card, mobile wallet and cash-on-delivery processing. The platform will have invoices of every transaction, which display the item price, discounts applied, commission and any extra charges. These invoices can be exported by the sellers to be used to do their accounting and tax reporting.

On Shoplite, sellers can also select the option of premium to market their products, including featured listing, special promo or advertisements in the marketplace. These services cost extra and are billed either monthly or per campaign, depending on the type. Shoplite offers a dashboard displaying the level of these promotions performance in the number of buyers that encountered the product and the number of purchases. This helps sellers to understand whether it is worthwhile to proceed with promotions and make more appropriate marketing and pricing decisions.

Shoplite has strong policies on commissions and fees. Refunds, order cancellations, or disputes may affect the final commission and payout amounts. The system automatically re-calculates totals and informs the sellers about changes through email and dashboard messages. Sellers must ensure their payment details are accurate to receive payouts on schedule.""", "id": "doc11"},


    {"title": "Shoplite Customer Support", "content": """Shoplite offers a variety of support options to enable the users to solve problems easily. Buyers and sellers can contact the support team via email, live chat, or a dedicated support phone line. The site has a comprehensive Help Center that has articles, FAQs, and step-by-step instructions to resolve common problems, as well. Users can submit tickets for specific problems, which are tracked until resolved.

Support requests are categorized by urgency.  High-priority issues, such as payment problems, failed shipments, or account security concerns, are addressed within a few hours. Regular questions, such as product information or general account questions are normally done in 24-48 hours. The ticket status is notified through emails and can be seen in the user dashboard.

Sellers can also benefit by having special assistance in setting up their accounts, inventory, and promotion campaigns and disputes of orders. Technical issues, such as integration with APIs, bulk product uploads, or payment reconciliation errors, are handled by specialized support agents. The sellers may also ask to be trained or tutored on new features of the platform.

To increase support efficiency, Shoplite uses automated tools. Chatbots based on AI can give immediate responses to frequently asked questions and take the user through troubleshooting options. Buyers are notified automatically of the impending support responses, late shipments or account verification. These systems save on response time and keep the users updated.

All support interactions are logged for auditing and quality control purpose. Users can rate support interactions and provide feedback to help improve service quality. Repeat issues are analyzed to identify systemic problems and guide platform updates.""", "id": "doc12"},


        {"title": "Shoplite Mobile App Features", "content": """Shoplite offers a mobile application available for iOS and Android devices, providing users with a seamless shopping and selling experience on the go.  The application replicates the majority of the functionality on the web platform, including product search, shopping cart management, checkout, order tracking, and account management. The user can log in with their email or social media account and turn biometric identification by fingerprint or face recognition to be able to log in quickly and securely.

The mobile app includes push notifications for order updates, promotional offers, and messages from sellers. Users are able to add favorite products, follow sellers and get a personalized list of recommendations based on the browsing history and previous purchases. Its application is designed with a small screen interface, and responsive layouts, user-friendly navigation, and touch controls.

For sellers, the app will offer inventory management tools, sales analytics and have options of order fulfillment. Sellers can add new products, update stock levels, respond to buyer inquiries, and track shipments directly from their mobile device. Alerts notify sellers when stock is low, when new orders are placed, or when a return request is initiated.

Shoplite mobile also integrates with device hardware for enhanced functionality. Users can scan QR codes or barcodes to quickly add products, and the camera can be used to upload product images directly. GPS integration allows for location-based services, such as selecting the nearest delivery options or tracking shipments in real-time.

Security and privacy are implemented all over the app. All data is encrypted with industry-standard protocols and sensitive data such as payment credentials is secured. Users are able to remotely log out and manage the permissions of the device to ensure security of the account. App updates are delivered regularly to improve performance, add features, and address security patches.""", "id": "doc13"},


        {"title": "Shoplite API Documentation", "content": """Shoplite offers a comprehensive API which enables developers to connect their applications, websites or third-party services to the Shoplite platform. The API is RESTful and supports JSON data formats, enabling programmatic access to essential platform features, including product listings, inventory updates, order management, customer information, and promotions.

Developers can use API endpoints to fetch product details, create or update listings, adjust stock levels, and manage pricing. Orders placed through external channels can be synchronized with Shoplite automatically, ensuring accurate inventory and sales reporting. Customer information and purchase history can also be accessed with the API to support a loyalty program and personalized recommendations and targeted marketing campaigns.

The authentication is also managed through API keys which are issued on a per-developer account basis. All requests have to contain the API key as a header and there are also rate limits to prevent abuse. Shoplite gives rich error messages to invalid requests, missing parameters or exceeded limits. Webhooks can be configured to receive real-time notifications for events such as order creation, payment confirmation, or stock changes.

The API documentation contains examples of requests and replies, endpoints, authentication process and guidelines on how to integrate in a secure and efficient way. Developers can test API calls in the sandbox environment before moving to production, minimizing errors and improving reliability. Comprehensive guides and tutorials are available to ensure a smooth onboarding experience.""", "id": "doc14"},


        {"title": "Shoplite Security and Privacy", "content": """Shoplite has strong security and privacy control to ensure that both buyers and sellers are protected. User accounts are protected by secure passwords, optional two-factor authentication (2FA), and session timeouts to prevent unauthorized access.Sensitive information, including payment details and personal data, is encrypted using industry-standard encryption protocols during storage and transmission.

The platform regularly monitors for suspicious activity, such as unusual login attempts, multiple failed password attempts, or potential fraud in orders. Alerts are sent to users and administrators when security anomalies are detected. Shoplite also conducts periodic security audits and updates to ensure that vulnerabilities are addressed promptly.

Privacy policies ensure that personal information is only used for order processing, communication, and improving user experience. The users are allowed to control notification preferences, marketing mail preferences, and data sharing preferences. Shoplite does not sell personal data to third parties and complies with relevant privacy regulations.

For sellers, Shoplite makes sure that financial and inventory information is confidential. Backend analytics dashboards are available to authorized accounts only. The system supports role-based permissions, allowing teams to assign different access levels to employees handling inventory, orders, or customer support.

Shoplite also supports secure APIs for developers who integrate external applications. These APIs have authentication tokens and all the requests are captured to prevent misuse. Automatic alerts are sent whenever there is a suspicious API activity.""", "id": "doc15"},


        {"title": "Shoplite Promo Codes", "content": """Shoplite offers a sales promotion system that gives the sellers and administrators the ability to make discount campaigns to attract and maintain customers. Promotions can take many forms, including percentage discounts, fixed-amount reductions, free shipping, or buy-one-get-one offers. These codes can be applied to specific products, categories, shipping fees, or even entire orders, giving sellers a powerful tool to boost sales and encourage repeat purchases.

The promotional codes also can be designed with elaborate rules and limitations to enable the campaigns to reach the right people. Sellers can set expiration dates for limited-time offers, require minimum or maximum order values, and define usage limits per customer or across the entire campaign. More filters can be applied to target codes to particular audiences, such as new customers, loyal buyers, or even app users in a specific region. These environments help sellers not to be used inappropriately and keep profitable at the same time with providing appealing offers.

For buyers, the process of saving a promo code is straightforward. During checkout, customers simply enter the code in the designated field. Shoplite instantly validates the entry, applies the discount, and updates the order summary in real time. If a code is expired, invalid, or does not meet the stated conditions, the system notifies the buyer clearly, ensuring a smooth checkout experience without confusion.

Sellers benefit from analytics that track how each promotion performs. Dashboards show redemption rates, total sales, and the impact on attracting new buyers and keeping existing ones. These insights enable sellers to improve campaign in future and make smarter marketing choices.

To protect both sellers and buyers, Shoplite has the regulations of compliance concerning promotional campaigns. Fraud detection tools prevent abuse, such as the creation of multiple fake accounts to claim the same discount. Invalid redemptions are blocked automatically, and sellers must follow Shoplite’s promotional guidelines to maintain a fair and trustworthy marketplace.""", "id": "doc16"},
]


In [None]:
# Prompt templates keyed by prompt name. Every entry carries the same four
# fields, in this order:
#   role               - persona sentence for the assistant
#   goal               - what the assistant should achieve
#   context_guidelines - list of rules to follow when answering
#   response_format    - literal skeleton of the expected reply
PROMPTS = {
    # Default template: answer strictly from retrieved snippets.
    "base_retrieval_prompt": dict(
        role="You are a helpful Shoplite customer service assistant.",
        goal="Provide accurate answers using only the provided Shoplite documentation.",
        context_guidelines=[
            "Use only information from the provided document snippets",
            "Cite specific documents when possible",
            "Keep answers concise, 3-5 sentences max",
            "Provide information in a concise paragraph, not in numbered steps",
        ],
        response_format=(
            "Answer: [Your response based on context]\n"
            "Sources: [List document titles referenced]"
        ),
    ),
    # Template for questions the documentation does not cover.
    "refusal_prompt": dict(
        role="You are a Shoplite assistant that only answers using official docs.",
        goal="Refuse politely if the answer is not in the docs.",
        context_guidelines=[
            "Do not guess",
            "Redirect to customer support if needed",
        ],
        response_format=(
            "Answer: Sorry, I could not find this in the Shoplite documentation. "
            "Please contact support for more details.\n"
            "Sources: None"
        ),
    ),
    # Template for questions that span several documents.
    "complex_question_prompt": dict(
        role="You are a Shoplite assistant trained to handle complex questions.",
        goal="Combine information from multiple Shoplite documents to give a single, well-structured answer.",
        context_guidelines=[
            "Always synthesize info from at least 2 documents when relevant",
            "Clearly explain how the documents connect",
        ],
        response_format=(
            "Answer: [Synthesized explanation across docs]\n"
            "Sources: [List all documents used]"
        ),
    ),
    # Template for ambiguous questions: ask before answering.
    "clarification_prompt": dict(
        role="You are a Shoplite assistant that asks for clarification when a question is unclear.",
        goal="Make sure you understand the customer before answering.",
        context_guidelines=[
            "Point out the unclear part",
            "Suggest possible meanings",
        ],
        response_format=(
            "Answer: Could you clarify what you mean by [unclear part]?\n"
            "Suggestions: [Possible interpretations]"
        ),
    ),
    # Template for procedural answers rendered as a numbered list.
    "step_by_step_prompt": dict(
        role="You are a Shoplite assistant that explains answers step by step.",
        goal="Provide instructions in a numbered or bulleted list so customers can easily follow.",
        context_guidelines=[
            "Break down the answer into clear steps",
            "Keep steps short and actionable",
            "Always maintain accuracy from documents",
        ],
        response_format=(
            "Answer:\n"
            "  1. [Step 1 based on documents]\n"
            "  2. [Step 2 based on documents]\n"
            "  3. [Step 3 based on documents]\n"
            "Sources: [List documents used]"
        ),
    ),
}


In [None]:
# Upgrade bitsandbytes, transformers and accelerate via pip before the
# model-loading cell below, which builds a BitsAndBytesConfig for 4-bit loading
# and passes device_map="auto".
# NOTE(review): transformers was already imported in an earlier cell; an
# in-place upgrade may not take effect until the runtime is restarted -- confirm.
!pip install -U bitsandbytes
!pip install -U transformers accelerate

In [None]:
# Cell 5: LLM loading and setup
# Defines the module-level names `tokenizer`, `model` and `device`, which the
# RAG pipeline and the Flask endpoints in later cells read.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Model name (Hugging Face Hub identifier)
model_name = "NousResearch/Llama-2-7b-hf"

# 4-bit quantization config: NF4 weights with double quantization, float16
# as the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, token=None)

# Load model with 4-bit quantization
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    token=None,
)

# Determine the device that tokenized inputs are moved to
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using device:", device)

# Smoke test: one short greedy completion to confirm the model responds.
prompt = "Hello! How are you?"
inputs = tokenizer(prompt, return_tensors="pt").to(device)
outputs = model.generate(
    input_ids=inputs["input_ids"],
    # Pass the mask and an explicit pad id so generate() does not have to
    # guess them (the tokenizer defines no dedicated pad token).
    attention_mask=inputs["attention_mask"],
    max_new_tokens=50,
    pad_token_id=tokenizer.eos_token_id,
)
print("\nModel output:")
print(tokenizer.decode(outputs[0], skip_special_tokens=True))


In [None]:
# Cell 6: RAG Pipeline Implementation

from sentence_transformers import SentenceTransformer
import faiss
import torch
import re

# Create embeddings
def create_embeddings(knowledge_base, model_name='all-MiniLM-L6-v2'):
    """Load a sentence-embedding model and encode every document's content.

    Args:
        knowledge_base: Iterable of dicts; each must provide a ``"content"`` key.
        model_name: Identifier handed to ``SentenceTransformer``.

    Returns:
        A ``(embedder, embeddings)`` tuple: the loaded model and the output of
        ``embedder.encode`` called with ``convert_to_tensor=True`` on all
        document contents, in knowledge-base order.
    """
    encoder = SentenceTransformer(model_name)
    texts = []
    for entry in knowledge_base:
        texts.append(entry["content"])
    vectors = encoder.encode(texts, convert_to_tensor=True)
    return encoder, vectors

# Build FAISS index
def build_faiss_index(doc_embeddings):
    """Create a flat L2 FAISS index holding all document embeddings.

    Args:
        doc_embeddings: 2-D tensor of embeddings; the size of its second
            axis is used as the index dimensionality.

    Returns:
        A ``faiss.IndexFlatL2`` populated with the embeddings, in input order.
    """
    dim = doc_embeddings.shape[1]
    flat_index = faiss.IndexFlatL2(dim)
    # FAISS consumes numpy arrays, so move the tensor to host memory first.
    vectors = doc_embeddings.cpu().detach().numpy()
    flat_index.add(vectors)
    return flat_index

# Retrieve documents
def retrieve_docs(query, knowledge_base, embedder, index, top_k=3):
    """Return the knowledge-base entries nearest to *query*.

    Args:
        query: Question text to embed and search for.
        knowledge_base: Sequence of documents, positionally aligned with the
            vectors stored in *index*.
        embedder: Object whose ``encode([...], convert_to_tensor=True)``
            returns a tensor-like value supporting ``.cpu().detach().numpy()``.
        index: Search index exposing ``search(vectors, k)`` and returning
            ``(distances, indices)`` with one row per query.
        top_k: Maximum number of neighbours to request.

    Returns:
        A list of ``{"distance": float, "doc": <knowledge-base entry>}`` dicts
        in the order produced by the index. It may contain fewer than
        *top_k* items (or none) when the index holds fewer vectors.
    """
    query_embedding = embedder.encode([query], convert_to_tensor=True)
    distances, indices = index.search(
        query_embedding.cpu().detach().numpy(), top_k
    )

    results = []
    for dist, idx in zip(distances[0], indices[0]):
        idx = int(idx)
        # The index reports -1 for slots it could not fill (fewer than top_k
        # vectors stored). A negative subscript would silently pick the last
        # document, so those slots are dropped instead.
        if idx < 0:
            continue
        # Plain float keeps the result JSON-serializable and numpy-free.
        results.append({"distance": float(dist), "doc": knowledge_base[idx]})
    return results

# Generate response
def generate_response(query, knowledge_base, embedder, index, distance_threshold=1.0):
    """Answer *query* from the knowledge base using retrieval plus the LLM.

    Documents are retrieved with ``retrieve_docs``. When nothing is retrieved
    or the closest document is farther than *distance_threshold*, a fixed
    fallback is returned and the LLM is not called. Otherwise the documents
    within the threshold are placed in the prompt and the module-level
    ``model`` / ``tokenizer`` produce a greedy completion, of which only the
    first sentence is kept.

    Args:
        query: The customer's question.
        knowledge_base: Sequence of dicts with ``"title"`` and ``"content"``.
        embedder: Embedding model forwarded to ``retrieve_docs``.
        index: Vector index forwarded to ``retrieve_docs``.
        distance_threshold: Largest distance at which a document still counts
            as relevant (smaller distance means closer).

    Returns:
        Dict with ``"answer"`` (str), ``"sources"`` (comma-separated titles or
        ``"None"``) and ``"confidence"`` (``"High"``, ``"Medium"`` or ``"Low"``).
    """
    results = retrieve_docs(query, knowledge_base, embedder, index)

    # No hits at all, or the closest document is too far: return the fallback.
    if not results or results[0]["distance"] > distance_threshold:
        return {
            "answer": "Sorry, I could not find this in the Shoplite documentation. Please contact support for more details.",
            "sources": "None",
            "confidence": "Low"
        }

    # Only include documents under threshold
    relevant_docs = [r["doc"] for r in results if r["distance"] <= distance_threshold]
    context_text = " | ".join([f"{d['title']}: {d['content']}" for d in relevant_docs])

    # Build prompt
    prompt = (
        f"You are a Shoplite customer service assistant.\n"
        f"Answer the following question using ONLY the context provided.\n"
        f"Write your answer in ONE complete sentence. Do NOT use bullet points or lists.\n"
        f"Do NOT repeat the context.\n"
        f"If it is unrelated or not found, say: "
        f"'Sorry, I could not find this in the Shoplite documentation. Please contact support for more details.'\n\n"
        f"Context: {context_text}\n\n"
        f"Question: {query}\nAnswer:"
    )

    # Tokenize and generate. Decoding is greedy, so no temperature is passed
    # (it has no effect without sampling and 0.0 is not a valid value).
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    prompt_len = inputs["input_ids"].shape[1]
    outputs = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=100,
        do_sample=False,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Decode only the newly generated tokens. Splitting the full text on
    # "Answer:" would cut at the wrong place whenever the query or the
    # retrieved context happens to contain that marker.
    answer = tokenizer.decode(
        outputs[0][prompt_len:], skip_special_tokens=True
    ).strip()

    # Take only the first full sentence
    answer = re.split(r'(?<=[.!?])\s', answer)[0]

    # Build sources from retrieved docs
    sources = ", ".join([d["title"] for d in relevant_docs]) if relevant_docs else "None"

    # Confidence from the closest document's distance. With the default
    # threshold of 1.0 only "High" (and "Medium" at exactly 1.0) can occur
    # here; the other branches matter when a caller raises the threshold.
    top_score = results[0]["distance"]
    if top_score < 1.0:
        confidence = "High"
    elif top_score < 1.5:
        confidence = "Medium"
    else:
        confidence = "Low"

    return {
        "answer": answer,
        "sources": sources,
        "confidence": confidence
    }

# Initialize embeddings and index
# `embedder` and `index` are module-level names; the /chat endpoint passes
# them to generate_response on every request.
embedder, doc_embeddings = create_embeddings(KNOWLEDGE_BASE)
index = build_faiss_index(doc_embeddings)

# Test
# Smoke test: two Shoplite questions plus one off-topic question
# (presumably meant to exercise the fallback answer).
queries = [
    "How can I register for an account on Shoplite?",
    "How long does seller verification usually take?",
    "Who is the best player in Brazil?"
]

# Print the full response dict (answer, sources, confidence) for each query.
for query in queries:
    response = generate_response(query, KNOWLEDGE_BASE, embedder, index)
    print(f"Query: {query}\nAssistant response: {response}\n")


In [None]:
# Cell 7: Flask API setup

from flask import Flask, request, jsonify
import torch

# Flask application object; the route decorators below register on it.
app = Flask(__name__)

# Device that tokenized inputs are moved to before calling model.generate.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Health check endpoint
@app.route("/health", methods=["GET"])
def health():
    """Liveness probe: always answers 200 with a fixed status payload."""
    payload = {"status": "ok", "message": "Shoplite RAG system running"}
    return jsonify(payload), 200

# Direct LLM interaction (no RAG)
@app.route("/ping", methods=["POST"])
def ping():
    """Send the query straight to the LLM (no retrieval) and return its output.

    Request JSON:
        query (str, required): Text passed to the model as-is.

    Returns:
        200 with ``{"answer": <decoded output, including the query text>}``,
        400 with ``{"error": ...}`` when the body is not a JSON object or the
        query is missing, empty or not a string,
        500 with ``{"error": ...}`` if generation raises.
    """
    try:
        # silent=True yields None instead of raising on a missing or malformed
        # JSON body, so client mistakes are reported as 400 rather than 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({"error": "Request body must be a JSON object"}), 400

        query = data.get("query", "")
        if not query:
            return jsonify({"error": "Query is required"}), 400
        if not isinstance(query, str):
            return jsonify({"error": "Query must be a string"}), 400

        # Tokenize the query and decode greedily; temperature is omitted
        # because it has no effect without sampling.
        inputs = tokenizer(query, return_tensors="pt").to(device)
        outputs = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=50,
            do_sample=False,
            pad_token_id=tokenizer.eos_token_id,
        )
        answer = tokenizer.decode(outputs[0], skip_special_tokens=True)

        return jsonify({"answer": answer}), 200
    except Exception as e:
        # Request boundary: report the failure to the client instead of crashing.
        return jsonify({"error": str(e)}), 500

# NEW ENDPOINT FOR WEEK 5 - Simple text generation (no RAG)
@app.route("/generate", methods=["POST"])
def generate():
    """Simple text completion endpoint for Week 5 - no RAG, just LLM generation.

    Request JSON:
        prompt (str, required): Text for the model to continue.
        max_tokens (int, optional): Number of new tokens to sample.
            Defaults to 500; values above 2048 are clamped to 2048.

    Returns:
        200 with ``{"text": <completion without the prompt>, "status": "success"}``,
        400 with ``{"error": ...}`` when the body, prompt or max_tokens is invalid,
        500 with ``{"error": ..., "status": "error"}`` if generation raises.
    """
    # Upper bound for the client-supplied max_tokens: the server listens on
    # all interfaces, so the value cannot be trusted to be reasonable.
    max_tokens_cap = 2048

    try:
        # silent=True yields None instead of raising on a missing or malformed
        # JSON body, so client mistakes are reported as 400 rather than 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({"error": "Request body must be a JSON object"}), 400

        prompt = data.get("prompt", "")
        max_tokens = data.get("max_tokens", 500)

        if not prompt or not isinstance(prompt, str):
            return jsonify({"error": "Prompt is required"}), 400

        # bool is a subclass of int, so it is rejected explicitly.
        if (
            isinstance(max_tokens, bool)
            or not isinstance(max_tokens, int)
            or max_tokens < 1
        ):
            return jsonify({"error": "max_tokens must be a positive integer"}), 400
        max_tokens = min(max_tokens, max_tokens_cap)

        # Generate response using the LLM (nucleus sampling)
        inputs = tokenizer(prompt, return_tensors="pt").to(device)
        prompt_len = inputs["input_ids"].shape[1]
        outputs = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=max_tokens,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id,
        )

        # Decode only the newly generated tokens. Stripping the prompt by
        # string prefix fails whenever detokenization does not reproduce the
        # prompt text exactly.
        response_text = tokenizer.decode(
            outputs[0][prompt_len:], skip_special_tokens=True
        ).strip()

        return jsonify({
            "text": response_text,
            "status": "success"
        }), 200

    except Exception as e:
        # Request boundary: report the failure to the client instead of crashing.
        return jsonify({
            "error": str(e),
            "status": "error"
        }), 500

# Full RAG endpoint (retrieve + generate)
@app.route("/chat", methods=["POST"])
def chat():
    """Full RAG endpoint: retrieve documents, then generate an answer.

    Request JSON:
        query (str, required): The customer's question.

    Returns:
        200 with ``{"query", "answer", "sources", "confidence"}`` taken from
        ``generate_response``,
        400 with ``{"error": ...}`` when the body is not a JSON object or the
        query is missing, empty or not a string,
        500 with ``{"error": ...}`` if retrieval or generation raises.
    """
    try:
        # silent=True yields None instead of raising on a missing or malformed
        # JSON body, so client mistakes are reported as 400 rather than 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({"error": "Request body must be a JSON object"}), 400

        query = data.get("query", "")
        if not query:
            return jsonify({"error": "Query is required"}), 400
        if not isinstance(query, str):
            return jsonify({"error": "Query must be a string"}), 400

        response = generate_response(query, KNOWLEDGE_BASE, embedder, index)

        return jsonify({
            "query": query,
            "answer": response["answer"],
            "sources": response["sources"],
            "confidence": response["confidence"]
        }), 200

    except Exception as e:
        # Request boundary: report the failure to the client instead of crashing.
        return jsonify({"error": str(e)}), 500

# Serve the API from a daemon thread so the notebook cell returns immediately.
from threading import Thread

def run_app():
    """Start the Flask server for ``app`` on all interfaces, port 5000."""
    listen_on = {"host": "0.0.0.0", "port": 5000}
    app.run(**listen_on)

Thread(daemon=True, target=run_app).start()

In [None]:
!pip install pyngrok


In [None]:
# Cell 8: ngrok token input and tunnel creation
from getpass import getpass

from pyngrok import ngrok, conf

# Read the token with getpass so the secret is not echoed into the notebook
# output; strip stray whitespace left over from copy-paste.
ngrok_token = getpass("Enter your ngrok token: ").strip()

# Set ngrok auth token dynamically
conf.get_default().auth_token = ngrok_token

# Open ngrok tunnel for Flask API (the Flask server listens on port 5000)
public_url = ngrok.connect(addr=5000, proto="http")
print(f"Public URL {public_url}")


In [None]:
# Cell 9: Testing and Validation
# Calls each endpoint once through the public tunnel and prints the JSON reply.
import requests

# Placeholder: replace with the public URL printed by the ngrok cell.
ngrok_url = "ENTER_YOUR_NGROK_URL_HERE"

# Test /chat endpoint (retrieval + generation)
query_text = "Tell me about Shoplite registration process"
chat_resp = requests.post(f"{ngrok_url}/chat", json={"query": query_text})
print(chat_resp.json())

# Test /ping endpoint (direct LLM call, no retrieval)
ping_resp = requests.post(f"{ngrok_url}/ping", json={"query": "Hello LLM"})
print(ping_resp.json())

# Test /health endpoint
health_resp = requests.get(f"{ngrok_url}/health")
print(health_resp.json())
