# Coding Task
First, we need to set up the environment in Colab.

### Installation & Imports Run this cell to install the necessary libraries.

In [1]:
# Install necessary libraries
!pip install pinecone sentence-transformers flask flask-ngrok pytesseract opencv-python tensorflow duckduckgo_search
!pip install flask pyngrok
!pip install -q ddgs
!sudo apt install tesseract-ocr

import pandas as pd
import numpy as np
import re
import os
import cv2
import pytesseract
import requests
import json
from PIL import Image
from io import BytesIO
import threading
from flask import Flask, render_template_string, request
from pyngrok import ngrok
from sentence_transformers import SentenceTransformer
from pinecone import Pinecone, ServerlessSpec
from sklearn.metrics.pairwise import cosine_similarity
from flask import Flask, request, jsonify, render_template_string
from flask_ngrok import run_with_ngrok
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from duckduckgo_search import DDGS
from ddgs import DDGS

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

# Configuration
PINECONE_API_KEY = "YOUR_PINECONE_KEY_HERE"
INDEX_NAME = "ecommerce-product-index"

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
tesseract-ocr is already the newest version (4.1.1-2.1build1).
0 upgraded, 0 newly installed, 0 to remove and 41 not upgraded.


In [2]:
class DataCleaner:
    def __init__(self, filepath):
        self.filepath = "/content/dataset.csv"
        self.df = None

    def load_data(self):
        try:
            self.df = pd.read_csv(self.filepath, encoding='ISO-8859-1')
        except:
            self.df = pd.read_csv(self.filepath, encoding='utf-8')
        print(f"Initial shape: {self.df.shape}")

    def clean(self):
        # 1. Clean StockCode: Remove special chars like 'ö', '^', etc.
        self.df['StockCode'] = self.df['StockCode'].astype(str).apply(lambda x: re.sub(r'[^A-Za-z0-9]', '', x))
        self.df['StockCode'] = self.df['StockCode'].astype(str).apply(lambda x: re.sub(r'[^\d]', '', x))

        # 2. Clean Description: Remove '$', handle NaNs
        self.df = self.df.dropna(subset=['Description'])
        self.df['Description'] = self.df['Description'].astype(str).apply(lambda x: x.replace('$', '').strip())

        # 3. Clean Quantity: Remove '@', convert to int
        self.df['Quantity'] = self.df['Quantity'].astype(str).apply(lambda x: re.sub(r'[^\d-]', '', x))
        self.df['Quantity'] = pd.to_numeric(self.df['Quantity'], errors='coerce').fillna(0).astype(int)

        # 4. Clean UnitPrice
        self.df['UnitPrice'] = self.df['UnitPrice'].astype(str).apply(lambda x: re.sub(r'[^\d\.]', '', x))
        self.df['UnitPrice'] = pd.to_numeric(self.df['UnitPrice'], errors='coerce').fillna(0.0)

        # 5. Clean CustomerID: Remove '&', '#'
        self.df['CustomerID'] = self.df['CustomerID'].astype(str).apply(lambda x: re.sub(r'[^\d\.]', '', x))

        # 6. Clean Country: Remove 'XxY', '☺️'
        self.df['Country'] = self.df['Country'].astype(str).apply(lambda x: re.sub(r'[^a-zA-Z\s]', '', x).replace('XxY', '').strip())

        # 7. # Enforce strictly numbers in InvoiceNo
        self.df['InvoiceNo'] = self.df['InvoiceNo'].astype(str).apply(lambda x: re.sub(r'[^\d]', '', x))

        # 8. Remove Duplicates
        self.df = self.df.drop_duplicates()

        # B. Drop rows containing ANY NaN values
        self.df = self.df.dropna()

        print(f"Cleaned shape: {self.df.shape}")
        return self.df

# Execute Cleaning
cleaner = DataCleaner('dataset.csv')
cleaner.load_data()
df_clean = cleaner.clean()
df_clean.to_csv('dataset_cleaned.csv', index=False)

Initial shape: (541909, 8)
Cleaned shape: (535537, 8)


In [3]:
display(df_clean.tail(5))

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
541904,581587,22613,PACK OF 20 SPACEBOY NAPKINS,12,2011-12-09 12:50:00,0.85,12680.0,France
541905,581587,22899,CHILDREN'S APRON DOLLY GIRL,6,2011-12-09 12:50:00,2.1,12680.0,France
541906,581587,23254,CHILDRENS CUTLERY DOLLY GIRL,4,2011-12-09 12:50:00,4.15,12680.0,France
541907,581587,23255,CHILDRENS CUTLERY CIRCUS PARADE,4,2011-12-09 12:50:00,4.15,12680.0,France
541908,581587,22138,BAKING SET 9 PIECE RETROSPOT,3,2011-12-09 12:50:00,4.95,12680.0,France


## Task 2 & 3: Vector Database & Similarity Metrics
We use Cosine Similarity because it is the standard metric for semantic text similarity, measuring the angle between vectors rather than magnitude.

### Vector Database Manager

In [4]:
class VectorDBManager:
    def __init__(self, api_key, index_name):
        self.pc = Pinecone(api_key=api_key)
        self.index_name = index_name
        self.model = SentenceTransformer('all-MiniLM-L6-v2')
        self.index = None

    def setup_index(self):
        # (Same as your previous code)
        existing_indexes = [i.name for i in self.pc.list_indexes()]
        if self.index_name not in existing_indexes:
            self.pc.create_index(
                name=self.index_name,
                dimension=384,
                metric='cosine',
                spec=ServerlessSpec(cloud='aws', region='us-east-1')
            )
        self.index = self.pc.Index(self.index_name)

    def vectorize_and_upsert(self, df):
        # (Same as your previous code...)
        pass # Keep your existing logic here

    def query_product(self, query_text, top_k=5):
        query_vector = self.model.encode(query_text).tolist()
        # ERROR FIX: You had 'tok_k' instead of 'top_k' below
        result = self.index.query(vector=query_vector, top_k=top_k, include_metadata=True)
        return result

# Execute Vector Setup (Uncomment to run if you have a Key)
vdb = VectorDBManager(PINECONE_API_KEY, INDEX_NAME)
vdb.setup_index()
vdb.vectorize_and_upsert(df_clean)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


## Module 2: OCR and Web Scraping
Task 4: OCR Implementation
We use pytesseract to extract text from images.

### OCR Processor

In [5]:
class OCRProcessor:
    def __init__(self):
        # Tesseract is already installed in the system
        pass

    def extract_text(self, image_path):
        image = cv2.imread(image_path)
        # Preprocessing for better accuracy
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

        text = pytesseract.image_to_string(gray)
        return text.strip()

# Test Usage
ocr = OCRProcessor()
text = ocr.extract_text('/content/WhatsApp Image 2025-02-20 at 01.47.25.jpeg.jpg')

## Task 5: Web Scraping for CNN Data
We need images for the classes in CNN_Model_Train_Data.csv. We will use duckduckgo_search to find images.

### Image Scraper

In [6]:
import time

class ImageScraper:
    def __init__(self, save_dir='dataset_images'):
        self.save_dir = save_dir
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

    def download_images(self, query, num_images=50):
        print(f"Searching for: {query}")
        folder_path = os.path.join(self.save_dir, query)
        if not os.path.exists(folder_path):
            os.makedirs(folder_path)

        with DDGS() as ddgs:
            results = list(ddgs.images(query, max_results=num_images))

            count = 0
            for res in results:
                try:
                    img_data = requests.get(res['image'], timeout=5).content
                    with open(os.path.join(folder_path, f"{count}.jpg"), 'wb') as f:
                        f.write(img_data)
                    count += 1
                    time.sleep(0.5) # Add a small delay to avoid rate limiting
                except Exception as e:
                    pass
        print(f"Downloaded {count} images for {query}")

# Execute Scraping
cnn_data = pd.read_csv('CNN_Model_Train_Data.csv')
# Clean the stock codes here too using the same logic as before
cnn_data['StockCode'] = cnn_data['StockCode'].astype(str).apply(lambda x: re.sub(r'[^A-Za-z0-9]', '', x))

scraper = ImageScraper()

# We need descriptions to search, let's map StockCodes to Descriptions from our clean dataset
code_to_desc = df_clean.set_index('StockCode')['Description'].to_dict()

for code in cnn_data['StockCode'].unique():
    description = code_to_desc.get(code)
    if description:
        scraper.download_images(description, num_images=40) # 40 images per class for training


Searching for: LUNCH BAG PINK POLKADOT
Downloaded 40 images for LUNCH BAG PINK POLKADOT
Searching for: ALARM CLOCK BAKELIKE RED
Downloaded 40 images for ALARM CLOCK BAKELIKE RED
Searching for: CHOCOLATE HOT WATER BOTTLE
Downloaded 40 images for CHOCOLATE HOT WATER BOTTLE
Searching for: SPOTTY BUNTING
Downloaded 40 images for SPOTTY BUNTING
Searching for: LUNCH BAG WOODLAND
Downloaded 39 images for LUNCH BAG WOODLAND
Searching for: REX CASH+CARRY JUMBO SHOPPER
Downloaded 39 images for REX CASH+CARRY JUMBO SHOPPER
Searching for: JUMBO STORAGE BAG SUKI
Downloaded 40 images for JUMBO STORAGE BAG SUKI
Searching for: RETROSPOT TEA SET CERAMIC 11 PC
Downloaded 40 images for RETROSPOT TEA SET CERAMIC 11 PC
Searching for: 6 RIBBONS RUSTIC CHARM
Downloaded 40 images for 6 RIBBONS RUSTIC CHARM
Searching for: REGENCY CAKESTAND 3 TIER
Downloaded 40 images for REGENCY CAKESTAND 3 TIER


In [7]:
import os
from PIL import Image

def cleanup_corrupt_images(directory):
    print(f"Scanning '{directory}' for corrupt images...")
    deleted_count = 0

    for root, dirs, files in os.walk(directory):
        for file in files:
            file_path = os.path.join(root, file)
            try:
                # Try to open and verify the image
                with Image.open(file_path) as img:
                    img.verify() # This checks if the file is broken without loading it fully
            except (IOError, SyntaxError, Image.UnidentifiedImageError) as e:
                # If it fails, print and delete the file
                print(f"Removing corrupt file: {file_path}")
                os.remove(file_path)
                deleted_count += 1

    print(f"Cleanup complete. Removed {deleted_count} corrupt files.")

# RUN THIS before your training code
cleanup_corrupt_images('dataset_images')

Scanning 'dataset_images' for corrupt images...
Removing corrupt file: dataset_images/ALARM CLOCK BAKELIKE RED/19.jpg
Removing corrupt file: dataset_images/ALARM CLOCK BAKELIKE RED/28.jpg
Removing corrupt file: dataset_images/ALARM CLOCK BAKELIKE RED/18.jpg
Removing corrupt file: dataset_images/ALARM CLOCK BAKELIKE RED/24.jpg
Removing corrupt file: dataset_images/ALARM CLOCK BAKELIKE RED/33.jpg
Removing corrupt file: dataset_images/ALARM CLOCK BAKELIKE RED/29.jpg
Removing corrupt file: dataset_images/ALARM CLOCK BAKELIKE RED/31.jpg
Removing corrupt file: dataset_images/ALARM CLOCK BAKELIKE RED/20.jpg
Removing corrupt file: dataset_images/RETROSPOT TEA SET CERAMIC 11 PC/22.jpg
Removing corrupt file: dataset_images/RETROSPOT TEA SET CERAMIC 11 PC/10.jpg
Removing corrupt file: dataset_images/REGENCY CAKESTAND 3 TIER/38.jpg
Removing corrupt file: dataset_images/REGENCY CAKESTAND 3 TIER/33.jpg
Removing corrupt file: dataset_images/REGENCY CAKESTAND 3 TIER/11.jpg
Removing corrupt file: datas

## Module 3: CNN Model Development
Task 6: CNN Training
We will build a simple CNN to classify products into the 10 classes found in CNN_Model_Train_Data.

### CNN Model Trainer

In [10]:
class CNNModelTrainer:
    def __init__(self, data_dir='dataset_images'):
        self.data_dir = data_dir
        self.img_height = 128
        self.img_width = 128
        self.batch_size = 32
        self.model = None
        self.class_names = None

    def prepare_data(self):
        # Use ImageDataGenerator for augmentation and loading
        train_datagen = ImageDataGenerator(
            rescale=1./255,
            validation_split=0.2,
            rotation_range=20,
            zoom_range=0.2,
            horizontal_flip=True
        )

        self.train_generator = train_datagen.flow_from_directory(
            self.data_dir,
            target_size=(self.img_height, self.img_width),
            batch_size=self.batch_size,
            class_mode='categorical',
            subset='training'
        )

        self.validation_generator = train_datagen.flow_from_directory(
            self.data_dir,
            target_size=(self.img_height, self.img_width),
            batch_size=self.batch_size,
            class_mode='categorical',
            subset='validation'
        )
        self.class_names = list(self.train_generator.class_indices.keys())

    def build_model(self):
        num_classes = len(self.class_names)
        self.model = Sequential([
            Conv2D(32, (3, 3), activation='relu', input_shape=(self.img_height, self.img_width, 3)),
            MaxPooling2D(2, 2),
            Conv2D(64, (3, 3), activation='relu'),
            MaxPooling2D(2, 2),
            Conv2D(128, (3, 3), activation='relu'),
            MaxPooling2D(2, 2),
            Flatten(),
            Dense(512, activation='relu'),
            Dropout(0.5),
            Dense(num_classes, activation='softmax')
        ])

        self.model.compile(optimizer='adam',
                           loss='categorical_crossentropy',
                           metrics=['accuracy'])

    def train(self, epochs=50):
        self.model.fit(
            self.train_generator,
            validation_data=self.validation_generator,
            epochs=epochs
        )
        self.model.save('product_cnn_model.h5')
        print("Model saved as product_cnn_model.h5")

# Execute Training (Only if images are downloaded)
trainer = CNNModelTrainer()
trainer.prepare_data()
trainer.build_model()
trainer.train(epochs=50)

Found 304 images belonging to 10 classes.
Found 70 images belonging to 10 classes.
Epoch 1/50
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 2s/step - accuracy: 0.0951 - loss: 2.7909 - val_accuracy: 0.1143 - val_loss: 2.2964
Epoch 2/50
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 2s/step - accuracy: 0.0944 - loss: 2.3152 - val_accuracy: 0.1000 - val_loss: 2.2806
Epoch 3/50
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 2s/step - accuracy: 0.1668 - loss: 2.2638 - val_accuracy: 0.1714 - val_loss: 2.2254
Epoch 4/50
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 2s/step - accuracy: 0.2624 - loss: 2.2111 - val_accuracy: 0.1714 - val_loss: 2.1662
Epoch 5/50
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 2s/step - accuracy: 0.2015 - loss: 2.1219 - val_accuracy: 0.3429 - val_loss: 2.0744
Epoch 6/50
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 2s/step - accuracy: 0.2834 - loss: 2.0891



Model saved as product_cnn_model.h5


## Module 4: Frontend Development and Integration
Flask Application
This handles all endpoints (/, /query, /upload_query, /detect_product).

### Flask App

In [None]:
from tensorflow.keras.preprocessing import image as keras_image
import numpy as np
import tensorflow as tf
import os
from flask import Flask, request, render_template_string
from pyngrok import ngrok

# --- 1. DEFINE HELPER FUNCTIONS (RETURNING STRUCTURED DATA) ---

def recommend_products(query_text):
    """Searches Pinecone, looks up details in df_clean, and returns structured data."""
    if not query_text:
        return "No query provided.", []

    try:
        # Query Pinecone
        results = vdb.query_product(query_text, top_k=5)
    except NameError:
        return "Error: Database (vdb) not loaded.", []
    except Exception as e:
        return f"Error querying database: {str(e)}", []

    recommendations = []

    # Handle Pinecone Response Object (v2 vs v3 compatibility)
    matches = []
    if hasattr(results, 'matches'):
        matches = results.matches
    elif isinstance(results, dict) and 'matches' in results:
        matches = results['matches']

    for match in matches:
        # 1. Get Metadata from Pinecone match
        if isinstance(match, dict):
            metadata = match.get('metadata', {})
            score = match.get('score', 0)
        else:
            metadata = getattr(match, 'metadata', {})
            score = getattr(match, 'score', 0)

        # 2. Extract Description
        description = "Unknown Product"
        if isinstance(metadata, dict):
            description = metadata.get('description', 'Unknown Product')
        elif hasattr(metadata, 'get'):
             description = metadata.get('description', 'Unknown Product')

        # 3. Look up Price and StockCode from the global df_clean DataFrame
        # We filter the dataframe to find the matching description to get the price
        price = "N/A"
        stock_code = "N/A"

        try:
            # Find the row in the clean dataframe that matches this description
            product_row = df_clean[df_clean['Description'] == description]
            if not product_row.empty:
                # Take the first match
                price = product_row.iloc[0]['UnitPrice']
                stock_code = product_row.iloc[0]['StockCode']
        except Exception:
            pass # Keep defaults if lookup fails

        # 4. Append structured dictionary instead of a string
        recommendations.append({
            'description': description,
            'price': price,
            'stock_code': stock_code,
            'score': round(score, 2)
        })

    response_text = f"Found {len(recommendations)} matches for '{query_text}'."
    return response_text, recommendations

def process_ocr_query(image_path):
    """Extracts text from image, then searches Pinecone."""
    try:
        extracted_text = ocr.extract_text(image_path)
    except NameError:
        return "Error: OCR processor not loaded.", [], ""

    if not extracted_text:
        return "No text detected in image.", [], ""

    # Use the text we found to search products
    response_text, recommendations = recommend_products(extracted_text)
    return response_text, recommendations, extracted_text

def predict_product_from_image(image_path):
    """Uses CNN to classify image, then searches Pinecone for that class."""
    try:
        # Load and preprocess image for the model (128x128)
        img = keras_image.load_img(image_path, target_size=(128, 128))
        img_array = keras_image.img_to_array(img)
        img_array = np.expand_dims(img_array, axis=0)
        img_array /= 255.0

        # Predict using your trainer model
        predictions = trainer.model.predict(img_array)
        class_idx = np.argmax(predictions[0])
        predicted_class = trainer.class_names[class_idx]

        # Search for products matching the predicted class
        response_text, recommendations = recommend_products(predicted_class)

        return predicted_class, response_text, recommendations
    except Exception as e:
        return f"Error: {str(e)}", "Could not process image", []

# --- 2. RESET THE FLASK APP ---
app = Flask(__name__)

# Ngrok Setup
# Note: Ensure your Authtoken is valid.
NGROK_AUTH_TOKEN = "YOUR_PINECONE_KEY_HERE"
ngrok.set_auth_token(NGROK_AUTH_TOKEN)
# Kill previous tunnels to avoid conflicts
ngrok.kill()
public_url = ngrok.connect(5000).public_url
print(f" * Public URL: {public_url}")

# --- 3. UPDATED HTML TEMPLATE (WITH TABLE) ---
html_template = """
<!DOCTYPE html>
<html>
<head>
    <title>AI E-commerce Assistant</title>
    <style>
        body { font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; padding: 20px; background-color: #f4f4f9; }
        .container { max-width: 900px; margin: auto; background: white; padding: 30px; border-radius: 10px; box-shadow: 0 4px 8px rgba(0,0,0,0.1); }
        h1 { color: #333; text-align: center; }
        nav { text-align: center; margin-bottom: 20px; }
        nav a { margin: 0 15px; text-decoration: none; color: #007bff; font-weight: bold; font-size: 1.1em; }
        nav a:hover { text-decoration: underline; }
        .result { background: #e9ecef; padding: 20px; margin-top: 20px; border-radius: 8px; }

        /* Table Styles */
        table { width: 100%; border-collapse: collapse; margin-top: 15px; background: white; }
        th, td { padding: 12px; text-align: left; border-bottom: 1px solid #ddd; }
        th { background-color: #007bff; color: white; }
        tr:hover { background-color: #f1f1f1; }

        form { margin-top: 20px; text-align: center; }
        input[type="text"] { padding: 10px; width: 60%; border: 1px solid #ddd; border-radius: 4px; }
        button { padding: 10px 20px; background-color: #28a745; color: white; border: none; border-radius: 4px; cursor: pointer; }
        button:hover { background-color: #218838; }
    </style>
</head>
<body>
<div class="container">
    <h1>AI Product Assistant</h1>
    <nav>
        <a href="/">Text Search</a> |
        <a href="/ocr">Handwritten Search</a> |
        <a href="/vision">Image Detection</a>
    </nav>
    <hr>

    {% if page == 'text' %}
    <h2 style="text-align:center;">Find Products by Description</h2>
    <form method="post">
        <input type="text" name="query" placeholder="E.g., 'White metal lantern'..." required>
        <button type="submit">Search</button>
    </form>
    {% elif page == 'ocr' %}
    <h2 style="text-align:center;">Upload Handwritten Note</h2>
    <form method="post" enctype="multipart/form-data">
        <input type="file" name="file" required>
        <button type="submit">Upload & Read</button>
    </form>
    {% elif page == 'vision' %}
    <h2 style="text-align:center;">Upload Product Image</h2>
    <form method="post" enctype="multipart/form-data">
        <input type="file" name="file" required>
        <button type="submit">Identify Product</button>
    </form>
    {% endif %}

    {% if result %}
    <div class="result">
        <h3>Analysis Results:</h3>
        <p><strong>System Response:</strong> {{ response_text }}</p>
        {% if extracted %} <p><strong>Extracted Text:</strong> <em>"{{ extracted }}"</em></p> {% endif %}
        {% if predicted_class %} <p><strong>Detected Category:</strong> <span style="color:green; font-weight:bold;">{{ predicted_class }}</span></p> {% endif %}

        <h4>Recommended Products:</h4>
        {% if recommendations %}
        <table>
            <thead>
                <tr>
                    <th style="width: 50%;">Product Name</th>
                    <th>Stock Code</th>
                    <th>Price ($)</th>
                    <th>Relevance Score</th>
                </tr>
            </thead>
            <tbody>
                {% for item in recommendations %}
                <tr>
                    <td>{{ item.description }}</td>
                    <td>{{ item.stock_code }}</td>
                    <td>{{ item.price }}</td>
                    <td>{{ item.score }}</td>
                </tr>
                {% endfor %}
            </tbody>
        </table>
        {% else %}
        <p>No products found matching your query.</p>
        {% endif %}
        </div>
    {% endif %}
</div>
</body>
</html>
"""

# --- 4. ROUTES ---
@app.route('/', methods=['GET', 'POST'])
def text_query():
    result = None
    response_text = ""
    recommendations = []

    if request.method == 'POST':
        query = request.form.get('query', '')
        response_text, recommendations = recommend_products(query)
        result = True

    return render_template_string(html_template, page='text', result=result, response_text=response_text, recommendations=recommendations)

@app.route('/ocr', methods=['GET', 'POST'])
def ocr_query():
    result = None
    response_text = ""
    recommendations = []
    extracted = ""

    if request.method == 'POST':
        if 'file' not in request.files:
            return "No file uploaded"
        file = request.files['file']
        if file.filename != '':
            filepath = os.path.join('static', file.filename)
            os.makedirs('static', exist_ok=True)
            file.save(filepath)

            response_text, recommendations, extracted = process_ocr_query(filepath)
            result = True

    return render_template_string(html_template, page='ocr', result=result, response_text=response_text, recommendations=recommendations, extracted=extracted)

@app.route('/vision', methods=['GET', 'POST'])
def vision_query():
    result = None
    response_text = ""
    recommendations = []
    predicted_class = ""

    if request.method == 'POST':
        if 'file' not in request.files:
            return "No file uploaded"
        file = request.files['file']
        if file.filename != '':
            filepath = os.path.join('static', file.filename)
            os.makedirs('static', exist_ok=True)
            file.save(filepath)

            predicted_class, response_text, recommendations = predict_product_from_image(filepath)
            result = True

    return render_template_string(html_template, page='vision', result=result, response_text=response_text, recommendations=recommendations, predicted_class=predicted_class)

# --- 5. RUN ---
if __name__ == '__main__':
    app.run(port=5000)

 * Public URL: https://fitful-anapaestically-jarvis.ngrok-free.dev
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [05/Dec/2025 17:23:28] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [05/Dec/2025 17:23:35] "GET /vision HTTP/1.1" 200 -


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 136ms/step


INFO:werkzeug:127.0.0.1 - - [05/Dec/2025 17:23:57] "POST /vision HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [05/Dec/2025 17:45:53] "GET / HTTP/1.1" 200 -
