In [1]:
!pip install flask-cors
!pip install pyngrok
!pip install gdown




In [11]:
import os
import pandas as pd
from flask import Flask, request, jsonify
from flask_cors import CORS
import google.generativeai as genai
from collections import defaultdict
from google.colab import drive
from google.colab import userdata

# Initialize Flask app
app = Flask(__name__)
# Allow cross-origin calls to /ask only from http://127.0.0.1:5500
# (presumably a local static dev server hosting the front end — confirm).
CORS(app, resources={r"/ask": {"origins": "http://127.0.0.1:5500"}})

# Mount Google Drive
# Side effect: mounts at /content/drive; must run before DATASET_FOLDER is read.
drive.mount('/content/drive')

# Dataset folder path in Google Drive
# Every *.csv directly inside this folder is summarised by summarize_datasets().
DATASET_FOLDER = '/content/drive/MyDrive/dataset'

# API Keys from Colab
# Both values come from the Colab secrets store, never from literals in the notebook.
# NOTE(review): userdata.get may raise (rather than return None) when a secret is
# not defined or notebook access is not granted — confirm; if so, the falsy checks
# on these values only cover secrets that exist but are empty.
GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')
NGROK_AUTH_TOKEN = userdata.get('NGROK_AUTH_TOKEN')

# Fail fast: nothing below can work without a Gemini API key.
if not GOOGLE_API_KEY:
    raise RuntimeError("Error: Please set the 'GOOGLE_API_KEY' secret in Colab.")

# Configure Gemini model
# `model` is the module-level client used by the /ask handler.
genai.configure(api_key=GOOGLE_API_KEY)
model = genai.GenerativeModel(model_name='gemini-2.0-flash')

# Summarize all CSV datasets
def summarize_datasets(dataset_folder=None):
    """Build a structural summary for every CSV file in a folder.

    Args:
        dataset_folder: Directory to scan. Defaults to the module-level
            ``DATASET_FOLDER`` when omitted, so existing zero-argument
            callers behave as before.

    Returns:
        A list with one dict per readable ``*.csv`` file (sorted by file
        name for reproducible output), each containing:

        * ``file`` – the file name
        * ``total_rows`` – row count of the file as loaded
        * ``columns`` – list of column names
        * ``data_types`` – column name -> dtype string
        * ``missing_values`` – column name -> number of null cells
        * ``top_selling_item`` – most frequent non-null ``item_id`` value,
          or ``"N/A"`` when the column is absent or has no usable values

    Files that cannot be read are reported on stdout and skipped; the
    function itself only raises if the folder cannot be listed.
    """
    if dataset_folder is None:
        dataset_folder = DATASET_FOLDER

    dataset_summaries = []

    # Sort so the summary order does not depend on filesystem listing order.
    for file_name in sorted(os.listdir(dataset_folder)):
        if not file_name.endswith('.csv'):
            continue

        file_path = os.path.join(dataset_folder, file_name)
        try:
            df = pd.read_csv(file_path)

            # Default value
            top_item_id = "N/A"

            # Get top-selling item_id by frequency
            if 'item_id' in df.columns:
                try:
                    # value_counts() skips NaN by default, so `df` itself is left
                    # untouched and the row / missing-value statistics below still
                    # describe the whole file.
                    item_counts = df['item_id'].value_counts()
                    if not item_counts.empty:
                        top_item_id = item_counts.idxmax()
                        # Unwrap NumPy scalars so the value renders as a plain
                        # Python literal when the summary is stringified.
                        if hasattr(top_item_id, 'item'):
                            top_item_id = top_item_id.item()
                except Exception as e:
                    print(f"Error identifying top item_id in {file_name}: {e}")

            dataset_summaries.append({
                "file": file_name,
                "total_rows": df.shape[0],
                "columns": df.columns.tolist(),
                "data_types": df.dtypes.astype(str).to_dict(),
                "missing_values": df.isnull().sum().to_dict(),
                "top_selling_item": top_item_id  # based on item_id frequency
            })

        except Exception as e:
            # Best effort: one unreadable file must not hide the others.
            print(f"Error reading file {file_name}: {e}")

    return dataset_summaries

# Chat endpoint
@app.route('/ask', methods=['POST', 'OPTIONS'])
def ask():
    """Answer a merchant's question using the dataset summaries and Gemini.

    Expects a JSON object body of the form ``{"query": "<question>"}``.

    Returns:
        * ``200`` ``{"response": <model text>}`` on success
        * ``200`` ``{"message": ...}`` for a CORS preflight (``OPTIONS``)
        * ``400`` ``{"error": "Query is required"}`` when the body is not a
          JSON object or the query is missing / blank
        * ``500`` ``{"error": <generic message>}`` when summarising the data or
          calling the model fails (details are logged server-side only)
    """
    if request.method == 'OPTIONS':
        return jsonify({'message': 'CORS preflight successful'}), 200

    # silent=True: a missing/invalid JSON body yields None instead of raising,
    # and a non-object body (list, string, null) is treated as "no query".
    payload = request.get_json(silent=True)
    user_query = payload.get('query', '') if isinstance(payload, dict) else ''
    if not user_query or (isinstance(user_query, str) and not user_query.strip()):
        return jsonify({'error': 'Query is required'}), 400

    try:
        dataset_summaries = summarize_datasets()

        corpus_prompt = f"""
        You are an AI chatbot assistant helping a restaurant merchant analyze their sales data and provide insights and recommendations.

        Here are summaries and pre-analyzed insights from the merchant’s datasets:
        {dataset_summaries}

        The merchant asked: "{user_query}"

        Respond directly with a confident answer.
        Do not explain your process or how you got the answer.
        Do not mention file names, column names, or any backend information.
        Use any included insights like 'top_selling_item' to answer clearly.
        No introductions, no explanations, just give the answer.

        Respond in a clear, helpful, and concise way. Get straight to the point and avoid unnecessary explanations.
        Do not show any calculations to the merchant.

        Focus on answering the question directly.
        Do not mention any backend processes, column names, files names, or code logic.
        Keep the response informative but minimal.

        Give a direct, confident answer — no introductions, no step-by-step explanations, no backend references such as files, column names, variables, calculations, or methods.
        Do not explain how you are calculating or getting the data.
        """

        response = model.generate_content(corpus_prompt, generation_config={"temperature": 0.2})
        return jsonify({'response': response.text}), 200

    except Exception:
        # Keep the traceback on the server; the endpoint is exposed through a
        # public tunnel, so internal error text must not be echoed to callers.
        app.logger.exception("Error processing query")
        return jsonify({'error': 'Failed to process the query. Please try again later.'}), 500

# Start Flask app with ngrok
if __name__ == '__main__':
    # Imported here because the tunnel is only needed when the cell is run
    # as the entry point.
    from pyngrok import ngrok

    PORT = 5000  # local port shared by the Flask server and the ngrok tunnel

    if not NGROK_AUTH_TOKEN:
        print("Missing ngrok token.")
    else:
        ngrok.set_auth_token(NGROK_AUTH_TOKEN)

        # Open the tunnel on its own so a tunnel failure and a server failure
        # are reported as what they are.
        tunnel = None
        try:
            tunnel = ngrok.connect(PORT)
        except Exception as e:
            print(f"Error starting ngrok: {e}")

        if tunnel is not None:
            print(f"Flask app is live at {tunnel}")
            try:
                # Blocks until the server is stopped.
                app.run(host='0.0.0.0', port=PORT, debug=False)
            except Exception as e:
                print(f"Error running Flask app: {e}")
            finally:
                # Always release the tunnel; otherwise every re-run of this
                # cell leaves another tunnel open on the ngrok account.
                ngrok.disconnect(tunnel.public_url)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Flask app is live at NgrokTunnel: "https://9122-34-171-197-231.ngrok-free.app" -> "http://localhost:5000"
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://172.28.0.12:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [12/Apr/2025 04:42:21] "OPTIONS /ask HTTP/1.1" 200 -


Processing merchant.csv with pandas...
Processing keywords.csv with pandas...
Processing items.csv with pandas...
Processing transaction_items.csv with pandas...
Processing transaction_data.csv with pandas...


INFO:werkzeug:127.0.0.1 - - [12/Apr/2025 04:42:31] "POST /ask HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [12/Apr/2025 04:42:39] "OPTIONS /ask HTTP/1.1" 200 -


Processing merchant.csv with pandas...
Processing keywords.csv with pandas...
Processing items.csv with pandas...
Processing transaction_items.csv with pandas...
Processing transaction_data.csv with pandas...


INFO:werkzeug:127.0.0.1 - - [12/Apr/2025 04:42:49] "POST /ask HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [12/Apr/2025 04:43:45] "OPTIONS /ask HTTP/1.1" 200 -


Processing merchant.csv with pandas...
Processing keywords.csv with pandas...
Processing items.csv with pandas...
Processing transaction_items.csv with pandas...
Processing transaction_data.csv with pandas...


INFO:werkzeug:127.0.0.1 - - [12/Apr/2025 04:43:56] "POST /ask HTTP/1.1" 200 -
