In [None]:
# ======= Paste this whole block into Google Colab and run =======
# Colab: https://colab.research.google.com/   -> New notebook -> paste & run

# 0. Install (none required for this script) and import libs
import re
from collections import Counter
from urllib.parse import unquote
import requests
import io
import pandas as pd

# 1. Download the file
url = "https://storage.googleapis.com/kagglesdsdata/datasets/715041/1245709/dialogs.txt?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=gcp-kaggle-com%40kaggle-161607.iam.gserviceaccount.com%2F20251206%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20251206T100749Z&X-Goog-Expires=259200&X-Goog-SignedHeaders=host&X-Goog-Signature=289f2491357b1b7cb7e2459fa1b890183bbd9cfddd67b9d3e617eb7b9103d4355c63f0ee13eb24b4f172c804a94f399972af7cff194a61cc6f4711539ed6f21f86ecf995fe2b80cdff35f45da7157e142a530de8ee80ed9c08087389ecdda3419264e0648f9a4ce1948c6def1cca7c39e8854e20bec41cd1a9f4cf993110d3f00ce7a39c27e2c2ded2ea3e98dfe86f89c2bac6cb97529b109fb2a6c4acab29650f59252b3385bfc1d3c45d1f8f015fafef7a7e7299048557efa2de291d97208cabf114b9ba258fc214fc45a90348beed1bf2c53ad856d3814e57691d24bed61f8d4c033e0b2cb5b2567a1ece01f72f19a17642f3741766991f5d521e392f3541"

# Fetch the dialogue corpus. The URL above is a signed link carrying an
# X-Goog-Expires parameter, so an HTTP error here may simply mean the link
# needs to be regenerated.
print("Downloading file (this may take a few seconds)...")
# A timeout keeps the cell from hanging forever on a stalled connection.
r = requests.get(url, timeout=60)
r.raise_for_status()
raw_text = r.text
# Report the payload size in bytes; len(raw_text) would count decoded characters.
print("Downloaded bytes:", len(r.content))

# 2. Quick peek at the start to understand format
peek_lines = raw_text.splitlines()[:200]
# Only the first 80 of those lines are printed; the banner reports the number
# actually shown instead of a hard-coded (and previously mismatching) figure.
peek_shown = peek_lines[:80]
print(f"\n----- FILE PEEK (first {len(peek_shown)} lines) -----")
for line_no, line in enumerate(peek_shown, start=1):
    # Cap each line at 250 characters so one long record cannot flood the output.
    print(f"{line_no:03d}: {line[:250]}")
print("----- END PEEK -----\n")

# 3. Split into lines & basic cleaning
# Trim surrounding whitespace from every physical line of the download.
lines = list(map(str.strip, raw_text.splitlines()))
# Blank lines are carried over unchanged (as "") so later steps can use them as
# dialogue separators; nothing is dropped or merged at this stage.
filtered_lines = list(lines)
print("Total lines (including blanks):", len(filtered_lines))

# 4. Heuristic 1: detect speaker-labeled lines like "Agent: text" or "Customer: text" or "A: text" or "1: text"
speaker_pattern = re.compile(r'^\s*(?P<spk>[A-Za-z0-9_\-]{1,20})\s*[:\-\)]\s*(?P<text>.+)$')

# One match attempt per non-blank line; failed attempts stay in the list as None
# so its length doubles as the non-blank line count.
match_attempts = [speaker_pattern.match(row) for row in filtered_lines if row]
sample_matches = [(hit.group("spk"), hit.group("text")) for hit in match_attempts if hit]
matches = len(sample_matches)
non_blank_total = len(match_attempts)

# show detection result
print("Lines matching 'Speaker: text' pattern:", matches, "out of", non_blank_total)
if sample_matches:
    print("Sample speaker detections:", sample_matches[:10])

# 5. Decide parsing strategy
# Speaker-label parsing is chosen when more than 20% of non-blank lines match;
# max(1, ...) avoids dividing by zero on an empty file.
use_speaker_labels = matches / max(1, non_blank_total) > 0.20
print("Using speaker-label parsing?" , use_speaker_labels)

# 6. Parser helpers & cleaning functions
def clean_text(s):
    """Normalize one utterance for pairing and frequency counting.

    Runs of non-ASCII characters are replaced by a space, whitespace is
    collapsed to single spaces, and order-like numbers are masked as
    ``<ORDER_NO>`` so otherwise identical messages are not counted as distinct.

    Args:
        s: Raw text of a single line or turn.

    Returns:
        The cleaned string, without leading or trailing whitespace.
    """
    # Replace non-ASCII runs first: each run becomes a space, so whitespace has
    # to be collapsed afterwards or the replacement leaves multi-space gaps.
    s = re.sub(r'[^\x00-\x7F]+', ' ', s)
    s = re.sub(r'\s+', ' ', s).strip()
    # normalize order-like numbers (6+ digits) to <ORDER_NO> to avoid unique tokens
    s = re.sub(r'\b[0-9]{6,}\b', '<ORDER_NO>', s)
    # shorter forms such as "order#123" or "order 12345"
    s = re.sub(r'\border\s*#?\s*[0-9]+\b', 'order <ORDER_NO>', s, flags=re.IGNORECASE)
    return s.strip()

# 7. Parse dialogues
# Three input layouts are supported, tried in this order:
#   a) tab-separated "user<TAB>bot" exchanges, one per line
#   b) speaker-labelled lines ("Agent: text"), dialogues separated by blank lines
#   c) unlabelled lines, alternating USER/BOT inside blank-line separated chunks
dialogs = []   # list of dialogues, each is list of (speaker, text)

# Tabs must be detected on the raw lines: clean_text() collapses every
# whitespace run (tabs included) into one space, which would otherwise glue a
# prompt and its reply together into a single "user" message.
non_blank_lines = [ln for ln in filtered_lines if ln]
tab_line_count = sum(1 for ln in non_blank_lines if "\t" in ln)
use_tab_pairs = bool(non_blank_lines) and tab_line_count / len(non_blank_lines) > 0.5
print("Using tab-separated pair parsing?", use_tab_pairs)

if use_tab_pairs:
    # Every line is a self-contained exchange, so each becomes its own two-turn
    # dialogue; a line without a tab contributes a lone USER turn.
    for ln in non_blank_lines:
        user_part, tab, bot_part = ln.partition("\t")
        turns = [("USER", clean_text(user_part))]
        if tab:
            turns.append(("BOT", clean_text(bot_part)))
        dialogs.append(turns)
elif use_speaker_labels:
    # group lines into dialogues by blank lines, and parse speaker: text per line
    cur = []
    for ln in filtered_lines:
        if ln == "":
            if cur:
                dialogs.append(cur)
                cur = []
            else:
                # consecutive blank lines -> ignore
                continue
        else:
            m = speaker_pattern.match(ln)
            if m:
                spk = m.group("spk").strip()
                text = m.group("text").strip()
            else:
                # fallback: unknown speaker, mark as "UNK"
                spk = "UNK"
                text = ln
            cur.append((spk, clean_text(text)))
    if cur:
        dialogs.append(cur)
else:
    # No speaker labels -> split on blank lines into dialogue chunks
    raw_dialog_chunks = []
    chunk = []
    for ln in filtered_lines:
        if ln == "":
            if chunk:
                raw_dialog_chunks.append(chunk)
            chunk = []
        else:
            chunk.append(clean_text(ln))
    if chunk:
        raw_dialog_chunks.append(chunk)
    # For each chunk, assume alternating lines: user, bot, user, bot...
    for chunk in raw_dialog_chunks:
        parsed = []
        for i, text in enumerate(chunk):
            # mark speaker by parity: even-index -> USER, odd -> BOT
            spk = "USER" if i % 2 == 0 else "BOT"
            parsed.append((spk, text))
        if parsed:
            dialogs.append(parsed)

print("Number of dialogues parsed:", len(dialogs))

# 8. From each dialogue produce user->bot pairs
pairs = []   # list of dicts: dialog_id, turn_id, user_text, bot_text
drop_short_user_thresh = 3  # min chars for a user message to be useful

for d_id, dlg in enumerate(dialogs):
    turn_idx = 0  # index of the emitted pair within this dialogue
    i = 0
    while i < len(dlg):
        spk, text = dlg[i]
        # Any label starting with "U" opens a pair ("U", "USER", and also "UNK").
        if not spk.upper().startswith("U"):
            i += 1
            continue

        user_text = text
        bot_text = ""
        reply_consumed = False
        if i + 1 < len(dlg):
            next_spk, next_text = dlg[i + 1]
            next_label = next_spk.upper()
            # "UNK" begins with "U" but marks an unlabelled line, which is
            # accepted as a reply; only a genuine user label means "no reply".
            next_is_user = next_label.startswith("U") and not next_label.startswith("UNK")
            if not next_is_user:
                bot_text = next_text
                reply_consumed = True

        # filter short user_text
        if user_text and len(user_text) >= drop_short_user_thresh:
            pairs.append({
                "dialog_id": d_id,
                "turn_id": turn_idx,
                "user_text": user_text,
                "bot_text": bot_text
            })
            turn_idx += 1

        # Skip the following line only when it was used as the reply. Previously
        # the index always advanced by two, so a user line directly after an
        # unanswered user line was silently dropped.
        i += 2 if reply_consumed else 1

pair_count = len(pairs)
print("Total user->bot pairs extracted:", pair_count)

# 9. Build user message frequency list
user_texts = [p["user_text"] for p in pairs if p["user_text"]]
freq = Counter(user_texts)
unique_user_messages = [{"user_text": t, "count": c} for t, c in freq.most_common()]

# 10. Convert to DataFrames and save CSVs
# Columns are named explicitly so that an empty result still yields frames with
# the expected schema; without them drop_duplicates(subset=...) raises KeyError
# when no pairs were extracted.
pairs_df = pd.DataFrame(pairs, columns=["dialog_id", "turn_id", "user_text", "bot_text"])
user_messages_df = pd.DataFrame(unique_user_messages, columns=["user_text", "count"])

# Basic cleaning: drop exact duplicates rows if any
pairs_df = pairs_df.drop_duplicates(subset=["user_text","bot_text"])
print("Pairs after dedup:", len(pairs_df))

# Save to CSV files in Colab workspace
pairs_csv = "user_bot_pairs.csv"
users_csv = "user_messages.csv"
pairs_df.to_csv(pairs_csv, index=False)
user_messages_df.to_csv(users_csv, index=False)

print(f"\nSaved: {pairs_csv} ({len(pairs_df)} rows)")
print(f"Saved: {users_csv} ({len(user_messages_df)} unique user messages)")

# 11./12. Show the most frequent user messages and a sample of the pairs
for banner, frame, row_limit in (
    ("\n----- Top 50 user messages (most frequent) -----", user_messages_df, 50),
    ("\n----- Sample user->bot pairs (first 30) -----", pairs_df, 30),
):
    print(banner)
    display(frame.head(row_limit))

# 13. Quick stats
num_dialogues, num_pairs, num_unique_users = len(dialogs), len(pairs_df), len(user_messages_df)
print(f"\nDialogs: {num_dialogues}, Pairs: {num_pairs}, Unique user messages: {num_unique_users}")

# 14. Provide download links in Colab (files saved to Colab root)
print("\nFiles saved to Colab working directory. To download, use the left Files sidebar or run:")
print(f"from google.colab import files\nfiles.download('{pairs_csv}')\nfiles.download('{users_csv}')")

# ====== End of script ======


Downloading file (this may take a few seconds)...
Downloaded bytes: 243904

----- FILE PEEK (first 200 lines) -----
001: hi, how are you doing?	i'm fine. how about yourself?
002: i'm fine. how about yourself?	i'm pretty good. thanks for asking.
003: i'm pretty good. thanks for asking.	no problem. so how have you been?
004: no problem. so how have you been?	i've been great. what about you?
005: i've been great. what about you?	i've been good. i'm in school right now.
006: i've been good. i'm in school right now.	what school do you go to?
007: what school do you go to?	i go to pcc.
008: i go to pcc.	do you like it there?
009: do you like it there?	it's okay. it's a really big campus.
010: it's okay. it's a really big campus.	good luck with school.
011: good luck with school.	thank you very much.
012: how's it going?	i'm doing well. how about you?
013: i'm doing well. how about you?	never better, thanks.
014: never better, thanks.	so how have you been lately?
015: so how have you been lat

Unnamed: 0,user_text,count
0,"hi, how are you doing? i'm fine. how about you...",1
1,i'm pretty good. thanks for asking. no problem...,1
2,i've been great. what about you? i've been goo...,1
3,what school do you go to? i go to pcc.,1
4,do you like it there? it's okay. it's a really...,1
5,good luck with school. thank you very much.,1
6,"i'm doing well. how about you? never better, t...",1
7,so how have you been lately? i've actually bee...,1
8,i'm actually in school right now. which school...,1
9,i'm attending pcc right now. are you enjoying ...,1



----- Sample user->bot pairs (first 30) -----


Unnamed: 0,dialog_id,turn_id,user_text,bot_text
0,0,0,"hi, how are you doing? i'm fine. how about you...",i'm fine. how about yourself? i'm pretty good....
1,0,1,i'm pretty good. thanks for asking. no problem...,no problem. so how have you been? i've been gr...
2,0,2,i've been great. what about you? i've been goo...,i've been good. i'm in school right now. what ...
3,0,3,what school do you go to? i go to pcc.,i go to pcc. do you like it there?
4,0,4,do you like it there? it's okay. it's a really...,it's okay. it's a really big campus. good luck...
5,0,5,good luck with school. thank you very much.,how's it going? i'm doing well. how about you?
6,0,6,"i'm doing well. how about you? never better, t...","never better, thanks. so how have you been lat..."
7,0,7,so how have you been lately? i've actually bee...,i've actually been pretty good. you? i'm actua...
8,0,8,i'm actually in school right now. which school...,which school do you attend? i'm attending pcc ...
9,0,9,i'm attending pcc right now. are you enjoying ...,are you enjoying it there? it's not bad. there...



Dialogs: 1, Pairs: 1863, Unique user messages: 1863

Files saved to Colab working directory. To download, use the left Files sidebar or run:
from google.colab import files
files.download('user_bot_pairs.csv')
files.download('user_messages.csv')


In [None]:
# ===== Run this in Colab to cluster user messages and produce suggested intents =====
# Installs (may take ~1 minute)
!pip install -q sentence-transformers hdbscan umap-learn

# Imports
import pandas as pd
from sentence_transformers import SentenceTransformer
import hdbscan
import umap.umap_ as umap
from collections import defaultdict
import numpy as np
import math
import os

# Load data
users_csv = "user_messages.csv"
pairs_csv = "user_bot_pairs.csv"
# Both files are read below, so both are checked. An explicit exception is used
# because assert statements are stripped when Python runs with -O.
for required_csv in (users_csv, pairs_csv):
    if not os.path.exists(required_csv):
        raise FileNotFoundError(f"{required_csv} not found. Run parser first.")
users_df = pd.read_csv(users_csv)
pairs_df = pd.read_csv(pairs_csv)

# Option: keep only top N frequent messages to speed things up (adjustable)
top_n = 3000   # set to None or large number if you want all
if top_n:
    # copy() detaches the slice so the later users_df['cluster'] assignment
    # writes to an independent frame instead of a view of the full table.
    users_df = users_df.head(top_n).copy()

print("Messages to cluster:", len(users_df))

# Compute embeddings
model_name = "all-MiniLM-L6-v2"   # good balance of speed and quality
texts = users_df['user_text'].astype(str).tolist()
model = SentenceTransformer(model_name)
embeddings = model.encode(texts, show_progress_bar=True, convert_to_numpy=True)

# Optional UMAP dimensionality reduction for HDBSCAN
reducer = umap.UMAP(
    n_neighbors=15,
    n_components=64,
    metric='cosine',
    random_state=42,
)
emb_reduced = reducer.fit_transform(embeddings)

# HDBSCAN clustering
clusterer = hdbscan.HDBSCAN(
    min_cluster_size=15,
    min_samples=5,
    metric='euclidean',
    cluster_selection_method='eom',
)
cluster_labels = clusterer.fit_predict(emb_reduced)
users_df['cluster'] = cluster_labels

# Overview: label -1 is excluded from the cluster count and reported as noise
num_clusters = len(set(cluster_labels) - {-1})
num_noise = (cluster_labels == -1).sum()
print(f"Clusters found (excluding noise): {num_clusters}, noise points: {num_noise}")

# Build cluster summaries: cluster label -> messages assigned to it, in input order
cluster_to_texts = defaultdict(list)
for message, label in zip(texts, cluster_labels):
    cluster_to_texts[label].append(message)

def cluster_summary(lab, top_k=10):
    """Return the first *top_k* messages recorded for cluster *lab*."""
    return cluster_to_texts[lab][:top_k]

# Create suggested intent names automatically (you should rename after inspection)
from collections import Counter  # imported once here instead of on every loop iteration

suggested = []  # tuples of (cluster label, suggested name, cluster size, example messages)
for lab in sorted(set(cluster_labels)):
    if lab == -1:
        continue  # -1 is the noise label, not a real cluster
    examples = cluster_summary(lab, top_k=8)
    # auto-suggest a name based on frequent words (longer than 3 chars) in examples
    all_text = " ".join(examples).lower()
    words = [w for w in all_text.split() if len(w) > 3]
    wc = Counter(words)
    common = [w for w, _ in wc.most_common(5)]
    if common:
        # Join the top one or two words. Joining only what exists avoids the
        # trailing underscore that appeared when a single word qualified.
        name = "_".join(common[:2]).upper()
    else:
        name = "INTENT_" + str(lab)
    suggested.append((lab, name, len(cluster_to_texts[lab]), examples[:8]))

# Make DataFrame for inspection
suggested_df = pd.DataFrame([{"cluster": s[0], "suggested_intent": s[1], "count": s[2], "examples": s[3]} for s in suggested])
suggested_df = suggested_df.sort_values(by='count', ascending=False).reset_index(drop=True)
display(suggested_df.head(30))

# Save examples per cluster to CSV for manual review
out_folder = "intent_suggestions"
os.makedirs(out_folder, exist_ok=True)

# Spaces and slashes in a suggested name are turned into underscores.
unsafe_chars = str.maketrans({" ": "_", "/": "_"})
rows = []
for lab, name, count, examples in suggested:
    # name truncated to 70 characters after substitution
    safe_name = name.translate(unsafe_chars)[:70]
    fname = f"{out_folder}/cluster_{lab}_{safe_name}.txt"
    # one message per line, every message of the cluster
    with open(fname, "w", encoding="utf-8") as f:
        f.write("\n".join(cluster_to_texts[lab]))
    # overview row: up to 8 examples joined into one cell, plus the file path
    rows.append(dict(
        cluster=lab,
        suggested_intent=safe_name,
        count=count,
        examples=" ||| ".join(examples[:8]),
        file=fname,
    ))

suggested_df2 = pd.DataFrame(rows)
suggested_df2.to_csv("suggested_intents_overview.csv", index=False)
print("Saved suggested_intents_overview.csv and per-cluster example files under intent_suggestions/")

# Build Dialogflow-style CSV: columns: intent, type, text
# Each clustered phrase becomes one row with type=training; response rows are
# appended separately further down.
dialogflow_rows = []
cluster_to_intent = {r['cluster']: r['suggested_intent'] for _,r in suggested_df2.iterrows()}

for lab, texts in cluster_to_texts.items():
    if lab != -1:  # the noise cluster gets no intent
        intent_name = cluster_to_intent.get(lab, f"INTENT_{lab}")
        dialogflow_rows.extend(
            {"intent": intent_name, "type": "training", "text": t} for t in texts
        )

# Optionally attach one canned response per intent by looking up most common bot reply among pairs where user_text in cluster
from collections import Counter  # imported once here instead of inside the loop

# user_text -> list of recorded bot replies
pairs_map = {}
for idx, row in pairs_df.iterrows():
    bt = row.get('bot_text', "")
    # An empty bot_text is read back from CSV as NaN; str(NaN) is "nan", which
    # previously could be selected as an intent's canned response.
    if pd.isna(bt) or not str(bt).strip():
        continue
    pairs_map.setdefault(str(row['user_text']), []).append(str(bt))

responses_added = []
for lab, texts in cluster_to_texts.items():
    if lab == -1:
        continue
    intent_name = cluster_to_intent.get(lab, f"INTENT_{lab}")
    # collect candidate bot replies from pairs_map for these texts
    candidate_replies = []
    for t in texts:
        if t in pairs_map:
            candidate_replies.extend(pairs_map[t])
    if candidate_replies:
        # choose most common reply
        reply = Counter(candidate_replies).most_common(1)[0][0]
        dialogflow_rows.append({"intent": intent_name, "type": "response", "text": reply})
        responses_added.append((intent_name, reply))

# save dialogflow CSV
dialogflow_df = pd.DataFrame(dialogflow_rows)
dialogflow_df.to_csv("dialogflow_import.csv", index=False)
print("Saved dialogflow_import.csv (columns: intent,type,text). Rows: ", len(dialogflow_df))
print("Examples of responses added for intents (first 10):")
display(pd.DataFrame(responses_added[:10], columns=["intent","example_response"]).head(10))

# Final message
print("\nDone. Files you can download:")
print("- suggested_intents_overview.csv  (summary of clusters & example file paths)")
print("- intent_suggestions/ (folder with cluster example .txt files)")
print("- dialogflow_import.csv (intent,type,text) — ready for programmatic import or manual paste into Dialogflow.")


In [None]:
# ===== Paste & run this in Colab NOW (assumes previous clustering ran) =====
import os, json, pandas as pd
from collections import Counter

# file paths (from previous steps)
suggested_overview = "suggested_intents_overview.csv"
dialogflow_import = "dialogflow_import.csv"
intent_folder = "intent_suggestions"

# check existence: stop at the first required file that is missing
missing_input = next(
    (path for path in (suggested_overview, dialogflow_import) if not os.path.exists(path)),
    None,
)
if missing_input is not None:
    raise FileNotFoundError(f"{missing_input} not found. Make sure you ran the clustering cell earlier.")

# load
sugg = pd.read_csv(suggested_overview)
df = pd.read_csv(dialogflow_import)

# normalize names: uppercase, remove bad chars, keep readable mapping
def clean_intent_name(name):
    """Turn an arbitrary value into an upper-case intent identifier.

    The value is stringified, trimmed and upper-cased; spaces become
    underscores and every character that is not alphanumeric, "_" or "-" is
    dropped. A result that is empty or does not begin with a letter is
    prefixed with "INTENT_". At most 80 characters are returned.
    """
    normalized = str(name).strip().upper().replace(" ", "_")
    kept_chars = [ch for ch in normalized if ch.isalnum() or ch in ("_", "-")]
    cleaned = "".join(kept_chars)
    # cleaned[:1] is "" for an empty string, and "".isalpha() is False
    if not cleaned[:1].isalpha():
        cleaned = "INTENT_" + cleaned
    return cleaned[:80]

sugg['clean_name'] = sugg['suggested_intent'].apply(clean_intent_name)

# Build cluster -> intent name map
cluster_to_intent = dict(zip(sugg['cluster'].astype(int), sugg['clean_name']))

# Build cluster->texts using files in intent_folder if present, else from dialogflow_import
cluster_texts = {}
# Precomputed once for the fallback lookup inside the loop.
intent_upper = df['intent'].astype(str).str.upper()
is_training_row = df['type'] == 'training'

for idx, row in sugg.iterrows():
    cluster = int(row['cluster'])
    fname = row.get('file', None)
    texts = []
    # A blank 'file' cell is read back as NaN (a float), which is not a valid
    # path, so only real strings are handed to os.path.exists.
    if isinstance(fname, str) and fname and os.path.exists(fname):
        with open(fname, 'r', encoding='utf-8') as f:
            texts = [ln.strip() for ln in f if ln.strip()]
    # fallback: training rows whose intent equals this suggestion's name.
    # Compared case-insensitively for exact equality: str.contains would treat
    # the name as a regular expression and also match longer names that merely
    # contain it.
    if not texts:
        wanted = str(row['suggested_intent']).upper()
        texts = df.loc[(intent_upper == wanted) & is_training_row, 'text'].tolist()
    # ultimate fallback: empty list
    cluster_texts[cluster] = texts

# Build mapping: intent -> list of training phrases (max 15) and response (one if available)
def score_phrase(t):
    """Score a phrase by its word count (longer phrases sort first below)."""
    return len(t.split())

# Response rows in dialogflow_import.csv carry the original suggested name, not
# the cleaned one, so the lookup has to go through the original name.
cluster_to_raw_name = dict(zip(sugg['cluster'].astype(int), sugg['suggested_intent'].astype(str)))
response_rows = df[df['type'] == 'response']
response_intent_upper = response_rows['intent'].astype(str).str.upper()

# Read the pairs file once, under its own name: re-reading it per intent was
# wasteful and rebound the global `pairs` to a DataFrame.
pairs_lookup = pd.read_csv("user_bot_pairs.csv") if os.path.exists("user_bot_pairs.csv") else None

dialogflow_ready = []
for cluster, intent_name in cluster_to_intent.items():
    texts = cluster_texts.get(cluster, [])[:500]  # cap
    # dedupe preserving order
    uniq_texts = list(dict.fromkeys(texts))
    if not uniq_texts:
        continue
    # Sort by word count (clamped to 1..100), longest first, keep the top 60,
    # then take every n-th phrase so that up to 15 are spread across that pool.
    sorted_by_len = sorted(uniq_texts, key=lambda x: min(max(score_phrase(x),1),100), reverse=True)
    top_pool = sorted_by_len[:60]
    n = max(1, len(top_pool)//15)
    chosen = top_pool[::n][:15]

    # Prefer the response already stored for this intent (exact, case-insensitive).
    raw_name = cluster_to_raw_name.get(cluster, intent_name)
    stored = response_rows[response_intent_upper == raw_name.upper()]
    response = ""
    if not stored.empty:
        response = str(stored.iloc[0]['text'])
    elif pairs_lookup is not None:
        # Otherwise use the most common recorded reply to any chosen phrase.
        # Missing replies (NaN) are dropped so "nan" cannot become the response.
        candidate_replies = []
        for ut in chosen:
            replies = pairs_lookup.loc[pairs_lookup['user_text'] == ut, 'bot_text']
            candidate_replies += replies.dropna().astype(str).tolist()
        if candidate_replies:
            response = Counter(candidate_replies).most_common(1)[0][0]
    dialogflow_ready.append({
        "intent": intent_name,
        "training_phrases": chosen,
        "response": response
    })

# Save CSV with JSON-encoded training_phrases for convenience
csv_rows = []
for entry in dialogflow_ready:
    csv_rows.append({
        "intent": entry['intent'],
        "training_phrases": json.dumps(entry['training_phrases'], ensure_ascii=False),
        "response": entry['response'],
    })
out_df = pd.DataFrame(csv_rows)
out_df.to_csv("dialogflow_ready_intents.csv", index=False)

# Print them nicely for copy-paste into Dialogflow
print("\n====== DIALOGFLOW-READY INTENTS (copy each block into a Dialogflow intent) ======\n")
divider = "\n" + ("-"*80) + "\n"
for entry in dialogflow_ready:
    print("INTENT NAME:", entry['intent'])
    print("TRAINING PHRASES (paste each as a separate phrase):")
    for phrase in entry['training_phrases']:
        print("-", phrase)
    print("RESPONSE (copy into Responses):")
    # an empty response falls back to the placeholder text
    print(entry['response'] or "[NO RESPONSE FOUND — write one]")
    print(divider)

print("Saved dialogflow_ready_intents.csv with", len(dialogflow_ready), "intents. Download from left Files panel.")


In [None]:
!pip install flask ngrok-lite

[31mERROR: Could not find a version that satisfies the requirement ngrok-lite (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for ngrok-lite[0m[31m
[0m

In [None]:
# ===== Paste this whole block into Google Colab and run it once. Keep it running! =====
!pip install -q flask pyngrok nest_asyncio requests

import json
from flask import Flask, request, jsonify
from pyngrok import ngrok
import nest_asyncio
import threading
import requests
import time
import os

# Allow nested event loop in Colab for Flask/ngrok interaction
nest_asyncio.apply()

app = Flask(__name__)

# Dummy DB for demo
DUMMY_ORDERS = {
    "12345678": "Shipped — expected delivery in 2 days",
    "987654": "Delivered on 2025-12-04",
    "555000": "Processing — will ship in 1 business day"
}

# ngrok authtoken setup
# You can get one by signing up at https://dashboard.ngrok.com/signup
# and finding it on https://dashboard.ngrok.com/get-started/your-authtoken
#
# SECURITY: the token is a credential and must not be stored in the notebook.
# It is read from the NGROK_AUTHTOKEN environment variable, or prompted for
# without echo. A token that was previously pasted into this cell should be
# treated as leaked and revoked in the ngrok dashboard.
ngrok_token = os.environ.get("NGROK_AUTHTOKEN", "").strip()
if not ngrok_token:
    from getpass import getpass
    ngrok_token = getpass("Enter your ngrok authtoken: ").strip()
if not ngrok_token:
    raise RuntimeError("An ngrok authtoken is required to open the tunnel.")
ngrok.set_auth_token(ngrok_token)

@app.route("/", methods=["GET"])
def home():
    """Answer GET / with a fixed plain-text status line."""
    status_line = "Webhook server is running. Use POST /webhook for Dialogflow requests."
    return status_line

@app.route("/webhook", methods=["POST"])
def webhook():
    """Handle a POSTed fulfillment request and answer ORDER_STATUS lookups.

    Reads ``queryResult.intent.displayName`` and ``queryResult.parameters``
    from the JSON body. For the ORDER_STATUS intent the order number is looked
    up in DUMMY_ORDERS; every other intent gets a debug fallback message.

    Returns:
        A JSON response with a ``fulfillmentText`` field; status 400 when the
        body is not a non-empty JSON object.
    """
    # Log incoming request for debugging
    print("\n----- INCOMING REQUEST -----")
    print("Headers:")
    for k, v in request.headers.items():
        # Credentials are not echoed into the notebook output.
        shown = "<redacted>" if k.lower() == "authorization" else v
        print(f"{k}: {shown}")
    print("\nBody:")
    body = request.get_data(as_text=True)
    print(body[:4000]) # Print up to 4000 characters of the body
    print("----- END REQUEST -----\n")

    req = request.get_json(silent=True, force=True)
    # Valid JSON that is not an object (a list, a string, ...) has no .get and
    # would otherwise fail with AttributeError instead of a clean 400.
    if not req or not isinstance(req, dict):
        print("[DEBUG] No JSON received in request.")
        return jsonify({"fulfillmentText": "[DEBUG] No JSON received (test your POST body)."}), 400

    # Defensive extraction: a key may be absent, null, or of an unexpected type.
    query_result = req.get("queryResult")
    if not isinstance(query_result, dict):
        query_result = {}
    intent_info = query_result.get("intent")
    intent = intent_info.get("displayName", "<no-intent>") if isinstance(intent_info, dict) else "<no-intent>"
    params = query_result.get("parameters")
    if not isinstance(params, dict):
        params = {}
    query_text = query_result.get("queryText", "")

    print(f"[DEBUG] Detected intent: {intent} | queryText: {query_text} | params: {params}")

    # Handle ORDER_STATUS intent
    if intent == "ORDER_STATUS":
        order_no = params.get("order_number") or params.get("orderNo") or params.get("order") or ""
        if isinstance(order_no, list) and order_no:
            order_no = order_no[0]
        # A JSON number such as 12345678.0 would stringify with a trailing
        # ".0" and never match a key in DUMMY_ORDERS.
        if isinstance(order_no, float) and order_no.is_integer():
            order_no = int(order_no)
        order_no = str(order_no).strip()

        if not order_no:
            return jsonify({"fulfillmentText": "Please provide your order number (e.g., 12345678)."})
        status = DUMMY_ORDERS.get(order_no)
        if status:
            return jsonify({"fulfillmentText": f"Order {order_no}: {status}."})
        else:
            return jsonify({"fulfillmentText": f"I couldn't find order {order_no}. Please check the number and try again."})

    # Default fallback if not handled by specific intent
    return jsonify({"fulfillmentText": f"[DEBUG] Sorry, I didn't understand that. Received intent: {intent}."})

# Terminate any existing ngrok tunnels before starting a new one
ngrok.kill()

# Start ngrok tunnel on port 5002 (or another free port)
# Use bind_tls=True for HTTPS tunnel, which is required by Dialogflow
public_url = ngrok.connect(5002, bind_tls=True)
print("Public URL:", public_url.public_url)

# Start Flask in a background thread to prevent blocking the Colab cell
def run_app():
    """Run the Flask development server on port 5002 (blocks its thread)."""
    app.run(port=5002, host="0.0.0.0")

# daemon=True replaces the deprecated Thread.setDaemon(); a daemon thread does
# not keep the process alive when the main program exits.
thread = threading.Thread(target=run_app, daemon=True)
thread.start()

# Quick test: request the health path to ensure server is up
time.sleep(1.5) # Give the server a moment to start
try:
    # A timeout stops the cell from hanging if the tunnel never answers.
    r = requests.get(public_url.public_url, timeout=10)
    print("Health check GET", public_url.public_url, "->", r.status_code)
    print("GET / response preview:", r.text[:200])
except Exception as e:
    # Best-effort check only: report the problem and carry on.
    print("Health check failed:", e)

# Built outside the f-string: reusing the f-string's own quote character inside
# a replacement field is a SyntaxError on Python versions before 3.12.
webhook_url = public_url.public_url.rstrip("/") + "/webhook"
print("\nNow point Dialogflow webhook URL to:")
print(webhook_url)
print("\nKeep this Colab cell running while you test. If Colab disconnects or you stop it, the tunnel will stop.")


ERROR:root:Unexpected exception finding object shape
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/google/colab/_debugpy_repr.py", line 54, in get_shape
    shape = getattr(obj, 'shape', None)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/werkzeug/local.py", line 318, in __get__
    obj = instance._get_current_object()
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/werkzeug/local.py", line 519, in _get_current_object
    raise RuntimeError(unbound_message) from None
RuntimeError: Working outside of request context.

This typically means that you attempted to use functionality that needed
an active HTTP request. Consult the documentation on testing for
information about how to avoid this problem.


Public URL: NgrokTunnel: "https://unfeeble-unascendable-kimberly.ngrok-free.dev" -> "http://localhost:5002"
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5002
INFO:werkzeug:[33mPress CTRL+C to quit[0m
ERROR:root:Unexpected exception finding object shape
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/google/colab/_debugpy_repr.py", line 54, in get_shape
    shape = getattr(obj, 'shape', None)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/werkzeug/local.py", line 318, in __get__
    obj = instance._get_current_object()
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/werkzeug/local.py", line 519, in _get_current_object
    raise RuntimeError(unbound_message) from None
RuntimeError: Working outside of request context.

This typically means that you attempted to use functionality that needed
an active HTTP request. Consult the documentation on testing for
information about how to avoid this problem.
ERROR:root:Unexpected exception finding object shape
Traceback (most recent call last):
  File

In [None]:
# Run this first (may take ~30s)
!pip install -q flask pyngrok==5.1.0 nest_asyncio requests


In [None]:
# Paste entire block and run this single cell.
import json
from flask import Flask, request, jsonify
from pyngrok import ngrok
import nest_asyncio
import threading
import requests
import time
import os

# Dummy DB for demo: order number -> human-readable status
DUMMY_ORDERS = dict([
    ("12345678", "Shipped — expected delivery in 2 days"),
    ("987654", "Delivered on 2025-12-04"),
    ("555000", "Processing — will ship in 1 business day"),
])

app = Flask(__name__)

# allow nested event loop in Colab
nest_asyncio.apply()

@app.route("/", methods=["GET"])
def home():
    """Answer GET / with a fixed plain-text status line."""
    status_line = "Webhook server is running. Use POST /webhook for Dialogflow requests."
    return status_line

@app.route("/webhook", methods=["POST"])
def webhook():
    """Handle a POSTed fulfillment request and answer ORDER_STATUS lookups.

    Reads ``queryResult.intent.displayName`` and ``queryResult.parameters``
    from the JSON body. For the ORDER_STATUS intent the order number is looked
    up in DUMMY_ORDERS; every other intent gets a generic fallback message.

    Returns:
        A JSON response with a ``fulfillmentText`` field; status 400 when the
        body is not a non-empty JSON object.
    """
    req = request.get_json(silent=True, force=True)
    # Valid JSON that is not an object (a list, a string, ...) has no .get and
    # would otherwise fail with AttributeError instead of a clean 400.
    if not req or not isinstance(req, dict):
        return jsonify({"fulfillmentText": "No JSON received (test your POST body)."}), 400

    # defensive extraction: a key may be absent, null, or of an unexpected type
    query_result = req.get("queryResult")
    if not isinstance(query_result, dict):
        query_result = {}
    intent_info = query_result.get("intent")
    intent = intent_info.get("displayName", "") if isinstance(intent_info, dict) else ""
    params = query_result.get("parameters")
    if not isinstance(params, dict):
        params = {}

    # Handle ORDER_STATUS intent
    if intent == "ORDER_STATUS":
        order_no = params.get("order_number") or params.get("orderNo") or params.get("order") or ""
        order_no = (order_no[0] if isinstance(order_no, list) and order_no else order_no) or ""
        # A JSON number such as 12345678.0 would stringify with a trailing
        # ".0" and never match a key in DUMMY_ORDERS.
        if isinstance(order_no, float) and order_no.is_integer():
            order_no = int(order_no)
        order_no = str(order_no).strip()
        if not order_no:
            return jsonify({"fulfillmentText": "Please provide your order number (e.g., 12345678)."})
        status = DUMMY_ORDERS.get(order_no)
        if status:
            return jsonify({"fulfillmentText": f"Order {order_no}: {status}"})
        else:
            return jsonify({"fulfillmentText": f"I couldn't find order {order_no}. Please check the number and try again."})

    # Default fallback if not handled
    return jsonify({"fulfillmentText": "Sorry, I didn't understand that. Please ask about order status, returns, or shipping."})

# Start ngrok tunnel
print("Starting ngrok tunnel (this can take a few seconds)...")
public_url = ngrok.connect(5000, bind_tls=True)
# connect() may return a tunnel object whose printed form is a description
# rather than a URL; show its public_url attribute when it has one.
print("Public URL:", getattr(public_url, "public_url", public_url))

# Start Flask in a background thread (so this cell doesn't block other cells)
def run_app():
    """Run the Flask development server on port 5000 (blocks its thread)."""
    app.run(port=5000, host="0.0.0.0")

# daemon=True replaces the deprecated Thread.setDaemon(); a daemon thread does
# not keep the process alive when the main program exits.
thread = threading.Thread(target=run_app, daemon=True)
thread.start()

# Quick test: request the health path to ensure server is up
time.sleep(1.5)
try:
    r = requests.get(str(public_url))
    print("Health check GET", public_url, "->", r.status_code)
    print("GET / response preview:", r.text[:200])
except Exception as e:
    print("Health check failed:", e)

print("\nNow point Dialogflow webhook URL to:")
print(f"{public_url}/webhook")
print("\nKeep this Colab cell running while you test. If Colab disconnects or you stop it, the tunnel will stop.")

In [None]:
# Replace with the public URL printed above (include https://...)
PUBLIC_URL = "https://unfeeble-unascendable-kimberly.ngrok-free.dev/"  # e.g. "https://abcd-1234.ngrok.io"

# Minimal Dialogflow-style request body: ORDER_STATUS intent with no
# parameters, which exercises the webhook's "no order number supplied" path.
sample_payload = {
  "responseId": "test",
  "queryResult": {
    "queryText": "Where is my order?",
    "parameters": {},
    "intent": {"displayName": "ORDER_STATUS"}
  }
}

# Normalise the trailing slash so the path is never doubled ("//webhook").
url = PUBLIC_URL.rstrip("/") + "/webhook"
print("Testing POST to:", url)
# Bounded wait: without a timeout a stale or unreachable tunnel would hang
# this cell indefinitely.
r = requests.post(url, json=sample_payload, timeout=15)
print("Status:", r.status_code)
print("Response json:", r.text)


In [None]:
# Paste into a fresh Colab cell and run it. Keep it running.
!pip install -q flask pyngrok==5.1.0 nest_asyncio requests

import time, threading, json
from flask import Flask, request, jsonify
from pyngrok import ngrok
import nest_asyncio
import requests

# Allow nested event loops, since the notebook kernel already runs one.
nest_asyncio.apply()
# Flask application object; the route handlers below register on it.
app = Flask(__name__)

# Dummy DB for demo (moved from zz6Jzwpz5fl_ to MsLxLa21AB5u)
# NOTE(review): the two IDs above look like notebook cell IDs recording where
# this table was moved from/to — confirm, or drop the note if it is stale.
# Maps an order number (as a string) to the status text the webhook returns;
# the webhook looks keys up by exact string match.
DUMMY_ORDERS = {
    "12345678": "Shipped — expected delivery in 2 days",
    "987654": "Delivered on 2025-12-04",
    "555000": "Processing — will ship in 1 business day"
}

@app.route("/", methods=["GET"])
def home():
    """Answer GET / with a plain-text notice that the debug server is up."""
    status_message = "Webhook debug server running. Use POST /webhook for Dialogflow requests."
    return status_message

@app.route("/webhook", methods=["POST"])
def webhook():
    """Debug Dialogflow webhook: log the request, then answer with ``fulfillmentText``.

    Prints the incoming headers and up to 4000 characters of the body to the
    cell output, then handles the ``ORDER_STATUS`` intent by looking the order
    number up in ``DUMMY_ORDERS``. Any other intent gets a debug fallback that
    echoes the detected intent name.

    Returns:
        A JSON response with a ``fulfillmentText`` key. The status is 400 when
        the body is missing, empty, or not a JSON object. Every other path,
        including a caught exception, answers 200 so the caller still receives
        a fulfillment message.
    """
    try:
        # Log headers and body to Colab output (visible in cell)
        print("\n----- INCOMING REQUEST -----")
        print("Headers:")
        for header_name, header_value in request.headers.items():
            print(f"{header_name}: {header_value}")
        print("\nBody:")
        body = request.get_data(as_text=True)
        print(body[:4000])   # show up to 4000 chars
        print("----- END REQUEST -----\n")

        # force=True parses the body regardless of Content-Type; silent=True
        # yields None instead of raising on malformed JSON.
        req = request.get_json(silent=True, force=True)
        # Valid JSON can still be a list/string/number, which has no .get().
        if not req or not isinstance(req, dict):
            print("[DEBUG] No JSON received in request.")
            return jsonify({"fulfillmentText": "[DEBUG] No JSON received (test your POST body)."}), 400

        # Extract queryResult, intent, and parameters. A key that is missing,
        # null, or of the wrong type is treated as empty, so a malformed
        # request gets a normal reply rather than the caught-exception message.
        query_result = req.get("queryResult")
        if not isinstance(query_result, dict):
            query_result = {}
        intent_info = query_result.get("intent")
        if isinstance(intent_info, dict):
            intent = intent_info.get("displayName", "<no-intent>")
        else:
            intent = "<no-intent>"
        params = query_result.get("parameters")
        if not isinstance(params, dict):
            params = {}
        query_text = query_result.get("queryText", "")

        print(f"[DEBUG] Detected intent: {intent} | queryText: {query_text} | params: {params}")

        # Default fallback if not handled by specific intent
        if intent != "ORDER_STATUS":
            return jsonify({"fulfillmentText": f"[DEBUG] Sorry, I didn't understand that. Received intent: {intent}."})

        # Accept any of the parameter names the agent might use.
        order_no = params.get("order_number") or params.get("orderNo") or params.get("order") or ""
        # List-valued parameter: use its first entry.
        if isinstance(order_no, list) and order_no:
            order_no = order_no[0]
        # Numeric parameters may arrive as floats (e.g. 12345678.0); drop the
        # fractional ".0" so the text matches the string keys in DUMMY_ORDERS.
        if isinstance(order_no, float) and order_no.is_integer():
            order_no = int(order_no)
        order_no = str(order_no).strip()

        if not order_no:
            fulfillment_text = "Please provide your order number (e.g., 12345678)."
        else:
            status = DUMMY_ORDERS.get(order_no)
            if status:
                fulfillment_text = f"Order {order_no}: {status}."
            else:
                fulfillment_text = f"I couldn't find order {order_no}. Please check the number and try again."
        return jsonify({"fulfillmentText": fulfillment_text})

    except Exception as e:
        # Prevent any 500 errors — return 200 and print the exception.
        # The full traceback is printed too, so the failing line is visible.
        import traceback

        print("Exception in webhook handler:", e)
        traceback.print_exc()
        return jsonify({"fulfillmentText": f"[DEBUG] Server error (caught): {e}"}), 200

def run():
    """Run the Flask development server on port 5001 (blocks until it exits)."""
    app.run(host="0.0.0.0", port=5001)

# start ngrok tunnel (https)
print("Starting ngrok tunnel...")
public_url = ngrok.connect(5001, bind_tls=True)
print("Public URL:", public_url.public_url)

# Start Flask BEFORE probing the tunnel: until the server is listening the
# tunnel has nothing to forward to, so an earlier health check always fails.
thread = threading.Thread(target=run, daemon=True)
thread.start()
print("Flask server started; keep this Colab cell running while testing.")

time.sleep(1.5)  # give the Flask thread a moment to bind the port
try:
    # One request (status and body come from the same response), with a
    # bounded wait so an unreachable tunnel cannot hang or abort the cell.
    health_resp = requests.get(public_url.public_url, timeout=10)
    print("Health check:", health_resp.status_code, health_resp.text[:200])
except requests.RequestException as exc:
    print("Health check failed:", exc)

print("Dialogflow webhook URL to use:", public_url.public_url.rstrip("/") + "/webhook")

In [None]:
# inside webhook() after parsing query_result, params
# NOTE: this is a fragment to paste into the body of webhook(), not a cell to
# run on its own: it uses `return` and reads `query_result`, both of which are
# only valid inside that function.
intent = query_result.get("intent", {}).get("displayName", "")
params = query_result.get("parameters", {})

if intent == "ORDER_STATUS":
    # Accept any of the three parameter names; fall back to an empty string.
    order_no = params.get("order_number") or params.get("orderNo") or params.get("order") or ""
    # List-valued parameter: use its first entry.
    if isinstance(order_no, list) and order_no:
        order_no = order_no[0]
    # Normalise to a stripped string, since DUMMY_ORDERS is keyed by strings.
    order_no = str(order_no).strip()

    if not order_no:
        return jsonify({"fulfillmentText": "Please provide your order number (e.g., 12345678)."})
    # lookup in your dummy DB
    status = DUMMY_ORDERS.get(order_no)
    if status:
        return jsonify({"fulfillmentText": f"Order {order_no}: {status}."})
    else:
        # Unlike the full handlers earlier in the notebook, this not-found
        # message tells the user to type 'agent' to contact support.
        return jsonify({"fulfillmentText": f"I couldn't find order {order_no}. Please check the number or type 'agent' to contact support."})
