In [None]:
!fuser -k 8000/tcp || true

In [1]:
%%writefile /content/app.py
import re
import numpy as np
import pandas as pd
from datetime import datetime

from fastapi import FastAPI
from fastapi.responses import HTMLResponse, JSONResponse

DATA_PATH = "/content/riyadh_resturants_clean.csv"

app = FastAPI(title="Riyadh Restaurants Chat")

# Chat history storage (DataFrame)
CHAT_LOG_COLUMNS = [
    "timestamp",
    "question",
    "cuisine",
    "price",
    "location",
    "rating_min",
    "top_k",
    "results_count",
    "results"  # list of dict rows
]
chat_history = pd.DataFrame(columns=CHAT_LOG_COLUMNS)

def save_chat_result(question: str, parsed: dict, results_df: pd.DataFrame | None):
    """Append one interaction row into chat_history DataFrame."""
    global chat_history
    row = {
        "timestamp": datetime.now(),
        "question": question,
        "cuisine": parsed.get("cuisine"),
        "price": parsed.get("price"),
        "location": parsed.get("location"),
        "rating_min": parsed.get("rating_min"),
        "top_k": parsed.get("top_k"),
        "results_count": 0 if results_df is None else int(len(results_df)),
        "results": [] if results_df is None else results_df.fillna("").to_dict(orient="records")
    }
    chat_history = pd.concat([chat_history, pd.DataFrame([row])], ignore_index=True)


# Utils
def normalize_text(x):
    if pd.isna(x):
        return ""
    x = str(x).strip().lower()
    x = re.sub(r"\s+", " ", x)
    return x

def parse_categories(cat_str):
    s = normalize_text(cat_str)
    if not s:
        return "", []
    s = s.replace("[", "").replace("]", "").replace("'", "").replace('"', "")
    parts = [p.strip() for p in re.split(r"[,\|/;•]+", s) if p.strip()]
    uniq, seen = [], set()
    for p in parts:
        if p not in seen:
            seen.add(p)
            uniq.append(p)
    return ", ".join(uniq), uniq

def to_float_safe(x):
    if pd.isna(x):
        return np.nan
    try:
        return float(str(x).strip())
    except:
        return np.nan

def to_int_safe(x):
    if pd.isna(x):
        return np.nan
    try:
        return int(float(str(x).strip()))
    except:
        return np.nan


# Preprocess
def preprocess(df: pd.DataFrame) -> pd.DataFrame:
    df = df.copy()
    df.columns = df.columns.str.strip().str.lower().str.replace(" ", "_")
    df = df.dropna(axis=1, how="all")

    for col in ["name", "address", "categories", "category", "cuisine", "city", "area", "price"]:
        if col in df.columns:
            df[col] = df[col].apply(normalize_text)

    if "categories" in df.columns:
        cats = df["categories"].apply(parse_categories)
        df["categories_clean"] = cats.apply(lambda t: t[0])
        df["cuisines_list"] = cats.apply(lambda t: t[1])
    elif "category" in df.columns:
        cats = df["category"].apply(parse_categories)
        df["categories_clean"] = cats.apply(lambda t: t[0])
        df["cuisines_list"] = cats.apply(lambda t: t[1])
    else:
        if "categories_clean" not in df.columns:
            df["categories_clean"] = ""

    for col in ["rating", "latitude", "longitude"]:
        if col in df.columns:
            df[col] = df[col].apply(to_float_safe)

    for col in ["reviews", "review_count", "likes", "photos", "tips"]:
        if col in df.columns:
            df[col] = df[col].apply(to_int_safe)

    if "rating" in df.columns:
        df["rating"] = df["rating"].fillna(df["rating"].mean())

    if "price" in df.columns:
        df["price"] = df["price"].replace("", np.nan).fillna("unknown")

    for col in ["likes", "photos", "tips", "reviews", "review_count"]:
        if col in df.columns:
            df[col] = df[col].fillna(0).astype(int)

    if "name" in df.columns and "address" in df.columns:
        df = df.drop_duplicates(subset=["name", "address"], keep="first")
    elif "name" in df.columns:
        df = df.drop_duplicates(subset=["name"], keep="first")
    else:
        df = df.drop_duplicates(keep="first")

    return df


# Lexicons
def build_lexicons(df):
    lex = {}
    if "address" in df.columns:
        addr = df["address"].fillna("").astype(str).str.lower()
        words = []
        for a in addr:
            words.extend([w for w in re.split(r"[^اأإآء-يa-z0-9]+", a) if len(w) >= 3])
        w_counts = pd.Series(words).value_counts()
        lex["location_terms"] = list(w_counts[w_counts >= 200].index)
    else:
        lex["location_terms"] = []
    return lex


# Query parsing maps
PRICE_MAP = {
    "cheap": ["رخيص", "اقتصادي", "cheap", "low", "budget"],
    "mid": ["متوسط", "moderate", "mid"],
    "expensive": ["غالي", "فاخر", "expensive", "high", "luxury", "fine dining", "راقي", "راقية"]
}
CUISINE_MAP = {
    "لبناني": "lebanese",
    "إيطالي": "italian",
    "ياباني": "japanese",
    "هندي": "indian",
    "صيني": "chinese",
    "تركي": "turkish",
    "أمريكي": "american",
    "مكسيكي": "mexican",
    "كوري": "korean",
    "برجر": "burger",
    "بيتزا": "pizza",
    "كافيه": "coffee",
    "قهوة": "coffee"
}

def parse_query(q: str, lexicons):
    q0 = q
    q = q.strip().lower()

    cuisine = None
    for ar, en in CUISINE_MAP.items():
        if ar in q:
            cuisine = en
            break

    location = None
    for t in lexicons.get("location_terms", []):
        if t and t in q:
            location = t
            break

    price = None
    for bucket, words in PRICE_MAP.items():
        if any(w in q for w in words):
            price = bucket
            break

    rating_min = None
    m = re.search(r"(?:تقييم|rating)?\s*(?:فوق|أعلى من|>=|>)\s*(\d(?:\.\d)?)", q)
    if m:
        rating_min = float(m.group(1))

    top_k = 5
    m = re.search(r"(?:top|أفضل)\s*(\d+)", q)
    if m:
        top_k = int(m.group(1))

    return {
        "raw": q0,
        "cuisine": cuisine,
        "location": location,
        "price": price,
        "rating_min": rating_min,
        "top_k": top_k
    }


# Search
def run_search(df, parsed):
    work = df.copy()

    if parsed["cuisine"] and "categories_clean" in work.columns:
        work = work[work["categories_clean"].str.contains(parsed["cuisine"], na=False)]

    if parsed["location"] and "address" in work.columns:
        work = work[work["address"].str.contains(parsed["location"], na=False)]

    if parsed["price"] and "price" in work.columns:
        p = work["price"].astype(str).str.lower()
        if parsed["price"] == "cheap":
            work = work[p.str.contains(r"(cheap|low|budget|economy|رخيص|اقتصادي)", na=False)]
        elif parsed["price"] == "mid":
            work = work[p.str.contains(r"(moderate|mid|متوسط)", na=False)]
        elif parsed["price"] == "expensive":
            work = work[p.str.contains(r"(expensive|high|luxury|fine dining|فاخر|غالي|راقي|راقية)", na=False)]

    if parsed["rating_min"] is not None and "rating" in work.columns:
        work = work[work["rating"] >= parsed["rating_min"]]

    sort_cols = []
    if "rating" in work.columns: sort_cols.append("rating")
    if "likes" in work.columns: sort_cols.append("likes")
    if sort_cols:
        work = work.sort_values(sort_cols, ascending=False)

    out_cols = [c for c in ["name", "categories_clean", "rating", "price", "address"] if c in work.columns]
    return work[out_cols].head(parsed["top_k"])

def to_cards(results_df):
    if results_df is None or len(results_df) == 0:
        return []
    out = results_df.copy()
    if "rating" in out.columns:
        out["rating"] = out["rating"].astype(float).round(1)
    return out.fillna("").to_dict(orient="records")


# Load data once
raw_df = pd.read_csv(DATA_PATH)
DF = preprocess(raw_df)
LEX = build_lexicons(DF)


# API endpoints
@app.get("/ping")
def ping():
    return {"ok": True, "msg": "pong"}

@app.get("/history")
def get_history():
    # return all history as JSON
    return chat_history.fillna("").to_dict(orient="records")

@app.get("/export")
def export_history():
    path = "/content/chat_history.csv"
    chat_history.to_csv(path, index=False)
    return {"ok": True, "path": path, "rows": int(len(chat_history))}

@app.post("/chat")
def chat(payload: dict):
    msg = (payload.get("message") or "").strip()
    if not msg:
        # store empty question? usually no. just reply.
        return {"ok": True, "reply": "اكتب سؤالك عن مطاعم الرياض ", "results": []}

    parsed = parse_query(msg, LEX)

    # general question -> ask clarification (and log it with no results)
    if not any([parsed["cuisine"], parsed["price"], parsed["location"], parsed["rating_min"]]):
        save_chat_result(msg, parsed, None)
        return {
            "ok": True,
            "reply": (
                "سؤالك عام \n"
                "حدد لي: نوع المطبخ أو السعر أو الحي أو التقييم.\n"
                "مثال: مطاعم لبناني في حي الياسمين تقييم فوق 8"
            ),
            "parsed": parsed,
            "results": []
        }

    # search + log
    results_df = run_search(DF, parsed)
    save_chat_result(msg, parsed, results_df)

    cards = to_cards(results_df)
    if not cards:
        return {"ok": True, "reply": "ما لقيت نتائج مطابقة. جرّب تغير الشروط شوي ", "parsed": parsed, "results": []}

    return {"ok": True, "reply": f"لقيت لك {len(cards)} نتائج ", "parsed": parsed, "results": cards}


# WEB UI
@app.get("/", response_class=HTMLResponse)
def home():
    return """
<!doctype html>
<html lang="ar" dir="rtl">
<head>
<meta charset="utf-8"/>
<meta name="viewport" content="width=device-width,initial-scale=1"/>
<title>مطاعم الرياض</title>
<style>
body{font-family:system-ui,Arial;margin:0;background:#f6f6f6}
.wrap{max-width:980px;margin:0 auto;padding:18px}
.top{display:flex;align-items:center;justify-content:space-between;gap:10px}
h1{margin:0;font-size:22px}
.card{background:#fff;border-radius:14px;padding:16px;box-shadow:0 2px 12px rgba(0,0,0,.06)}
#chat{height:52vh;overflow:auto;background:#fafafa;border:1px solid #eee;border-radius:12px;padding:12px}
.row{display:flex;gap:10px;margin-top:12px}
input{flex:1;padding:12px;border-radius:12px;border:1px solid #ddd;font-size:15px}
button{padding:12px 16px;border-radius:12px;border:1px solid #ddd;background:#111;color:#fff;cursor:pointer}
.msg{display:flex;margin:10px 0}
.me{justify-content:flex-start}
.bot{justify-content:flex-end}
.bubble{max-width:75%;padding:10px 12px;border-radius:14px;line-height:1.6;font-size:14px;white-space:pre-wrap}
.me .bubble{background:#fff;border:1px solid #e6e6e6}
.bot .bubble{background:#e9f2ff;border:1px solid #d7e7ff}
.hint{color:#666;font-size:13px;margin-top:8px}
.grid{display:grid;grid-template-columns:repeat(2,minmax(0,1fr));gap:10px;margin-top:12px}
.rcard{background:#fff;border:1px solid #eee;border-radius:14px;padding:12px}
.rname{font-weight:800;margin:0 0 6px 0}
.meta{color:#444;font-size:13px;margin:0}
.pill{display:inline-block;background:#f2f2f2;border-radius:999px;padding:4px 8px;font-size:12px;margin:6px 6px 0 0}
@media (max-width:700px){.grid{grid-template-columns:1fr}}
</style>
</head>
<body>
<div class="wrap">
  <div class="top">
    <h1> مطاعم الرياض</h1>
    <div class="hint">History: <code>/history</code> • Export: <code>/export</code></div>
  </div>

  <div class="card" style="margin-top:12px">
    <div id="chat"></div>

    <div class="row">
      <input id="msg" placeholder="مثال: مطاعم لبناني في حي الياسمين تقييم فوق 8" />
      <button id="send">إرسال</button>
    </div>

    <div class="hint">
      أمثلة: "مطاعم رخيصة في حي الياسمين" — "أفضل 10 بيتزا تقييم فوق 4.2" — "fine dining"
    </div>

    <div id="results"></div>
  </div>
</div>

<script>
const chat = document.getElementById("chat");
const msg = document.getElementById("msg");
const sendBtn = document.getElementById("send");
const resultsDiv = document.getElementById("results");

function addMessage(text, who){
  const row=document.createElement("div");
  row.className="msg "+(who==="me"?"me":"bot");
  const b=document.createElement("div");
  b.className="bubble";
  b.textContent=text;
  row.appendChild(b);
  chat.appendChild(row);
  chat.scrollTop=chat.scrollHeight;
}

function esc(x){
  return String(x ?? "").replaceAll("&","&amp;").replaceAll("<","&lt;").replaceAll(">","&gt;");
}

function renderCards(items){
  if(!items || items.length===0){ resultsDiv.innerHTML=""; return; }
  let html = `<div class="grid">`;
  for(const r of items){
    html += `
      <div class="rcard">
        <p class="rname">${esc(r.name)}</p>
        ${r.categories_clean ? `<span class="pill">${esc(r.categories_clean)}</span>` : ``}
        ${r.price ? `<span class="pill">السعر: ${esc(r.price)}</span>` : ``}
        ${r.rating ? `<span class="pill">التقييم: ${esc(r.rating)}</span>` : ``}
        ${r.address ? `<p class="meta" style="margin-top:8px"> ${esc(r.address)}</p>` : ``}
      </div>`;
  }
  html += `</div>`;
  resultsDiv.innerHTML = html;
}

async function sendMessage(){
  const text = msg.value.trim();
  if(!text) return;

  addMessage(text,"me");
  msg.value="";
  renderCards([]);

  try{
    const res = await fetch("/chat", {
      method:"POST",
      headers:{ "Content-Type":"application/json" },
      body: JSON.stringify({message:text})
    });

    const raw = await res.text();
    if(!res.ok){
      addMessage("خطأ من السيرفر: " + res.status + "\\n" + raw.slice(0,300), "bot");
      return;
    }

    const data = JSON.parse(raw);
    addMessage(data.reply || "تم", "bot");
    renderCards(data.results || []);

  }catch(e){
    addMessage("تعذر الاتصال بالسيرفر.", "bot");
  }
}

sendBtn.addEventListener("click", sendMessage);
msg.addEventListener("keydown", (e)=>{ if(e.key==="Enter") sendMessage(); });
addMessage("هلا! اكتب سؤالك ","bot");
</script>
</body>
</html>
"""


Overwriting /content/app.py


In [3]:
import os, uvicorn, threading
os.chdir("/content")

def run():
    uvicorn.run("app:app", host="0.0.0.0", port=8000, log_level="info")

threading.Thread(target=run, daemon=True).start()

In [8]:
import requests
requests.get("http://127.0.0.1:8000/export").json()

INFO:     127.0.0.1:41806 - "GET /export HTTP/1.1" 200 OK


{'ok': True, 'path': '/content/chat_history.csv', 'rows': 1}

In [9]:
import pandas as pd
df_log = pd.read_csv("/content/chat_history.csv")
df_log.head()

Unnamed: 0,timestamp,question,cuisine,price,location,rating_min,top_k,results_count,results
0,2025-12-16 22:44:05.608631,افضل مطاعم يابانيه,japanese,,,,5,5,"[{'name': 'nozomi (نوزومي)', 'categories_clean..."
