<h1> Step 0: Installs (Colab/Jupyter) </h1>

In [1]:
!pip -q install "transformers>=4.35" "accelerate>=0.21" gradio pandas

<h1> Step 1: Imports & runtime setup </h1>

In [2]:
import os, re, json, traceback, torch, pandas as pd
import gradio as gr
from typing import Dict, List, Tuple, Optional

from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Optional: switch off wandb / Hugging Face Hub telemetry and tokenizer
# parallelism in a single update of the process environment.
# NOTE(review): these are set after `transformers` has been imported above;
# confirm that every variable is still honoured when set at this point.
os.environ.update(
    {
        "WANDB_MODE": "disabled",
        "WANDB_DISABLED": "true",
        "HF_HUB_DISABLE_TELEMETRY": "1",
        "TOKENIZERS_PARALLELISM": "false",
    }
)

<h1> Step 2: (Colab) Mount Google Drive </h1>

In [3]:
# Mount Google Drive when running inside Colab so the checkpoint paths under
# /content/drive resolve. Outside Colab the import fails and nothing is done.
try:
    from google.colab import drive
    drive.mount('/content/drive', force_remount=True)
except ImportError:
    # Not in Colab; ignore.
    pass
except Exception as mount_err:
    # Still best-effort (never abort the notebook), but say why the mount
    # failed: otherwise the only symptom is that no models are found later.
    print(
        f"WARNING: could not mount Google Drive ({mount_err!r}); "
        "checkpoints under /content/drive will not be available."
    )

Mounted at /content/drive


<h1> Step 3: Constants (paths, label sets, aspects) </h1>

In [4]:
# Best IMDb (binary) checkpoints from the earlier training run, keyed by base
# model name. EDIT these if your Drive layout differs.
IMDB_MODELS: Dict[str, str] = {
    "roberta-base":
        "/content/drive/MyDrive/bert_imdb_sentiment_analysis/models/roberta-base/best_model",
    "bert-base-uncased":
        "/content/drive/MyDrive/bert_imdb_sentiment_analysis/models/bert-base-uncased/best_model",
    "distilbert-base-uncased":
        "/content/drive/MyDrive/bert_imdb_sentiment_analysis/models/distilbert-base-uncased/best_model",
}

# Root folder and track sub-folders written by the ABSA training notebook.
ABSA_ROOT = "/content/drive/MyDrive/ABSA"
TRACKS = ["TrackA", "TrackB"]

# Sentihood aspects; every (target, aspect) pair is scored by the ABSA model.
ASPECTS = [
    "dining",
    "general",
    "green-nature",
    "live",
    "multicultural",
    "nightlife",
    "price",
    "quiet",
    "safety",
    "shopping",
    "touristy",
    "transit-location",
]

# Sentiment classes in the order of the 4-way ABSA head, plus the two lookup
# tables between class name and class index.
SENTIMENTS = ["negative", "neutral", "positive", "none"]
SENT2ID = {name: index for index, name in enumerate(SENTIMENTS)}
ID2SENT = dict(enumerate(SENTIMENTS))

# Regex to extract Sentihood-style LOCATION targets (kept for backwards-compat)
TARGET_RX = re.compile(r"LOCATION\d+", re.IGNORECASE)
def extract_targets(text: str) -> List[str]:
    """Return the distinct LOCATION<n> targets mentioned in ``text``.

    Matching is case-insensitive, so mentions that differ only by letter case
    ("LOCATION1" / "location1") are treated as the same target; the spelling of
    the first occurrence is kept. Targets are ordered by their number, so
    LOCATION2 precedes LOCATION10.

    Args:
        text: Input sentence; ``None`` or an empty string is accepted.

    Returns:
        The ordered target list, or ``["LOCATION1"]`` when none is found.
    """
    prefix_len = len("LOCATION")

    # Case-insensitive de-duplication that remembers the first spelling seen.
    first_spelling = {}
    for hit in TARGET_RX.findall(text or ""):
        first_spelling.setdefault(hit.upper(), hit)

    def _numeric_order(target: str):
        # Compare digit strings by (length, value) after dropping leading
        # zeros: numeric order without int(), which rejects very long inputs.
        digits = target[prefix_len:].lstrip("0")
        return (len(digits), digits, target.upper())

    ts = sorted(first_spelling.values(), key=_numeric_order)
    # NOTE: If you do not want any fallback, return [] here instead of ["LOCATION1"].
    return ts if ts else ["LOCATION1"]

<h1> Step 4: Utilities: validate & discover checkpoints </h1>

In [5]:
WEIGHTS = {"model.safetensors", "pytorch_model.bin", "tf_model.h5", "flax_model.msgpack"}

def _has_weights_dir(p: Optional[str]) -> bool:
    if not p or not os.path.isdir(p): return False
    try:
        fs = set(os.listdir(p))
        return bool(WEIGHTS & fs) and ("config.json" in fs)
    except Exception:
        return False

def discover_imdb_models() -> Dict[str, str]:
    """Return the IMDB_MODELS entries whose folder passes ``_has_weights_dir``.

    Keys and insertion order follow IMDB_MODELS; unusable paths are dropped.
    """
    return {
        model_key: model_path
        for model_key, model_path in IMDB_MODELS.items()
        if _has_weights_dir(model_path)
    }

def discover_absa_models() -> Dict[str, Dict[str, str]]:
    """Scan ABSA_ROOT/<track>/<model>/best for usable checkpoints.

    Returns:
        ``{track: {display name: path to the "best" folder}}``. Known base
        model folder names are mapped to a readable display name, other folder
        names are used unchanged. Tracks whose folder is missing or holds no
        usable checkpoint are left out.
    """
    display_names = {
        "roberta-base": "RoBERTa Base",
        "bert-base-uncased": "BERT Base Uncased",
        "distilbert-base-uncased": "DistilBERT Base Uncased"
    }
    catalog: Dict[str, Dict[str, str]] = {}
    for track in TRACKS:
        track_dir = os.path.join(ABSA_ROOT, track)
        if os.path.isdir(track_dir):
            # Sorted so the per-track entries come out in a deterministic order.
            candidates = (
                (entry, os.path.join(track_dir, entry, "best"))
                for entry in sorted(os.listdir(track_dir))
            )
            models = {
                display_names.get(entry, entry): best_path
                for entry, best_path in candidates
                if _has_weights_dir(best_path)
            }
            if models:
                catalog[track] = models
    return catalog

# Checkpoints that actually exist on disk right now.
IMDB_FOUND = discover_imdb_models()   # { "roberta-base": "/path/..." , ... }
ABSA_FOUND = discover_absa_models()   # { "TrackA": {"RoBERTa Base": "/.../best", ...}, "TrackB": {...} }

# Display names for IMDb dropdown (hide raw paths)
PRETTY_MAP = dict(
    [
        ("roberta-base", "RoBERTa Base"),
        ("bert-base-uncased", "BERT Base Uncased"),
        ("distilbert-base-uncased", "DistilBERT Base Uncased"),
    ]
)

# Display name -> checkpoint path; unknown model keys are shown unchanged.
imdb_display_to_path = {}
for model_key, model_path in IMDB_FOUND.items():
    imdb_display_to_path[PRETTY_MAP.get(model_key, model_key)] = model_path

# Alphabetically ordered entries offered by the IMDb dropdown.
IMDB_CHOICES = sorted(imdb_display_to_path)

<h1> Step 5: Device & robust model cache/loader </h1>

In [6]:

# Preferred inference device: CUDA when torch reports it as available, else CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# Loaded (tokenizer, model) pairs keyed by checkpoint directory; filled by load_model.
_cache: Dict[str, Tuple[AutoTokenizer, AutoModelForSequenceClassification]] = dict()

def _validate_checkpoint_dir(model_dir: str):
    if not model_dir or not os.path.isdir(model_dir):
        raise FileNotFoundError(f"Path does not exist: {model_dir!r}")
    files = set(os.listdir(model_dir))
    if "config.json" not in files:
        raise FileNotFoundError(f"Missing config.json in {model_dir}")
    if not (WEIGHTS & files):
        raise FileNotFoundError(
            f"No weights file found in {model_dir}. Expected one of: {sorted(WEIGHTS)}"
        )

def load_model(model_dir: str) -> Tuple[AutoTokenizer, AutoModelForSequenceClassification]:
    """Load (and memoise) the tokenizer and classifier stored in ``model_dir``.

    The directory is validated first. The model is loaded and moved to DEVICE;
    if that raises, a second load targeting the CPU is attempted.

    Returns:
        ``(tokenizer, model)`` with the model in eval mode. The pair is cached
        in ``_cache`` so later calls with the same path return immediately.

    Raises:
        FileNotFoundError: from the directory validation.
        RuntimeError: when the CPU retry fails as well (chained to the first
            error).
    """
    cached_pair = _cache.get(model_dir)
    if cached_pair is not None:
        return cached_pair

    _validate_checkpoint_dir(model_dir)

    tokenizer = AutoTokenizer.from_pretrained(model_dir, use_fast=True)

    try:
        # First attempt: preferred device.
        model = AutoModelForSequenceClassification.from_pretrained(model_dir)
        model.to(DEVICE).eval()
    except Exception as first_error:
        try:
            # Second attempt: reload from scratch and stay on the CPU.
            model = AutoModelForSequenceClassification.from_pretrained(model_dir)
            model.to("cpu").eval()
        except Exception as second_error:
            raise RuntimeError(f"Failed to load model from {model_dir}: {second_error}") from first_error

    loaded_pair = (tokenizer, model)
    _cache[model_dir] = loaded_pair
    return loaded_pair

<h1> Step 6: Inference helpers </h1>

In [7]:

def imdb_predict(text: str, model_dir: str) -> Tuple[str, float]:
    """Classify ``text`` with the binary IMDb checkpoint in ``model_dir``.

    Returns:
        ``(label, confidence)`` where label is "positive" when class index 1
        wins and "negative" otherwise, and confidence is that class's softmax
        probability. Blank input returns ``("—", 0.0)`` without loading a model.

    Raises:
        ValueError: if the checkpoint does not have exactly 2 output classes.
    """
    cleaned = str(text or "").strip()
    if cleaned == "":
        return "—", 0.0

    tokenizer, model = load_model(model_dir)
    encoded = tokenizer(cleaned, return_tensors="pt", truncation=True, padding=True, max_length=256)
    # Move every input tensor to the device the model lives on.
    encoded = {name: tensor.to(model.device) for name, tensor in encoded.items()}

    with torch.no_grad():
        scores = model(**encoded).logits
        probs = torch.softmax(scores, dim=-1).squeeze(0).cpu().numpy()

    n_classes = probs.shape[-1]
    if n_classes != 2:
        raise ValueError(
            f"This IMDb checkpoint has {n_classes} classes, expected 2. "
            f"Did you select an ABSA model by mistake?"
        )

    winner = int(probs.argmax())
    if winner == 1:
        label = "positive"
    else:
        label = "negative"
    return label, float(probs[winner])

def absa_table(text: str, model_dir: str, min_conf=0.60, min_margin=0.20, top_k: Optional[int]=6) -> pd.DataFrame:
    """Score every (target, aspect) pair of ``text`` and tabulate confident hits.

    One prompt is built per target returned by ``extract_targets`` and per
    entry of ASPECTS; all prompts are classified in a single batch.

    Args:
        text: Input sentence; blank input yields an empty table.
        model_dir: Directory of a 4-class ABSA checkpoint.
        min_conf: Minimum probability of the best non-"none" label.
        min_margin: Minimum amount by which that probability must exceed the
            probability of "none".
        top_k: Keep only this many rows (highest confidence first); ``None``
            keeps all. Values such as ``6.0`` are truncated to an int.

    Returns:
        DataFrame with columns Aspect, Target, Label, Confidence, sorted by
        descending confidence. The columns are present even with zero rows.

    Raises:
        ValueError: if the checkpoint does not have exactly 4 output classes.
    """
    columns = ["Aspect", "Target", "Label", "Confidence"]

    text = str(text or "").strip()
    if not text:
        return pd.DataFrame(columns=columns)

    tok, mdl = load_model(model_dir)
    SEP = tok.sep_token or "[SEP]"

    targets = extract_targets(text)  # If you want NO fallback, change function to return [].
    batch, meta = [], []
    for t in targets:
        for a in ASPECTS:
            batch.append(
                f"Sentence: {text} {SEP} Target: {t} {SEP} Aspect: {a} {SEP} Task: classify sentiment for this target & aspect."
            )
            meta.append((t, a))

    # Nothing to score (e.g. extract_targets configured without a fallback):
    # return the empty table instead of sending an empty batch to the tokenizer.
    if not batch:
        return pd.DataFrame(columns=columns)

    enc = tok(batch, return_tensors="pt", truncation=True, padding=True, max_length=256)
    enc = {k: v.to(mdl.device) for k, v in enc.items()}
    with torch.no_grad():
        logits = mdl(**enc).logits
        probs = torch.softmax(logits, dim=-1).cpu().numpy()

    if probs.shape[-1] != 4:
        raise ValueError(
            f"This ABSA checkpoint has {probs.shape[-1]} classes, expected 4 "
            f"(['negative','neutral','positive','none']). Did you select an IMDb model by mistake?"
        )

    none_idx = SENT2ID["none"]
    rows = []
    for (t, a), p in zip(meta, probs):
        p_none = float(p[none_idx])
        # Best real sentiment; max() keeps the first label on ties, i.e. the
        # positive > neutral > negative preference order.
        best_lbl = max(("positive", "neutral", "negative"), key=lambda lbl: float(p[SENT2ID[lbl]]))
        best_conf = float(p[SENT2ID[best_lbl]])
        if best_conf >= min_conf and (best_conf - p_none) >= min_margin:
            rows.append({"Aspect": a, "Target": t, "Label": best_lbl, "Confidence": round(best_conf, 3)})

    # Passing the columns explicitly keeps "Confidence" available for sorting
    # when no row passed the thresholds (a column-less frame raises KeyError).
    df = pd.DataFrame(rows, columns=columns).sort_values("Confidence", ascending=False)
    if top_k is not None:
        # UI sliders may deliver the value as a float; head() needs an int.
        df = df.head(int(top_k))
    return df.reset_index(drop=True)

<h1> Step 7: Gradio callbacks </h1>

In [8]:

def _first_or_blank(xs: List[str]) -> str:
    return xs[0] if xs else ""

def absa_model_choices(track: str) -> List[str]:
    """List the display names of the ABSA models found for ``track``, sorted.

    An empty or unknown track gives an empty list.
    """
    known_track = bool(track) and track in ABSA_FOUND
    if not known_track:
        return []
    # Iterating the per-track dict yields its keys, i.e. display names only.
    return sorted(ABSA_FOUND[track])

def on_track_change(track: str):
    """Build the replacement model dropdown for the newly selected ``track``.

    The dropdown lists the models found for that track and preselects the
    first one ("" when the track has none).
    """
    options = absa_model_choices(track)
    preselected = _first_or_blank(options)
    return gr.Dropdown(choices=options, value=preselected, interactive=True)

def run_inference(
    text: str,
    imdb_display: str,
    absa_track: str,
    absa_display: str,
    min_conf: float,
    min_margin: float,
    top_k: int,
    imdb_manual: str,
    absa_manual: str
):
    """Callback of the "Analyze" button: run both models on ``text``.

    For each model a manually typed path wins over the dropdown selection.
    Failures never propagate: they are reported inside the returned values.

    Returns:
        ``(imdb_text, absa_frame)`` - a one-line IMDb summary (or a notice /
        "ERROR: ..." message) and the ABSA table (or a single placeholder row
        describing the missing selection or the error).
    """
    # --- IMDb -------------------------------------------------------------
    try:
        typed_imdb = (imdb_manual or "").strip()
        if typed_imdb:
            imdb_dir = typed_imdb
        else:
            imdb_dir = imdb_display_to_path.get(imdb_display or "", "")

        if not imdb_dir:
            imdb_pretty = "No IMDb model selected."
        else:
            label, confidence = imdb_predict(text, imdb_dir)
            imdb_pretty = f"{label.capitalize()} ({confidence:.3f})"
    except Exception as imdb_error:
        imdb_pretty = "ERROR: " + str(imdb_error)

    # --- ABSA -------------------------------------------------------------
    try:
        typed_absa = (absa_manual or "").strip()
        if typed_absa:
            absa_dir = typed_absa
        else:
            track_models = ABSA_FOUND.get(absa_track or "", {})
            absa_dir = track_models.get(absa_display or "", "")

        if not absa_dir:
            placeholder = {"Aspect":"—","Target":"—","Label":"No ABSA model selected.","Confidence":0.0}
            absa_df = pd.DataFrame([placeholder])
        else:
            absa_df = absa_table(text, absa_dir, min_conf=min_conf, min_margin=min_margin, top_k=top_k)
    except Exception as absa_error:
        failure = {"Aspect":"ERROR","Target":"—","Label":str(absa_error),"Confidence":0.0}
        absa_df = pd.DataFrame([failure])

    return imdb_pretty, absa_df

def test_load(imdb_display, absa_track, absa_display, imdb_manual, absa_manual):
    msgs = []
    # IMDb
    imdb_dir = (imdb_manual or "").strip() or imdb_display_to_path.get(imdb_display or "", "")
    try:
        if imdb_dir:
            _validate_checkpoint_dir(imdb_dir)
            load_model(imdb_dir)
            msgs.append(f"✅ IMDb ok: {imdb_dir}")
        else:
            msgs.append("ℹ️ IMDb not selected.")
    except Exception as e:
        msgs.append(f"❌ IMDb error: {e}")

    # ABSA
    absa_dir = (absa_manual or "").strip() or ABSA_FOUND.get(absa_track or "", {}).get(absa_display or "", "")
    try:
        if absa_dir:
            _validate_checkpoint_dir(absa_dir)
            load_model(absa_dir)
            msgs.append(f"✅ ABSA ok: {absa_dir}")
        else:
            msgs.append("ℹ️ ABSA not selected.")
    except Exception as e:
        msgs.append(f"❌ ABSA error: {e}")

    return "\n".join(msgs)

<h1> Step 8: Colorful CSS (borders & centered title) </h1>

In [9]:

# Stylesheet handed to gr.Blocks(css=...) when the UI is built.
#   #app-title  - centres the <h2 id="app-title"> heading emitted via gr.HTML.
#   .card       - indigo rounded border; attached to components through
#                 elem_classes=["card"].
#   .card .gr-* - green border around inner widgets and a tinted table header.
# NOTE(review): the .gr-textbox / .gr-dropdown / .gr-slider / .gr-dataframe
# class names are not set anywhere in this notebook; presumably they are
# generated by Gradio - confirm they exist in the installed version.
app_css = """
#app-title { text-align: center; margin-top: 4px; margin-bottom: 16px; }
.card {
  border: 2px solid #6366f1 !important;   /* indigo */
  border-radius: 12px !important;
  padding: 10px !important;
}
.card .gr-textbox, .card .gr-dropdown, .card .gr-slider, .card .gr-dataframe {
  border: 1.5px solid #22c55e !important; /* green */
  border-radius: 10px !important;
}
.card label, .card .wrap > label, .card .label-wrap label { font-weight: 600; }
.card .gr-dataframe table thead th { background: #eef2ff; }
"""

<h1> Step 9: Build the UI </h1>

In [10]:
# Build the Gradio app. Layout, top to bottom: title, input text box, a row
# with the IMDb column (left) and the ABSA column (right), the two action
# buttons, and a Markdown area for the "Test load" diagnostics. Components
# appear in the page in the order they are created inside the `with` blocks.
with gr.Blocks(title="IMDb + ABSA Inference", css=app_css) as demo:
    gr.HTML('<h2 id="app-title">IMDb Sentiment (left) + Aspect-Based Sentiment (right)</h2>')
    gr.Markdown("**Enter a sentence.**")

    # Shared input for both models.
    with gr.Row():
        text_in = gr.Textbox(
            label="Input text",
            placeholder="Type any sentence…",
            lines=3,
            elem_classes=["card"]
        )

    with gr.Row():
        # Left: IMDb
        with gr.Column(scale=1):
            gr.Markdown("### IMDb Sentiment")
            # Choices are the display names of the IMDb checkpoints found on disk.
            imdb_dd = gr.Dropdown(
                choices=IMDB_CHOICES,
                value=_first_or_blank(IMDB_CHOICES) if IMDB_CHOICES else None,
                label="IMDb model (best)",
                interactive=True,
                allow_custom_value=True,  # you can type if list empty
                elem_classes=["card"]
            )
            # A non-empty path here takes precedence over the dropdown
            # (see run_inference / test_load).
            manual_imdb = gr.Textbox(
                label="(Optional) IMDb model path (overrides dropdown)",
                placeholder="/content/drive/MyDrive/.../best_model",
                lines=1,
                elem_classes=["card"]
            )
            imdb_out = gr.Textbox(
                label="IMDb result",
                value="—",
                interactive=False,
                elem_classes=["card"]
            )

        # Right: ABSA
        with gr.Column(scale=2):
            gr.Markdown("### ABSA (Sentihood)")
            # Tracks for which at least one ABSA checkpoint was discovered.
            absa_tracks = sorted(list(ABSA_FOUND.keys()))
            track_dd = gr.Dropdown(
                choices=absa_tracks,
                value=_first_or_blank(absa_tracks) if absa_tracks else None,
                label="ABSA Track",
                interactive=True,
                allow_custom_value=True,
                elem_classes=["card"]
            )
            # Initially lists the models of the first track; repopulated by
            # on_track_change whenever the track dropdown changes.
            absa_models_dd = gr.Dropdown(
                choices=absa_model_choices(_first_or_blank(absa_tracks)) if absa_tracks else [],
                value=_first_or_blank(absa_model_choices(_first_or_blank(absa_tracks))) if absa_tracks else None,
                label="ABSA model (best)",
                interactive=True,
                allow_custom_value=True,
                elem_classes=["card"]
            )
            # A non-empty path here takes precedence over track + model dropdowns.
            manual_absa = gr.Textbox(
                label="(Optional) ABSA model path (overrides dropdown)",
                placeholder="/content/drive/MyDrive/ABSA/TrackA/bert-base-uncased/best",
                lines=1,
                elem_classes=["card"]
            )

            # Filtering knobs forwarded to absa_table as min_conf, min_margin, top_k.
            with gr.Row():
                min_conf_sld   = gr.Slider(0.0, 1.0, value=0.60, step=0.01, label="Min confidence (keep ≥)", elem_classes=["card"])
                min_margin_sld = gr.Slider(0.0, 1.0, value=0.20, step=0.01, label="Min margin vs 'none'", elem_classes=["card"])
                topk_sld       = gr.Slider(1, 20, value=6, step=1, label="Top-K rows", elem_classes=["card"])

            # Output component; not to be confused with the local DataFrame of
            # the same name inside run_inference.
            absa_df = gr.Dataframe(
                headers=["Aspect","Target","Label","Confidence"],
                label="ABSA output",
                wrap=True,
                elem_classes=["card"]
            )

    with gr.Row():
        run_btn  = gr.Button("Analyze")
        test_btn = gr.Button("Test load")
    # Receives the text returned by test_load.
    # NOTE(review): test_load joins its two messages with "\n"; confirm that
    # gr.Markdown renders them on separate lines.
    diag_out = gr.Markdown()

    # Wiring
    # The order of `inputs` must match the parameter order of each callback.
    track_dd.change(on_track_change, inputs=[track_dd], outputs=[absa_models_dd])
    run_btn.click(
        run_inference,
        inputs=[text_in, imdb_dd, track_dd, absa_models_dd, min_conf_sld, min_margin_sld, topk_sld, manual_imdb, manual_absa],
        outputs=[imdb_out, absa_df]
    )
    test_btn.click(
        test_load,
        inputs=[imdb_dd, track_dd, absa_models_dd, manual_imdb, manual_absa],
        outputs=[diag_out]
    )

<h1> Step 10: Launch </h1>

In [11]:
# Start the app. With share=True the recorded run below exposes it on a public
# *.gradio.live URL (the log says the link expires in 1 week), so treat that
# link as public. Per the Colab hint in the log, set debug=True to see
# callback errors in the notebook.
demo.launch(share=True, debug=False)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://0ffbc5ba0cfc8d7b13.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


