In [2]:
# import torch
# import torch.nn as nn
# import gradio as gr
# import numpy as np
# from transformers import AutoTokenizer, AutoModel

# # DistilBERT model defined
# class MultiHeadDistilBert(nn.Module):
#     def __init__(self, base_name, num_labels_sent, num_labels_emot):
#         super().__init__()
#         self.encoder = AutoModel.from_pretrained(base_name)
#         hidden = 768
#         self.dropout = nn.Dropout(0.2)
#         self.classifier_sent = nn.Linear(hidden, num_labels_sent)
#         self.classifier_emot = nn.Linear(hidden, num_labels_emot)

#     def forward(self, input_ids, attention_mask):
#         out = self.encoder(input_ids=input_ids, attention_mask=attention_mask)
#         cls = out.last_hidden_state[:, 0]
#         cls = self.dropout(cls)
#         return {
#             "logits_sent": self.classifier_sent(cls),
#             "logits_emot": self.classifier_emot(cls)
#         }

# # --- Load the saved model and tokenizer ---
# distil_dir = "./distilbert_ardor_saved"   # üëà adjust this path
# tokenizer = AutoTokenizer.from_pretrained(distil_dir)
# model = MultiHeadDistilBert("distilbert-base-uncased", 2, 6)
# model.load_state_dict(torch.load(f"{distil_dir}/pytorch_model.bin", map_location="cpu"))
# model.eval()
# print("‚úÖ DistilBERT model loaded successfully!")

# # --- Label maps ---
# id2label_sent = {0: "Negative", 1: "Positive"}
# id2label_emot = {0: "joy", 1: "sad", 2: "anger", 3: "fear", 4: "love", 5: "surprise"}

# def softmax_temp(x, T=1.5):
#     return torch.nn.functional.softmax(x / T, dim=-1)

# # --- Prediction ---
# def predict_distilbert(text):
#     inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)
#     with torch.no_grad():
#         out = model(**inputs)
#         ps = softmax_temp(out["logits_sent"]).cpu().numpy()[0]
#         pe = softmax_temp(out["logits_emot"]).cpu().numpy()[0]
#     sent_idx = int(np.argmax(ps))
#     sent_label = id2label_sent[sent_idx]
#     sent_conf = float(ps[sent_idx])
#     top_idx = np.argsort(-pe)[:3]
#     emotions = [(id2label_emot[i], float(pe[i])) for i in top_idx]
#     return sent_label, sent_conf, emotions

# # --- Gradio App ---
# def analyze_text(text):
#     label, conf, emo = predict_distilbert(text)
#     ardor = round(conf * 100, 2)
#     emo_str = " | ".join([f"{e[0]}: {e[1]:.2f}" for e in emo])
#     return f"**Sentiment:** {label} ({ardor}%)", f"**Ardor Scale:** {ardor}%", f"**Top Emotions:** {emo_str}"

# iface = gr.Interface(
#     fn=analyze_text,
#     inputs=gr.Textbox(label="Enter text to analyze"),
#     outputs=[
#         gr.Markdown(label="Sentiment"),
#         gr.Markdown(label="Ardor Scale"),
#         gr.Markdown(label="Top Emotions")
#     ],
#     title="üé≠ The Ardor Scale ‚Äì DistilBERT Demo",
#     examples=[
#         ["I absolutely love this project!"],
#         ["This is terrible and makes me so upset."],
#         ["It‚Äôs okay, nothing special."],
#         ["I can‚Äôt wait for the concert tonight!"]
#     ]
# )

# iface.launch(share=True)
# =================================================================================


# Unified website for all models


import os
import torch
import torch.nn as nn
import gradio as gr
import numpy as np
import joblib
from transformers import AutoTokenizer, AutoModel, AutoModelForSequenceClassification

# 1. Define MultiHeadDistilBert

class MultiHeadDistilBert(nn.Module):
    def __init__(self, base_name, num_labels_sent, num_labels_emot):
        super().__init__()
        self.encoder = AutoModel.from_pretrained(base_name)
        hidden = 768
        self.dropout = nn.Dropout(0.2)
        self.classifier_sent = nn.Linear(hidden, num_labels_sent)
        self.classifier_emot = nn.Linear(hidden, num_labels_emot)

    def forward(self, input_ids, attention_mask):
        out = self.encoder(input_ids=input_ids, attention_mask=attention_mask)
        cls = out.last_hidden_state[:, 0]
        cls = self.dropout(cls)
        return {
            "logits_sent": self.classifier_sent(cls),
            "logits_emot": self.classifier_emot(cls)
        }

# 2. Load models

models_dir = "./models"

# --- Logistic Regression + SVM (classical baselines) ---
try:
    logreg_model = joblib.load(os.path.join(models_dir, "logreg_model.pkl"))
    svm_model = joblib.load(os.path.join(models_dir, "svm_model.pkl"))
    vectorizer = joblib.load(os.path.join(models_dir, "tfidf_vectorizer.pkl"))
    has_classical = True
    print("‚úÖ Classical models loaded successfully!")
except Exception as e:
    print("‚ö†Ô∏è Skipping classical models:", e)
    has_classical = False
    logreg_model = svm_model = vectorizer = None

# --- DistilBERT (MultiHead) ---
try:
    distil_dir = "./distilbert_ardor_saved"
    distil_tokenizer = AutoTokenizer.from_pretrained(distil_dir)
    distil_model = MultiHeadDistilBert("distilbert-base-uncased", 2, 6)
    distil_model.load_state_dict(torch.load(f"{distil_dir}/pytorch_model.bin", map_location="cpu"))
    distil_model.eval()
    has_distil = True
    print("‚úÖ DistilBERT loaded successfully!")
except Exception as e:
    print("‚ö†Ô∏è Skipping DistilBERT:", e)
    has_distil = False

# --- RoBERTa ---
try:
    roberta_dir = os.path.join(models_dir, "roberta_ardor")
    roberta_tokenizer = AutoTokenizer.from_pretrained(roberta_dir)
    roberta_model = AutoModelForSequenceClassification.from_pretrained(roberta_dir)
    roberta_model.eval()
    has_roberta = True
    print("‚úÖ RoBERTa loaded successfully!")
except Exception as e:
    print("‚ö†Ô∏è Skipping RoBERTa:", e)
    has_roberta = False

# 3. Label maps

id2label_sent = {0: "Negative", 1: "Positive"}
id2label_emot = {0: "joy", 1: "sad", 2: "anger", 3: "fear", 4: "love", 5: "surprise"}

def softmax_temp(x, T=1.5):
    return torch.nn.functional.softmax(x / T, dim=-1)

# 4. Prediction functions

def predict_classical(model, text):
    X = vectorizer.transform([text])
    probs = model.predict_proba(X)[0]
    idx = np.argmax(probs)
    label = "Positive" if idx == 1 else "Negative"
    conf = float(probs[idx])
    return label, conf, []

def predict_distilbert(text):
    inputs = distil_tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)
    with torch.no_grad():
        out = distil_model(**inputs)
        ps = softmax_temp(out["logits_sent"]).cpu().numpy()[0]
        pe = softmax_temp(out["logits_emot"]).cpu().numpy()[0]
    sent_idx = int(np.argmax(ps))
    sent_label = id2label_sent[sent_idx]
    sent_conf = float(ps[sent_idx])
    top_idx = np.argsort(-pe)[:3]
    emotions = [(id2label_emot[i], float(pe[i])) for i in top_idx]
    return sent_label, sent_conf, emotions

def predict_roberta(text):
    inputs = roberta_tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)
    with torch.no_grad():
        logits = roberta_model(**inputs).logits
        ps = softmax_temp(logits).cpu().numpy()[0]
    idx = int(np.argmax(ps))
    label = id2label_sent[idx]
    conf = float(ps[idx])
    return label, conf, []

# 5. Unified prediction router

def analyze_text(text, model_choice):
    text = text.strip()
    if not text:
        return "‚ö†Ô∏è Please enter text.", "", ""

    if model_choice == "Logistic Regression" and has_classical:
        label, conf, emo = predict_classical(logreg_model, text)
    elif model_choice == "SVM" and has_classical:
        label, conf, emo = predict_classical(svm_model, text)
    elif model_choice == "DistilBERT" and has_distil:
        label, conf, emo = predict_distilbert(text)
    elif model_choice == "RoBERTa" and has_roberta:
        label, conf, emo = predict_roberta(text)
    else:
        return "‚ö†Ô∏è Model not available or not loaded.", "", ""

    ardor = round(conf * 100, 2)
    emo_str = " | ".join([f"{e[0]}: {e[1]:.2f}" for e in emo]) if emo else "N/A"
    return f"**Sentiment:** {label} ({ardor}%)", f"**Ardor Scale:** {ardor}%", f"**Top Emotions:** {emo_str}"

# ======================================================
# 6. Gradio Interface for Front end

available_models = []
if has_classical:
    available_models.extend(["Logistic Regression", "SVM"])
if has_distil:
    available_models.append("DistilBERT")
if has_roberta:
    available_models.append("RoBERTa")

iface = gr.Interface(
    fn=analyze_text,
    inputs=[
        gr.Textbox(label="Enter text to analyze"),
        gr.Dropdown(available_models, label="Select Model")
    ],
    outputs=[
        gr.Markdown(label="Sentiment"),
        gr.Markdown(label="Ardor Scale"),
        gr.Markdown(label="Top Emotions")
    ],
    title="üé≠ The Ardor Scale ‚Äì Multi-Model Sentiment & Emotion Analyzer",
    description="Compare sentiment, Ardor confidence, and emotion predictions across multiple models.",
    examples=[
        ["I absolutely love this project!", "DistilBERT"],
        ["This is terrible and makes me so upset.", "DistilBERT"],
        ["It‚Äôs okay, nothing special.", "Logistic Regression"],
        ["I'm so excited for the concert tonight!", "DistilBERT"]
    ],
    allow_flagging="never"
)

# 7. Launch App

if __name__ == "__main__":
    iface.launch(share=True)


‚ö†Ô∏è Skipping classical models: [Errno 2] No such file or directory: './models/logreg_model.pkl'
‚úÖ DistilBERT loaded successfully!
‚ö†Ô∏è Skipping RoBERTa: Repo id must be in the form 'repo_name' or 'namespace/repo_name': './models/roberta_ardor'. Use `repo_type` argument if needed.




Running on local URL:  http://127.0.0.1:7861


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Running on public URL: https://70deb95c7d54515f5d.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)
