# Gradio Demos

In [None]:
# !pip install -q gradio==3.23.0
# !pip install -q sentence_transformers
# !pip install -U typing_extensions

## Imports

In [10]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModel
import gradio as gr
import torch
from torch.nn.functional import softmax
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
import joblib
from sentence_transformers import SentenceTransformer

## DEMO 1 - Sentiment Analysis

### Load Model

In [2]:
# The tokenizer is pulled from the base DistilBERT checkpoint, while the
# classifier weights are read from the local checkpoint directory below.
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

model_path = "models/distilbert_sa_20241017_211535"
model = AutoModelForSequenceClassification.from_pretrained(model_path, num_labels=3)
# Move the 3-label classifier onto the device selected in the imports cell
model = model.to(device)


### Infer model in Gradio

In [None]:
# Display names for the three output classes
# (assumes logit index 0/1/2 maps to this order -- TODO confirm against training labels)
sentiment_labels = ["Negative", "Neutral", "Positive"]

# Put the classifier into evaluation mode before serving predictions
model.eval()

# Define function for prediction
def predict_sentiment(review_text):
    """Score a single review with the sentiment classifier.

    The text is tokenized (truncated / padded to 128 tokens), pushed through
    the module-level ``model`` and the resulting logits are turned into
    probabilities with a softmax.

    Args:
        review_text: The review to classify, as a single string.

    Returns:
        dict: One entry per name in ``sentiment_labels`` mapping that label to
        its probability (a float), in the shape ``gr.Label`` accepts.
    """
    # Tokenize and move the input tensors to the same device as the model
    encodings = tokenizer(review_text, truncation=True, padding="max_length", max_length=128, return_tensors="pt").to(device)

    # Inference only: disable autograd so no graph is built or kept alive
    # for every request served by the demo.
    with torch.no_grad():
        logits = model(**encodings).logits

    # Softmax over the class axis, then drop only the batch axis (size 1 here)
    probabilities = softmax(logits, dim=1).squeeze(0).tolist()

    return dict(zip(sentiment_labels, probabilities))

# Wire predict_sentiment into a Gradio UI: one text box in, one label widget out
interface = gr.Interface(
    title="Sentiment Analysis with DistilBERT",
    description="Enter a review to get a sentiment prediction with class probabilities displayed as a bar graph.",
    fn=predict_sentiment,
    inputs=gr.Textbox(label="Enter a Review"),
    outputs=gr.Label(label="Sentiment Probabilities", num_top_classes=3),
)

# Start the app; share=True also requests a public share URL
interface.launch(share=True)


## DEMO 2 - Clustering

In [None]:
# Sentence encoder used to embed reviews.
# It is bound to its own name (not `model`) so that running this cell does not
# overwrite the `model` / `model_path` globals that the Demo 1
# predict_sentiment function still reads at call time.
embedding_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Load trained KMeans model.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code --
# only load model files you created or otherwise trust.
kmeans_model_path = "pickle/KMeans_clustering_model.pkl"
kmeans_model = joblib.load(kmeans_model_path)

# Define categories (adjust as per your clustering categories)
category_names = {
    0: "Electronics & Media",
    1: "Tablets & eReaders",
    2: "Accessories & Adapters",
    3: "Home & Smart Devices",
    4: "Pet Supplies",
    5: "Video & Streaming"
}

# Function to generate embeddings and get the cluster
def get_category(review_text):
    """Map a review to the display name of its predicted cluster.

    Args:
        review_text: The review to categorize, as a single string.

    Returns:
        str: The matching entry of ``category_names``, or
        ``"Unknown Category"`` when the predicted cluster id has no entry.
    """
    # Encode the review as a one-element batch
    review_embedding = embedding_model.encode([review_text])

    # Predict the cluster; cast to a plain int so the dict key type is explicit
    cluster_label = int(kmeans_model.predict(review_embedding)[0])

    # Map the cluster label to the category name
    return category_names.get(cluster_label, "Unknown Category")

# Wire get_category into a Gradio UI: review text in, category name out
interface = gr.Interface(
    title="Review Categorization",
    description="Enter a product review to get a predicted category.",
    fn=get_category,
    inputs=gr.Textbox(label="Enter a Review"),
    outputs=gr.Textbox(label="Predicted Category"),
)

# Start the app; share=True also requests a public share URL
interface.launch(share=True)


## DEMO 3 - Clustering + Sentiment

In [11]:
# --- Sentiment classifier ---
# Display names for the classifier outputs
sentiment_labels = ["Negative", "Neutral", "Positive"]

# Tokenizer from the base DistilBERT checkpoint, weights from the local checkpoint directory
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
model_path = "models/distilbert_sa_20241017_193624"
sentiment_model = AutoModelForSequenceClassification.from_pretrained(model_path)
sentiment_model = sentiment_model.to("cuda" if torch.cuda.is_available() else "cpu")
sentiment_model.eval()

# --- Review clustering ---
# Sentence encoder that produces the embeddings fed to KMeans
embedding_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Fitted KMeans estimator restored from disk.
# NOTE: joblib.load unpickles the file -- only load files you trust.
kmeans_model_path = "pickle/KMeans_clustering_model.pkl"
kmeans_model = joblib.load(kmeans_model_path)

# Cluster id -> display name (ids are the list positions 0..5)
category_names = dict(enumerate([
    "Electronics & Media",
    "Tablets & eReaders",
    "Accessories & Adapters",
    "Home & Smart Devices",
    "Pet Supplies",
    "Video & Streaming",
]))

# Prediction functions
def get_category(review_text):
    """Return the display name of the cluster predicted for ``review_text``.

    The review is embedded as a one-element batch with ``embedding_model``,
    the batch is passed to ``kmeans_model.predict`` and the first predicted
    id is looked up in ``category_names``. Ids without an entry yield
    ``"Unknown Category"``.
    """
    embedding_batch = embedding_model.encode([review_text])
    predicted_clusters = kmeans_model.predict(embedding_batch)
    return category_names.get(predicted_clusters[0], "Unknown Category")

def predict_sentiment(review_text):
    """Score a single review with ``sentiment_model``.

    The text is tokenized (truncated / padded to 128 tokens), moved to the
    device the model lives on, and the resulting logits are converted to
    probabilities with a softmax.

    Args:
        review_text: The review to classify, as a single string.

    Returns:
        dict: One entry per name in ``sentiment_labels`` mapping that label to
        its probability (a float), in the shape ``gr.Label`` accepts.
    """
    encodings = tokenizer(review_text, truncation=True, padding="max_length", max_length=128, return_tensors="pt").to(sentiment_model.device)

    # Inference only: disable autograd so no graph is built or kept alive
    # for every request served by the demo.
    with torch.no_grad():
        logits = sentiment_model(**encodings).logits

    # Softmax over the class axis, then drop only the batch axis (size 1 here)
    probabilities = softmax(logits, dim=1).squeeze(0).tolist()

    return dict(zip(sentiment_labels, probabilities))

# Combined Gradio demo
def analyze_review(review_text):
    """Run both models on one review.

    Returns:
        tuple: ``(category, sentiment)`` -- the result of ``get_category``
        followed by the result of ``predict_sentiment`` for the same text.
    """
    return get_category(review_text), predict_sentiment(review_text)

# Wire analyze_review into a Gradio UI: one text box in, two widgets out
# (the category as text, the sentiment probabilities as a label widget)
interface = gr.Interface(
    title="Review Categorization and Sentiment Analysis",
    description="Enter a product review to get the predicted category and sentiment analysis with probabilities.",
    fn=analyze_review,
    inputs=gr.Textbox(label="Enter a Review"),
    outputs=[
        gr.Textbox(label="Predicted Category"),
        gr.Label(label="Sentiment Probabilities", num_top_classes=3),
    ],
)

# Start the combined app; share=True also requests a public share URL
interface.launch(share=True)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


* Running on local URL:  http://127.0.0.1:7866
* Running on public URL: https://0e7534bee85d31a28f.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


