In [11]:
import pandas as pd
import nltk
import re
import joblib
import numpy as np
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
from sklearn.ensemble import RandomForestClassifier
from imblearn.over_sampling import SMOTE
from collections import Counter

nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

file_path = "/content/sample_data/amazon_review.csv"
df = pd.read_csv(file_path)

df = df.dropna(subset=['reviewText'])

lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

# Preprocessing function
def preprocess_text(text):
    if not isinstance(text, str) or text.strip() == "":
        return "emptyreview"

    text = text.lower()
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    tokens = re.findall(r'\b\w+\b', text)
    tokens = [lemmatizer.lemmatize(word) for word in tokens if word not in stop_words]

    return " ".join(tokens) if tokens else "emptyreview"

df['cleaned_review'] = df['reviewText'].apply(preprocess_text)

def map_emotion(rating):
    if rating >= 4:
        return "Joy"
    elif rating == 3:
        return "Content"
    elif rating == 2:
        return "Disappointment"
    else:
        return "Anger"

df['emotion'] = df['overall'].apply(map_emotion)

label_encoder = LabelEncoder()
df['emotion_encoded'] = label_encoder.fit_transform(df['emotion'])

# Split data
X_train, X_test, y_train, y_test = train_test_split(df['cleaned_review'], df['emotion_encoded'], test_size=0.2, random_state=42)

tfidf = TfidfVectorizer(stop_words='english', max_features=15000, ngram_range=(1,2))
X_train_tfidf = tfidf.fit_transform(X_train)
X_test_tfidf = tfidf.transform(X_test)

X_train_tfidf_dense = X_train_tfidf.toarray()

# Apply SMOTE for class balancing
smote_strategy = {label: int(count * 1.5) for label, count in Counter(y_train).items()}
smote = SMOTE(sampling_strategy=smote_strategy, random_state=42)
X_train_smote, y_train_smote = smote.fit_resample(X_train_tfidf_dense, y_train)

# Train Random Forest Model
model = RandomForestClassifier(n_estimators=200, random_state=42, class_weight="balanced")
print("🚀 Training Random Forest Model... ")
model.fit(X_train_smote, y_train_smote)


joblib.dump(model, "random_forest_emotion_model.pkl")
joblib.dump(tfidf, "tfidf_vectorizer.pkl")
joblib.dump(label_encoder, "label_encoder.pkl")

y_pred = model.predict(X_test_tfidf)

y_pred_labels = label_encoder.inverse_transform(y_pred)
y_test_labels = label_encoder.inverse_transform(y_test)

accuracy = accuracy_score(y_test_labels, y_pred_labels)
print(f"Model Training Complete!")
print(f"Model Accuracy: {accuracy:.2f}")
print("Classification Report:\n", classification_report(y_test_labels, y_pred_labels, zero_division=1))


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


🚀 Training Random Forest Model... 
Model Training Complete!
Model Accuracy: 0.91
Classification Report:
                 precision    recall  f1-score   support

         Anger       1.00      0.00      0.00        44
       Content       1.00      0.00      0.00        30
Disappointment       1.00      0.00      0.00        12
           Joy       0.91      1.00      0.95       897

      accuracy                           0.91       983
     macro avg       0.98      0.25      0.24       983
  weighted avg       0.92      0.91      0.87       983



In [12]:
import joblib
import numpy as np

tfidf = joblib.load("tfidf_vectorizer.pkl")
label_encoder = joblib.load("label_encoder.pkl")
model = joblib.load("random_forest_emotion_model.pkl")

# Emotion classification function
def analyze_emotions(text):
    cleaned_text = preprocess_text(text)
    text_tfidf = tfidf.transform([cleaned_text])

    predicted_label = model.predict(text_tfidf)[0]
    emotion = label_encoder.inverse_transform([predicted_label])[0]
    confidence = max(model.predict_proba(text_tfidf)[0])

    intensity = round(min(confidence * 1.2, 1.0), 2)
    activation = "High" if intensity > 0.7 else "Medium" if intensity > 0.4 else "Low"

    secondary_emotion = "Disappointment" if emotion == "Joy" else "Joy"
    secondary_intensity = round(max(0, intensity - 0.5), 2)

    return {
        "primary": {"emotion": emotion, "activation": activation, "intensity": intensity},
        "secondary": {"emotion": secondary_emotion, "activation": "Low", "intensity": secondary_intensity}
    }


In [13]:
!pip install --quiet langdetect
!pip install --quiet deep-translator



In [14]:
import json

industry_topics = {
    "E-commerce": {
        "Delivery": ["fast delivery", "shipping", "delivered", "on time", "slow delivery"],
        "Quality": ["quality", "amazing", "bad", "poor", "excellent", "terrible", "worst"],
        "Customer Service": ["support", "rude", "helpful", "unresponsive"]
    },
    "Healthcare": {
        "Service": ["appointment", "wait time", "doctor", "staff", "nurse"],
        "Facilities": ["clean", "equipment", "hygiene", "sanitation"]
    }
}

with open("industry_topics.json", "w") as file:
    json.dump(industry_topics, file, indent=4)

print("industry_topics.json has been created successfully!")


industry_topics.json has been created successfully!


In [15]:
import numpy as np
import json
from langdetect import detect
from deep_translator import GoogleTranslator
from sklearn.feature_extraction.text import TfidfVectorizer
import joblib

model = joblib.load("random_forest_emotion_model.pkl")
label_encoder = joblib.load("label_encoder.pkl")
tfidf = joblib.load("tfidf_vectorizer.pkl")

with open("industry_topics.json", "r") as file:
    industry_topics = json.load(file)

def detect_and_translate(text):
    detected_lang = detect(text)
    if detected_lang != "en":
        print(f"Translating from {detected_lang} to English...")
        return GoogleTranslator(source=detected_lang, target="en").translate(text)
    return text

def scale_intensity(confidence):
    return 0.8 if confidence >= 0.85 else (0.7 if confidence >= 0.6 else round(confidence * 0.8, 2))

# Emotion Classification
def analyze_emotions(text):
    text = detect_and_translate(text)
    cleaned_text = preprocess_text(text)
    text_tfidf = tfidf.transform([cleaned_text])

    proba = model.predict_proba(text_tfidf)[0]
    sorted_probs = np.argsort(proba)[::-1]

    primary_emotion = label_encoder.inverse_transform([sorted_probs[0]])[0]
    confidence = proba[sorted_probs[0]]

    print("Probabilities:", dict(zip(label_encoder.classes_, proba)))
    print("Primary Emotion (Before Thresholding):", primary_emotion, confidence)

    negative_emotions = {"Disappointment", "Anger", "Sadness"}
    positive_emotions = {"Joy", "Content"}

    anger_triggers = {"worst", "terrible", "awful", "horrible", "scam", "frustrated", "rude", "never again"}

    if confidence < 0.5:
        primary_emotion = "Neutral"
    elif primary_emotion in positive_emotions and "poor" in text.lower():
        primary_emotion = "Disappointment"
    elif any(word in text.lower() for word in anger_triggers):
        primary_emotion = "Anger"

    intensity = scale_intensity(confidence)
    activation = "High" if intensity > 0.8 else "Medium" if intensity > 0.4 else "Low"

    secondary_emotion = label_encoder.inverse_transform([sorted_probs[1]])[0] if len(sorted_probs) > 1 else "Neutral"
    secondary_intensity = round(intensity * 0.375, 2)

    return {
        "primary": {"emotion": primary_emotion, "activation": activation, "intensity": intensity},
        "secondary": {"emotion": secondary_emotion, "activation": "Low", "intensity": secondary_intensity},
        "confidence_scores": dict(zip(label_encoder.classes_, proba))
    }

# Topic Detection
def analyze_topics(text, industry="ecommerce"):
    topic_keywords = {
        "ecommerce": {
            "Delivery": ["fast delivery", "shipping", "delivered", "on time", "slow delivery"],
            "Quality": ["quality", "amazing", "bad", "poor", "excellent", "terrible", "worst"],
            "Clothes": ["fit", "size", "clothing", "small", "tight", "loose", "perfect size"]
        },
        "healthcare": {
            "Service": ["doctor", "nurse", "staff", "appointment", "treatment"],
            "Facility": ["hospital", "clean", "sanitary", "equipment", "waiting time"]
        }
    }

    detected_topics = {"main": [], "subtopics": {}}

    industry_topics = topic_keywords.get(industry, topic_keywords["ecommerce"])
    words = text.lower().split()

    for topic, keywords in industry_topics.items():
        matched_keywords = [kw for kw in keywords if any(word in kw for word in words)]
        if matched_keywords:
            detected_topics["main"].append(topic)
            detected_topics["subtopics"][topic] = matched_keywords

    return detected_topics

def compute_adorescore(emotions, topics):
    base_scores = {"Joy": 90, "Content": 70, "Disappointment": 30, "Anger": -80}

    primary_emotion = emotions["primary"]["emotion"]
    secondary_emotion = emotions["secondary"]["emotion"]

    primary_score = base_scores.get(primary_emotion, 50) * emotions["primary"]["intensity"]
    secondary_score = base_scores.get(secondary_emotion, 30) * emotions["secondary"]["intensity"]


    topic_weight = max(float(len(topics.get("main", []))), 1.5)

    overall_score = int((primary_score + secondary_score) * (topic_weight * 0.5))

    breakdown_weights = {"Delivery": 1.5, "Quality": 1.2, "Clothes": 0.7}
    total_weight = sum(breakdown_weights.values())

    breakdown = {
        topic: int((breakdown_weights.get(topic, 1) / total_weight) * overall_score * 1.8)
        for topic in topics["main"]
    }

    return {"overall": overall_score, "breakdown": breakdown}

def analyze_feedback_dynamic():
    text = input("Enter feedback text: ")
    industry = input("Enter industry (ecommerce/healthcare): ").strip().lower() or "ecommerce"

    valid_industries = ["ecommerce", "healthcare"]
    if industry not in valid_industries:
        print(f"Invalid industry. Defaulting to 'ecommerce'.")
        industry = "ecommerce"

    emotions = analyze_emotions(text)
    topics = analyze_topics(text, industry)
    adorescore = compute_adorescore(emotions, topics)

    result = {
        "emotions": emotions,
        "topics": topics,
        "adorescore": adorescore
    }

    print("\nAnalysis Result:\n")
    print(json.dumps(result, indent=4))

analyze_feedback_dynamic()



Enter feedback text: delivery is fast
Enter industry (ecommerce/healthcare): ecoomerce
Invalid industry. Defaulting to 'ecommerce'.
Probabilities: {'Anger': 0.0, 'Content': 0.04, 'Disappointment': 0.0, 'Joy': 0.96}
Primary Emotion (Before Thresholding): Joy 0.96

Analysis Result:

{
    "emotions": {
        "primary": {
            "emotion": "Joy",
            "activation": "Medium",
            "intensity": 0.8
        },
        "secondary": {
            "emotion": "Content",
            "activation": "Low",
            "intensity": 0.3
        },
        "confidence_scores": {
            "Anger": 0.0,
            "Content": 0.04,
            "Disappointment": 0.0,
            "Joy": 0.96
        }
    },
    "topics": {
        "main": [
            "Delivery"
        ],
        "subtopics": {
            "Delivery": [
                "fast delivery",
                "slow delivery"
            ]
        }
    },
    "adorescore": {
        "overall": 69,
        "breakdown": {


In [16]:
!pip install gradio




In [17]:
import nltk
nltk.download('vader_lexicon')


[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [18]:
import gradio as gr
import json
import joblib
import numpy as np
import plotly.graph_objects as go
from langdetect import detect
from deep_translator import GoogleTranslator
from sklearn.feature_extraction.text import TfidfVectorizer

model = joblib.load("random_forest_emotion_model.pkl")
label_encoder = joblib.load("label_encoder.pkl")
tfidf = joblib.load("tfidf_vectorizer.pkl")

theme_subtopics = {
    "Delivery": ["Fast Delivery", "Quick Delivery", "Free Delivery", "Good Delivery"],
    "Quality": ["Durability", "Material", "Workmanship"]
}

from nltk.sentiment import SentimentIntensityAnalyzer

# Initialize VADER
sia = SentimentIntensityAnalyzer()

def get_sentiment(text):
    sentiment_score = sia.polarity_scores(text)['compound']

    if sentiment_score > 0.05:
        return "Positive"
    elif sentiment_score < -0.05:
        return "Negative"
    else:
        return "Neutral"

def detect_and_translate(text):
    detected_lang = detect(text)
    return GoogleTranslator(source=detected_lang, target="en").translate(text) if detected_lang != "en" else text

def extract_top_themes(text):
    tfidf_matrix = tfidf.transform([text])
    feature_names = tfidf.get_feature_names_out()
    sorted_indices = np.argsort(tfidf_matrix.toarray()[0])[::-1]
    top_keywords = [feature_names[i] for i in sorted_indices[:3]]
    return top_keywords

def analyze_feedback(text, industry="ecommerce"):
    emotions = analyze_emotions(text)
    topics = analyze_topics(text, industry)
    adorescore = compute_adorescore(emotions, topics)
    top_themes = extract_top_themes(text)
    sentiment = get_sentiment(text)
    main_emotion = emotions["primary"]["emotion"]

    # Generate AI-powered response
    ai_response = generate_response(sentiment, main_emotion)

    return {
        "emotions": emotions,
        "topics": topics,
        "adorescore": adorescore,
        "top_themes": top_themes,
        "sentiment": sentiment,
        "ai_response": ai_response
    }

def generate_radar_chart(title):
    categories = ['Joy', 'Sadness', 'Anger', 'Surprise', 'Fear', 'Disgust']
    values = np.random.rand(6)
    return go.Figure(data=[go.Scatterpolar(r=values, theta=categories, fill='toself', name=title)])

def analyze_and_display(text, industry):
    analysis = analyze_feedback(text, industry)
    adorescore_value = analysis["adorescore"]["overall"]
    main_emotion = analysis["emotions"]["primary"]["emotion"]
    main_intensity = round(analysis["emotions"]["primary"]["intensity"] * 100, 2)
    top_themes = analysis["top_themes"]
    sentiment = analysis["sentiment"]
    ai_response = analysis["ai_response"]

    adorescore_display = f"""
    <div style="text-align: center; font-size: 24px; font-weight: bold; padding: 20px; border: 2px solid #ddd; border-radius: 10px; width: 300px; margin: auto;">
        Adorescore <br>
        <span style="font-size: 40px; color: {'green' if adorescore_value > 0 else 'red'};">{adorescore_value}</span>
        <br>
        <small>Driven by</small><br>
        <span style="font-size: 20px;">{main_emotion} - {main_intensity}%</span>
        <br><br>
        <b>Sentiment:</b> <span style="color: {'green' if sentiment == 'Positive' else 'red' if sentiment == 'Negative' else 'gray'};">{sentiment}</span>
    </div>
    """

    themes_display = f"""
    <div style="text-align: center; font-size: 18px; font-weight: bold; padding: 10px; border: 1px solid #ddd; border-radius: 10px; width: 300px; margin: auto;">
        <b>Top Themes:</b> {", ".join(top_themes)}
    </div>
    """

    response_display = f"""
    <div style="text-align: center; font-size: 18px; padding: 10px; border: 1px solid #ddd; border-radius: 10px; width: 300px; margin: auto; background-color: #f9f9f9;">
        <b>Suggested Response:</b><br>
        <i>{ai_response}</i>
    </div>
    """

    radar_high = generate_radar_chart("High Activation")
    radar_medium = generate_radar_chart("Medium Activation")
    radar_low = generate_radar_chart("Low Activation")

    return adorescore_display + themes_display + response_display, json.dumps(analysis, indent=4), radar_high, radar_medium, radar_low

def generate_response(sentiment, emotion):
    if sentiment == "Positive":
        if emotion == "Joy":
            return "We're delighted to hear that you had a great experience! "
        elif emotion == "Surprise":
            return "Thank you for your feedback! We’re happy we exceeded your expectations. "
        else:
            return "We appreciate your positive feedback. Let us know how we can serve you even better!"

    elif sentiment == "Negative":
        if emotion == "Anger":
            return "We're really sorry to hear that. Please reach out, and we'll fix this ASAP. "
        elif emotion == "Sadness":
            return "We apologize for the inconvenience. Our team is here to help resolve any issues."
        else:
            return "We're sorry you had this experience. Let us know how we can make things right!"

    else:
        return "Thank you for your feedback. If you have any suggestions, we’d love to hear them!"


def update_subtopics(selected_theme):
    return ", ".join(theme_subtopics.get(selected_theme, ["No subtopics available"]))

title = "Customer Emotion Analysis System"
description = "Analyze customer feedback emotions with AI-powered insights and visualization."

with gr.Blocks() as demo:
    gr.Markdown(f"### {title}")
    gr.Markdown(description)

    with gr.Row():
        text_input = gr.Textbox(label="Enter Customer Feedback", lines=3)
        industry_dropdown = gr.Dropdown(["ecommerce", "healthcare"], label="Industry")

    analyze_btn = gr.Button("Analyze")

    adorescore_output = gr.HTML(label="Adorescore & Themes")
    analysis_output = gr.Textbox(label="Analysis Results")
    radar_high = gr.Plot(label="High Activation")
    radar_medium = gr.Plot(label="Medium Activation")
    radar_low = gr.Plot(label="Low Activation")

    analyze_btn.click(
        analyze_and_display,
        inputs=[text_input, industry_dropdown],
        outputs=[adorescore_output, analysis_output, radar_high, radar_medium, radar_low]
    )
    gr.Markdown("### Additional Options")
    with gr.Row():
        theme_dropdown = gr.Dropdown(["Delivery", "Quality"], label="Themes")
        subtopic_display = gr.Textbox(label="Subtopics", interactive=False)

    theme_dropdown.change(update_subtopics, inputs=theme_dropdown, outputs=subtopic_display)

demo.launch(share=True)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://d376be2c4cc2a352c1.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


