# 🌸 Bahar - Advanced Multilingual Emotion & Linguistic Analysis

**Version:** 0.2.0  
**Features:**
- 🌍 Multilingual support (English, Dutch, Persian)
- 🎭 28 fine-grained emotions (GoEmotions)
- 📊 Linguistic analysis (formality, tone, intensity, style)
- 🤖 9 language-specific models (3 per language)
- 🎨 Beautiful Rich output formatting

This notebook demonstrates Bahar's multilingual emotion detection capabilities, using a dedicated set of models for each supported language.


## 📦 Setup and Imports


In [1]:
# Standard library imports
from __future__ import annotations

import warnings
warnings.filterwarnings('ignore')

# Bahar imports
from bahar import EmotionAnalyzer, EnhancedAnalyzer
from bahar.datasets.goemotions import GOEMOTIONS_EMOTIONS, EMOTION_GROUPS
from bahar.utils.language_models import (
    get_available_models,
    get_supported_languages,
    detect_language,
)

# Rich imports for beautiful output
from rich.console import Console, Group
from rich.panel import Panel
from rich.table import Table
from rich.text import Text
from rich.progress import track

# Initialize Rich console
# A single shared Console instance; the cells below all print through it.
console = Console()

# Confirmation message printed once the statements above have executed.
console.print("[bold green]✓[/bold green] All imports successful!")


## 🌍 Available Languages and Models

Let's see what languages and models are available in Bahar.


In [14]:
# Render one Rich table per supported language, listing each model key,
# the model name it maps to, and whether the key comes from the registry.
languages = get_supported_languages()

console.print()
console.rule("[bold cyan]🌍 Supported Languages and Models[/bold cyan]", style="cyan")
console.print()

# Per-language decoration. Languages missing from these maps fall back to
# a globe emoji and cyan (see the .get() defaults in the loop).
language_flags = {"english": "🇬🇧", "dutch": "🇳🇱", "persian": "🇮🇷"}
language_colors = {"english": "green", "dutch": "blue", "persian": "magenta"}

# Column layout shared by every per-language table.
column_specs = (
    ("Model Key", {"style": "cyan", "width": 30}),
    ("HuggingFace Model / Display Name", {"style": "white", "width": 55}),
    ("Type", {"style": "yellow", "width": 15, "justify": "center"}),
)

for lang in languages:
    models = get_available_models(lang)
    flag = language_flags.get(lang, "🌐")
    color = language_colors.get(lang, "cyan")
    heading = f"[bold {color}]{flag} {lang.upper()} Models[/bold {color}]"

    table = Table(
        title=heading,
        show_header=True,
        header_style="bold magenta",
        border_style=color,
        title_style=f"bold {color}",
    )
    for header, options in column_specs:
        table.add_column(header, **options)

    for key, model_name in models.items():
        # Keys prefixed with "registry:" are labelled as registry models;
        # everything else is labelled as built-in.
        model_type = "Registry 🔗" if key.startswith("registry:") else "Built-in ⚙️"
        table.add_row(key, model_name, model_type)

    console.print(table)
    console.print()


## 🎭 GoEmotions Taxonomy

The GoEmotions dataset includes 28 fine-grained emotion categories grouped by sentiment.


In [15]:
# Show the GoEmotions taxonomy: one bordered panel per emotion group,
# laid out as two rows of side-by-side panels.
from rich.columns import Columns

console.print()
console.rule("[bold cyan]🎭 GoEmotions Emotion Taxonomy[/bold cyan]", style="cyan")
console.print()

# Colour used for each group's text and border; unknown groups use white.
colors = {
    "positive": "green",
    "negative": "red",
    "ambiguous": "yellow",
    "neutral": "white",
}

panels = []
for group, emotions in EMOTION_GROUPS.items():
    color = colors.get(group, "white")

    # Build "a • b • c": a dim bullet goes before every emotion but the first.
    emotion_text = Text()
    for position, emotion in enumerate(emotions):
        if position:
            emotion_text.append(" • ", style="dim")
        emotion_text.append(emotion, style=color)

    panels.append(
        Panel(
            emotion_text,
            title=f"[bold {color}]{group.upper()}[/bold {color}]",
            subtitle=f"[dim]{len(emotions)} emotions[/dim]",
            border_style=color,
            padding=(1, 2),
        )
    )

# First row holds the first two panels, second row holds the remainder;
# each row is followed by a blank line.
for row in (panels[:2], panels[2:]):
    console.print(Columns(row, equal=True, expand=True))
    console.print()


## 🔍 Language Detection

Bahar can automatically detect the language of input text.


In [10]:
# Run detect_language() over one sample sentence per language and show the
# results in a single table.
test_texts = [
    ("This is absolutely wonderful and amazing!", "🇬🇧"),
    ("Dit is absoluut verschrikkelijk en teleurstellend.", "🇳🇱"),
    ("این واقعاً افتضاح و ناامیدکننده است.", "🇮🇷"),
]

console.print()
console.rule("[bold cyan]🔍 Language Detection Demo[/bold cyan]", style="cyan")
console.print()

detection_table = Table(
    title="[bold]Automatic Language Detection[/bold]",
    title_style="bold cyan",
    show_header=True,
    header_style="bold magenta",
    border_style="cyan",
)
for header, options in (
    ("Flag", {"style": "white", "width": 6, "justify": "center"}),
    ("Text Sample", {"style": "white", "width": 55}),
    ("Detected", {"style": "green bold", "width": 15, "justify": "center"}),
    ("Confidence", {"style": "yellow", "width": 12, "justify": "center"}),
):
    detection_table.add_column(header, **options)

# NOTE: the confidence cell is a fixed placeholder label, not a value
# returned by detect_language().
confidence = "High ✓"
for text, flag in test_texts:
    detected = detect_language(text)
    detection_table.add_row(flag, text, detected.upper(), confidence)

console.print(detection_table)
console.print()


## 🎭 Basic Emotion Analysis

### Example 1: English - GoEmotions Model

Let's analyze a positive English text using the GoEmotions model.


In [11]:
# Example 1: load the English GoEmotions model, analyze one positive
# sentence, then render the top emotions and the overall sentiment group.
from rich.progress import Progress, SpinnerColumn, TextColumn

# Number of characters used to draw each confidence bar.
BAR_WIDTH = 30

console.print()
console.rule("[bold green]🇬🇧 English Analysis - GoEmotions Model[/bold green]", style="green")
console.print()

with Progress(
    SpinnerColumn(),
    TextColumn("[progress.description]{task.description}"),
    console=console,
) as progress:
    # total=None makes this an indeterminate (spinner-only) task.
    task = progress.add_task("[cyan]Loading English GoEmotions model...", total=None)
    analyzer_en = EmotionAnalyzer(language="english", model_key="goemotions")
    analyzer_en.load_model()
    # `completed` is a step count, not a boolean flag. Give the task a
    # one-step total and complete it so Rich actually marks it finished
    # instead of leaving the spinner frozen mid-animation.
    progress.update(task, total=1, completed=1)

console.print("[bold green]✓ Model loaded successfully![/bold green]\n")

# Analyze positive English text
text_en = "This is absolutely wonderful and amazing! I'm so excited and grateful!"

# Display input text in a panel
input_panel = Panel(
    Text(text_en, style="white"),
    title="[bold cyan]📝 Input Text[/bold cyan]",
    border_style="cyan",
    padding=(1, 2)
)
console.print(input_panel)
console.print()

result_en = analyzer_en.analyze(text_en, top_k=5)

# Display results in an enhanced table
table = Table(
    title="[bold]🎭 Emotion Analysis Results[/bold]",
    show_header=True,
    header_style="bold magenta",
    border_style="green",
    title_style="bold green"
)
table.add_column("Rank", style="dim", width=6, justify="center")
table.add_column("Emotion", style="cyan bold", width=18)
table.add_column("Score", style="yellow", width=10, justify="right")
table.add_column("Percentage", style="green", width=10, justify="right")
table.add_column("Confidence", style="white", width=35)

# Medal emoji for the first three ranks; later ranks show their number.
medals = ("🥇", "🥈", "🥉")

for idx, (emotion, score) in enumerate(result_en.get_top_emotions(), 1):
    # Scores are presumably probabilities in [0, 1]; clamp anyway so an
    # out-of-range value can never produce a bar of the wrong width.
    bar_length = max(0, min(BAR_WIDTH, int(score * BAR_WIDTH)))
    bar = "█" * bar_length + "░" * (BAR_WIDTH - bar_length)
    percentage = f"{score * 100:.1f}%"

    rank_display = medals[idx - 1] if idx <= len(medals) else str(idx)

    table.add_row(rank_display, emotion.title(), f"{score:.4f}", percentage, bar)

console.print(table)
console.print()

# Display sentiment in a styled panel; unknown groups fall back to white.
sentiment = result_en.get_sentiment_group()
sentiment_colors = {
    "positive": "green",
    "negative": "red",
    "ambiguous": "yellow",
    "neutral": "white"
}
color = sentiment_colors.get(sentiment, "white")

sentiment_panel = Panel(
    Text(sentiment.upper(), style=f"bold {color}", justify="center"),
    title="[bold]💭 Overall Sentiment[/bold]",
    border_style=color,
    padding=(0, 2)
)
console.print(sentiment_panel)
console.print()


### Example 2: Dutch - Sentiment Model

Now let's analyze a negative Dutch text. The model is expected to classify it as negative.


In [12]:
# Example 2: load the Dutch sentiment model, analyze one negative sentence,
# then render the top emotions and the overall sentiment group.
# Imported here as well so this cell does not rely on another cell having
# brought the progress helpers into scope.
from rich.progress import Progress, SpinnerColumn, TextColumn

# Number of characters used to draw each confidence bar.
BAR_WIDTH = 30

console.print()
console.rule("[bold blue]🇳🇱 Dutch Analysis - Multilingual Sentiment Model[/bold blue]", style="blue")
console.print()

with Progress(
    SpinnerColumn(),
    TextColumn("[progress.description]{task.description}"),
    console=console,
) as progress:
    # total=None makes this an indeterminate (spinner-only) task.
    task = progress.add_task("[cyan]Loading Dutch sentiment model...", total=None)
    analyzer_nl = EmotionAnalyzer(language="dutch", model_key="sentiment")
    analyzer_nl.load_model()
    # `completed` is a step count, not a boolean flag. Give the task a
    # one-step total and complete it so Rich actually marks it finished
    # instead of leaving the spinner frozen mid-animation.
    progress.update(task, total=1, completed=1)

console.print("[bold green]✓ Model loaded successfully![/bold green]\n")

# Analyze negative Dutch text
text_nl = "Dit is absoluut verschrikkelijk en teleurstellend."
translation = "This is absolutely terrible and disappointing."

# Display input text with translation
input_text = Text()
input_text.append(text_nl, style="white bold")
input_text.append("\n\n", style="")
input_text.append("🔄 Translation: ", style="dim italic")
input_text.append(translation, style="dim italic")

input_panel = Panel(
    input_text,
    title="[bold cyan]📝 Input Text (Dutch)[/bold cyan]",
    border_style="cyan",
    padding=(1, 2)
)
console.print(input_panel)
console.print()

result_nl = analyzer_nl.analyze(text_nl, top_k=5)

# Display results in an enhanced table
table = Table(
    title="[bold]🎭 Emotion Analysis Results[/bold]",
    show_header=True,
    header_style="bold magenta",
    border_style="blue",
    title_style="bold blue"
)
table.add_column("Rank", style="dim", width=6, justify="center")
table.add_column("Emotion", style="cyan bold", width=18)
table.add_column("Score", style="yellow", width=10, justify="right")
table.add_column("Percentage", style="red", width=10, justify="right")
table.add_column("Confidence", style="white", width=35)

# Medal emoji for the first three ranks; later ranks show their number.
medals = ("🥇", "🥈", "🥉")

for idx, (emotion, score) in enumerate(result_nl.get_top_emotions(), 1):
    # Scores are presumably probabilities in [0, 1]; clamp anyway so an
    # out-of-range value can never produce a bar of the wrong width.
    bar_length = max(0, min(BAR_WIDTH, int(score * BAR_WIDTH)))
    bar = "█" * bar_length + "░" * (BAR_WIDTH - bar_length)
    percentage = f"{score * 100:.1f}%"
    rank_display = medals[idx - 1] if idx <= len(medals) else str(idx)
    table.add_row(rank_display, emotion.title(), f"{score:.4f}", percentage, bar)

console.print(table)
console.print()

# Display sentiment in a styled panel; unknown groups fall back to white.
sentiment = result_nl.get_sentiment_group()
sentiment_colors = {
    "positive": "green",
    "negative": "red",
    "ambiguous": "yellow",
    "neutral": "white"
}
color = sentiment_colors.get(sentiment, "white")

sentiment_panel = Panel(
    Text(sentiment.upper(), style=f"bold {color}", justify="center"),
    title="[bold]💭 Overall Sentiment[/bold]",
    border_style=color,
    padding=(0, 2)
)
console.print(sentiment_panel)
console.print()


### Example 3: Persian - ParsBERT Model

Let's analyze Persian text using the ParsBERT model.


In [13]:
# Example 3: load the Persian sentiment model, analyze one sentence, then
# print the top emotions and the overall sentiment group.
console.print("\n[bold cyan]Loading Persian ParsBERT model...[/bold cyan]")
analyzer_fa = EmotionAnalyzer(language="persian", model_key="sentiment")
analyzer_fa.load_model()
console.print("[bold green]✓ Model loaded![/bold green]\n")

# Analyze Persian text
text_fa = "این واقعاً افتضاح و ناامیدکننده است."

console.print(f"[bold]Text:[/bold] {text_fa}")
# Plain string: there is nothing to interpolate on this line.
console.print("[dim]Translation: This is really terrible and disappointing.[/dim]\n")

result_fa = analyzer_fa.analyze(text_fa, top_k=5)

# Number of characters used to draw each confidence bar.
BAR_WIDTH = 30

# Display results
table = Table(title="Persian Emotion Analysis", show_header=True, header_style="bold green")
table.add_column("Emotion", style="cyan", width=20)
table.add_column("Score", style="yellow", width=10)
table.add_column("Confidence Bar", style="white", width=30)

for emotion, score in result_fa.get_top_emotions():
    # Scores are presumably probabilities in [0, 1]; clamp anyway so an
    # out-of-range value can never produce a bar of the wrong width.
    bar_length = max(0, min(BAR_WIDTH, int(score * BAR_WIDTH)))
    bar = "█" * bar_length + "░" * (BAR_WIDTH - bar_length)
    table.add_row(emotion, f"{score:.4f}", bar)

console.print(table)

# Colour the label by the sentiment group actually returned, rather than
# always printing it in red; unknown groups fall back to white.
sentiment = result_fa.get_sentiment_group()
sentiment_colors = {
    "positive": "green",
    "negative": "red",
    "ambiguous": "yellow",
    "neutral": "white",
}
color = sentiment_colors.get(sentiment, "white")
console.print(f"\n[bold]Sentiment:[/bold] [{color}]{sentiment.upper()}[/{color}]")


## 🎨 Enhanced Analysis (Emotion + Linguistics)

The Enhanced Analyzer combines emotion detection with linguistic dimension analysis including formality, tone, intensity, and communication style.


In [8]:
# Enhanced analysis demo: for each sample text, print the top emotions, the
# linguistic dimensions (formality, tone, intensity, style) and the overall
# sentiment group.
console.print("\n[bold cyan]Loading Enhanced Analyzer...[/bold cyan]")
enhanced_analyzer = EnhancedAnalyzer(language="english", model_key="goemotions")
enhanced_analyzer.load_model()
console.print("[bold green]✓ Model loaded![/bold green]\n")

# Test different text styles
test_cases = [
    {
        "text": "I am extremely disappointed with this terrible service.",
        "description": "Formal negative feedback"
    },
    {
        "text": "OMG this is sooo amazing!!! I love it!!!",
        "description": "Informal enthusiastic response"
    },
]

# Loop-invariant settings, defined once instead of on every iteration.
EMOTION_BAR_WIDTH = 25  # matches the width of the "Bar" column below
sentiment_colors = {"positive": "green", "negative": "red", "ambiguous": "yellow", "neutral": "white"}

for idx, case in enumerate(test_cases, 1):
    console.print(f"[bold cyan]═══ Example {idx}: {case['description']} ═══[/bold cyan]\n")
    console.print(f"[bold]Text:[/bold] {case['text']}\n")

    result = enhanced_analyzer.analyze(case['text'], top_k=3)

    # Emotion Analysis
    emotion_table = Table(title="🎭 Emotion Analysis", show_header=True, header_style="bold green")
    emotion_table.add_column("Emotion", style="cyan", width=20)
    emotion_table.add_column("Score", style="yellow", width=10)
    emotion_table.add_column("Bar", style="white", width=EMOTION_BAR_WIDTH)

    for emotion, score in result.emotion_result.get_top_emotions():
        # Scores are presumably in [0, 1]; clamp so the bar can never
        # exceed the column width and wrap onto a second line.
        bar_length = max(0, min(EMOTION_BAR_WIDTH, int(score * EMOTION_BAR_WIDTH)))
        bar = "█" * bar_length
        emotion_table.add_row(emotion, f"{score:.3f}", bar)

    console.print(emotion_table)

    # Linguistic Features: each row shows the label and its score as a
    # percentage with two decimals.
    ling_table = Table(title="📊 Linguistic Dimensions", show_header=True, header_style="bold magenta")
    ling_table.add_column("Dimension", style="cyan", width=20)
    ling_table.add_column("Value", style="yellow", width=15)
    ling_table.add_column("Confidence", style="white", width=10)

    ling = result.linguistic_features
    ling_table.add_row("Formality", ling.formality, f"{ling.formality_score:.2%}")
    ling_table.add_row("Tone", ling.tone, f"{ling.tone_score:.2%}")
    ling_table.add_row("Intensity", ling.intensity, f"{ling.intensity_score:.2%}")
    ling_table.add_row("Style", ling.communication_style, f"{ling.style_score:.2%}")

    console.print(ling_table)

    # Sentiment, coloured by group; unknown groups fall back to white.
    sentiment = result.emotion_result.get_sentiment_group()
    color = sentiment_colors.get(sentiment, "white")
    console.print(f"\n[bold]Overall Sentiment:[/bold] [{color}]{sentiment.upper()}[/{color}]\n")
