In [1]:
################################################################################
# REPRODUCIBILITY SETUP - Set Seeds for Consistent Results
################################################################################
import random
import numpy as np
import torch
import os

def set_seed(seed=42):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global RNG and PyTorch (CPU,
    the current CUDA device and all CUDA devices), then asks cuDNN for
    deterministic kernels with autotuning switched off.

    Args:
        seed: Integer seed applied to every generator. Defaults to 42.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # Trade cuDNN speed for repeatable results.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # Exported for Python processes started from this one. The hash seed of
    # the already-running interpreter is read at start-up, so this line does
    # not change string hashing inside the current kernel.
    os.environ['PYTHONHASHSEED'] = str(seed)


# Set the global seed
set_seed(42)



In [2]:
################################################################################
# IMPORT REQUIRED LIBRARIES
################################################################################
import os
import json
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset
from torch.nn.functional import softmax
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
from transformers import (
    AutoTokenizer,
    BertPreTrainedModel,
    BertModel
)



2025-07-19 18:46:15.220149: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1752950775.450419      13 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1752950775.515159      13 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [3]:
################################################################################
# CUSTOM MODEL DEFINITION (Same as Training)
################################################################################
class BertForMultiTaskClassification(BertPreTrainedModel):
    """BERT encoder with two linear heads that share one pooled feature.

    One head scores emotion classes and the other scores intensity classes.
    Both heads read the same dropout-regularised encoder output.
    """

    def __init__(self, config, num_emotions=7, num_intensities=3):
        """Create the encoder, the dropout layer and both classifier heads.

        Args:
            config: Model configuration; ``hidden_size`` and
                ``hidden_dropout_prob`` are read from it.
            num_emotions: Output width of the emotion head.
            num_intensities: Output width of the intensity head.
        """
        super().__init__(config)
        hidden_size = config.hidden_size

        # Attribute names are part of the checkpoint's parameter names, so
        # they must stay exactly as they were when the model was saved.
        self.bert = BertModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier_emotion = nn.Linear(hidden_size, num_emotions)
        self.classifier_intensity = nn.Linear(hidden_size, num_intensities)

        self.init_weights()

    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        labels=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
    ):
        """Run the encoder and both heads, optionally computing the loss.

        Args:
            labels: Optional tensor indexed as ``labels[:, 0]`` (emotion
                class ids) and ``labels[:, 1]`` (intensity class ids).
            return_dict: Forwarded to the encoder only; when ``None`` it
                falls back to ``self.config.use_return_dict``. This method
                itself always returns a tuple.
            Remaining arguments are passed straight through to the encoder.

        Returns:
            ``(logits_emotion, logits_intensity, *extras)`` where ``extras``
            is whatever the encoder returned from index 2 onwards. When
            ``labels`` is given, the summed cross-entropy loss of both heads
            is prepended as the first element.
        """
        if return_dict is None:
            return_dict = self.config.use_return_dict

        encoder_outputs = self.bert(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        # Index 1 of the encoder output is the feature fed to both heads.
        shared_features = self.dropout(encoder_outputs[1])
        logits_emotion = self.classifier_emotion(shared_features)
        logits_intensity = self.classifier_intensity(shared_features)

        result = (logits_emotion, logits_intensity) + encoder_outputs[2:]
        if labels is None:
            return result

        # Both tasks are weighted equally: the two losses are simply added.
        criterion = nn.CrossEntropyLoss()
        emotion_loss = criterion(logits_emotion, labels[:, 0])
        intensity_loss = criterion(logits_intensity, labels[:, 1])
        return (emotion_loss + intensity_loss,) + result



In [4]:
################################################################################
# DATASET CLASS FOR INFERENCE (Same as Training)
################################################################################
class EmotionsDataset(Dataset):
    """Map-style dataset that tokenizes one text per item.

    Labels are optional so the same class can serve labelled evaluation and
    label-free inference.
    """

    def __init__(self, texts, emotion_labels=None, intensity_labels=None, tokenizer=None, max_length=128):
        """Store the raw inputs; tokenization happens lazily in ``__getitem__``.

        Args:
            texts: Positionally indexable collection of input strings.
            emotion_labels: Optional collection of emotion class ids,
                aligned with ``texts``.
            intensity_labels: Optional collection of intensity class ids,
                aligned with ``texts``.
            tokenizer: Callable invoked as ``tokenizer(text, padding=...,
                truncation=..., max_length=..., return_tensors="pt")`` and
                returning a mapping of tensors.
            max_length: Length every encoded text is padded/truncated to.
        """
        self.texts = texts
        self.emotion_labels = emotion_labels
        self.intensity_labels = intensity_labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        """Return the number of texts."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Tokenize the text at ``idx`` and attach labels when available.

        Returns:
            Dict with one tensor per tokenizer output key, with the leading
            batch axis removed. If both label collections were supplied, it
            also contains ``"labels"``: a ``torch.long`` tensor
            ``[emotion_label, intensity_label]``.
        """
        text = self.texts[idx]
        encoding = self.tokenizer(
            text,
            padding='max_length',
            truncation=True,
            max_length=self.max_length,
            return_tensors="pt"
        )
        # Remove only the leading batch axis. A bare squeeze() would also
        # drop the sequence axis when max_length == 1, yielding 0-d tensors
        # that no longer batch into (batch, seq) inputs.
        item = {k: v.squeeze(0) for k, v in encoding.items()}

        # For inference, we might not have labels
        if self.emotion_labels is not None and self.intensity_labels is not None:
            emotion_label = self.emotion_labels[idx]
            intensity_label = self.intensity_labels[idx]
            item["labels"] = torch.tensor([emotion_label, intensity_label], dtype=torch.long)

        return item



In [5]:
################################################################################
# LOAD TRAINED MODEL AND TOKENIZER
################################################################################

# Model paths
MODEL_DIR = "/kaggle/input/emotion-model/saved_bangla_emotion_model"  # Adjust path if needed
LABELS_FILE = os.path.join(MODEL_DIR, "labels_mapping.json")

# Check if model directory exists
if not os.path.exists(MODEL_DIR):
    print(f"❌ Model directory not found: {MODEL_DIR}")
    print("Please ensure the model was saved during training or adjust the path.")
else:
    print(f"✅ Found model directory: {MODEL_DIR}")
    print(f"Contents: {os.listdir(MODEL_DIR)}")

# Load labels mapping
try:
    # Read explicitly as UTF-8 so decoding does not depend on the platform locale.
    with open(LABELS_FILE, 'r', encoding='utf-8') as f:
        labels_mapping = json.load(f)

    EMOTIONS = labels_mapping["emotions"]
    INTENSITIES = labels_mapping["intensities"]
    emotion_to_id = labels_mapping["emotion_to_id"]
    id_to_emotion = labels_mapping["id_to_emotion"]
    intensity_to_id = labels_mapping["intensity_to_id"]
    id_to_intensity = labels_mapping["id_to_intensity"]

    print(f"📋 Loaded emotion categories: {EMOTIONS}")
    print(f"📋 Loaded intensity categories: {INTENSITIES}")

except FileNotFoundError:
    print(f"❌ Labels mapping file not found: {LABELS_FILE}")
    print("Creating default mappings...")
    # Default mappings (adjust based on your actual data).
    # NOTE: len(EMOTIONS) / len(INTENSITIES) set the classifier head sizes
    # below, so these placeholders must match the checkpoint being loaded.
    EMOTIONS = ['joy', 'fear', 'anger', 'sadness', 'disgust', 'surprise', 'love']
    INTENSITIES = ['low', 'medium', 'high']
    emotion_to_id = {emotion: idx for idx, emotion in enumerate(EMOTIONS)}
    intensity_to_id = {intensity: idx for idx, intensity in enumerate(INTENSITIES)}
    # JSON object keys are always strings, and the inference code looks labels
    # up with str(class_id). Use string keys here as well so the fallback
    # mappings behave exactly like the ones loaded from the file.
    id_to_emotion = {str(idx): emotion for idx, emotion in enumerate(EMOTIONS)}
    id_to_intensity = {str(idx): intensity for idx, intensity in enumerate(INTENSITIES)}

# Load tokenizer
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
    print("✅ Tokenizer loaded successfully")
except Exception as e:
    # Catch Exception rather than everything, so Ctrl-C / kernel interrupts
    # still propagate, and report why the local load failed.
    print("❌ Failed to load tokenizer from model directory")
    print(f"   Reason: {e}")
    print("Loading tokenizer from Hugging Face...")
    tokenizer = AutoTokenizer.from_pretrained("sagorsarker/bangla-bert-base")

# Load model
try:
    model = BertForMultiTaskClassification.from_pretrained(
        MODEL_DIR,
        num_emotions=len(EMOTIONS),
        num_intensities=len(INTENSITIES)
    )
    model.eval()  # Set to evaluation mode
    print("✅ Model loaded successfully and set to evaluation mode")
except Exception as e:
    # Best-effort: the failure is reported but not re-raised, so `model` is
    # left undefined by this cell when loading fails.
    print(f"❌ Failed to load model: {e}")
    print("You may need to train the model first or check the model path.")

✅ Found model directory: /kaggle/input/emotion-model/saved_bangla_emotion_model
Contents: ['labels_mapping.json', 'config.json', 'tokenizer.json', 'tokenizer_config.json', 'model.safetensors', 'special_tokens_map.json', 'vocab.txt']
📋 Loaded emotion categories: ['angry', 'disgust', 'fear', 'happy', 'sad', 'surprise']
📋 Loaded intensity categories: ['0.0', '1.0', '2.0']
✅ Tokenizer loaded successfully
✅ Model loaded successfully and set to evaluation mode


In [6]:
################################################################################
# INFERENCE FUNCTION (Same Logic as Training Test Set Evaluation)
################################################################################

def _label_for(mapping, class_id):
    """Look up a class id in an id->label mapping keyed by str or by int."""
    key = str(class_id)
    if key in mapping:
        return mapping[key]
    return mapping[int(class_id)]


def predict_emotions_and_intensities(texts, model, tokenizer, batch_size=32):
    """
    Perform inference on a list of texts using the trained model.
    Returns predictions in the same format as training evaluation.

    Args:
        texts: Iterable of input strings (list, tuple, pandas Series, ...).
        model: Module whose forward pass returns a sequence starting with
            ``(emotion_logits, intensity_logits)`` when called without labels.
        tokenizer: Tokenizer handed to ``EmotionsDataset``.
        batch_size: Number of texts per forward pass.

    Returns:
        Tuple ``(results_df, emotion_logits, intensity_logits)``:
        ``results_df`` has columns ``text``, ``predicted_emotion_id``,
        ``predicted_intensity_id``, ``predicted_emotion`` and
        ``predicted_intensity``; the two logits values are NumPy arrays with
        one row per input text. For empty input all three are empty.
    """
    from torch.utils.data import DataLoader

    # Materialise the input so positional indexing inside the dataset works
    # for any iterable (e.g. a pandas Series with a non-default index).
    texts = list(texts)

    print(f"🔮 Processing {len(texts)} texts for inference...")

    if not texts:
        # Nothing to run: np.concatenate would raise on an empty list, so
        # build empty outputs directly.
        emotion_logits = np.empty((0, len(id_to_emotion)), dtype=np.float32)
        intensity_logits = np.empty((0, len(id_to_intensity)), dtype=np.float32)
        emotion_predictions = np.empty(0, dtype=np.int64)
        intensity_predictions = np.empty(0, dtype=np.int64)
    else:
        # Create dataset and data loader for inference (order is preserved)
        dataset = EmotionsDataset(texts, tokenizer=tokenizer)
        dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

        # The model's device does not change between batches; look it up once.
        device = next(model.parameters()).device

        # Store predictions
        all_emotion_logits = []
        all_intensity_logits = []

        # Force evaluation mode so dropout cannot perturb the predictions,
        # then restore whatever mode the caller had.
        was_training = model.training
        model.eval()
        try:
            with torch.no_grad():
                for batch in dataloader:
                    # Move batch to device (CPU/GPU)
                    batch = {k: v.to(device) for k, v in batch.items() if k != "labels"}

                    # Get model predictions
                    outputs = model(**batch)
                    logits_emotion, logits_intensity = outputs[0], outputs[1]

                    # Store logits
                    all_emotion_logits.append(logits_emotion.cpu().numpy())
                    all_intensity_logits.append(logits_intensity.cpu().numpy())
        finally:
            model.train(was_training)

        # Concatenate all predictions
        emotion_logits = np.concatenate(all_emotion_logits, axis=0)
        intensity_logits = np.concatenate(all_intensity_logits, axis=0)

        # Convert logits to predictions (same as training)
        emotion_predictions = np.argmax(emotion_logits, axis=1)
        intensity_predictions = np.argmax(intensity_logits, axis=1)

    # Convert predictions to labels; mappings loaded from JSON have string
    # keys while hand-built ones may use ints, so accept both.
    predicted_emotions = [_label_for(id_to_emotion, pred) for pred in emotion_predictions]
    predicted_intensities = [_label_for(id_to_intensity, pred) for pred in intensity_predictions]

    # Create results dataframe (same format as training)
    results_df = pd.DataFrame({
        'text': texts,
        'predicted_emotion_id': emotion_predictions,
        'predicted_intensity_id': intensity_predictions,
        'predicted_emotion': predicted_emotions,
        'predicted_intensity': predicted_intensities
    })

    print("✅ Inference completed successfully")
    return results_df, emotion_logits, intensity_logits

print("🔧 Inference function defined")

🔧 Inference function defined


In [7]:
################################################################################
# SAMPLE TEXT DATA FOR TESTING (Bangla Text Examples)
################################################################################

# Sample Bangla texts for testing (you can replace with your own texts).
# The English glosses above each entry are approximate translations.
sample_texts = [
    # "I am very happy today."
    "আমি খুব খুশি আজকে।",
    # "This is very sad news."
    "এটা খুব দুঃখজনক খবর।",
    # "May Allah help them and destroy the oppressors, amen."
    "আল্লাহ তাদের সাহায্য করুন, আর জালিমদের ধ্বংস করুন, আমিন",
    # "Yet another ploy to spread panic among people by calling a strike."
    "আবার হরতাল করে মানুষের মধ্যে আতংক তৈরি করার পাঁয়তারা।",
    # "I was astonished to see this scene."
    "এই দৃশ্যটা দেখে আমি অবাক হয়ে গেছি।",
    # Roughly: "Thank goodness they were not born human."
    "ভগ্যিস ও মানুষ হয়ে জন্মায়নি.",
    # "Low turnout is a fault, high turnout is a fault, even a vote without
    # clashes is a fault; what do the critics actually want??"
    "ভোটের হার কম হলে দোষ, বেশি হলে দোষ, ভোটের সময় মারামারি না হওয়াটাও দোষের, আসলে সমালোচকরা কী চায় ??",
]

print(f"📝 Prepared {len(sample_texts)} sample texts for testing")
numbered_lines = [
    f"{position}. {sentence}"
    for position, sentence in enumerate(sample_texts, start=1)
]
for line in numbered_lines:
    print(line)

📝 Prepared 7 sample texts for testing
1. আমি খুব খুশি আজকে।
2. এটা খুব দুঃখজনক খবর।
3. আল্লাহ তাদের সাহায্য করুন, আর জালিমদের ধ্বংস করুন, আমিন
4. আবার হরতাল করে মানুষের মধ্যে আতংক তৈরি করার পাঁয়তারা।
5. এই দৃশ্যটা দেখে আমি অবাক হয়ে গেছি।
6. ভগ্যিস ও মানুষ হয়ে জন্মায়নি.
7. ভোটের হার কম হলে দোষ, বেশি হলে দোষ, ভোটের সময় মারামারি না হওয়াটাও দোষের, আসলে সমালোচকরা কী চায় ??


In [8]:
################################################################################
# PERFORM INFERENCE ON SAMPLE TEXTS
################################################################################

# Run inference
print("\n" + "="*60)
print("🚀 PERFORMING INFERENCE ON SAMPLE TEXTS")
print("="*60)

results_df, emotion_logits, intensity_logits = predict_emotions_and_intensities(
    sample_texts, model, tokenizer, batch_size=8
)

# Display results in the same format as training
print("\n--- INFERENCE RESULTS ---")
print("Format: Text | Predicted Emotion | Predicted Intensity")
print("-" * 80)

# Longest text shown in full; longer texts are cut here and marked with "...".
PREVIEW_CHARS = 50

for number, row in enumerate(results_df.itertuples(index=False), start=1):
    # Only add the ellipsis when something was actually cut off, so short
    # texts are not displayed as if they had been truncated.
    if len(row.text) > PREVIEW_CHARS:
        preview = row.text[:PREVIEW_CHARS] + "..."
    else:
        preview = row.text
    print(f"{number}. {preview}")
    print(f"   → Emotion: {row.predicted_emotion} (ID: {row.predicted_emotion_id})")
    print(f"   → Intensity: {row.predicted_intensity} (ID: {row.predicted_intensity_id})")
    print()

# Save results to CSV (same as training)
output_file = "inference_predictions.csv"
results_df.to_csv(output_file, index=False)
print(f"💾 Results saved to: {output_file}")


🚀 PERFORMING INFERENCE ON SAMPLE TEXTS
🔮 Processing 7 texts for inference...
✅ Inference completed successfully

--- INFERENCE RESULTS ---
Format: Text | Predicted Emotion | Predicted Intensity
--------------------------------------------------------------------------------
1. আমি খুব খুশি আজকে।...
   → Emotion: happy (ID: 3)
   → Intensity: 2.0 (ID: 2)

2. এটা খুব দুঃখজনক খবর।...
   → Emotion: sad (ID: 4)
   → Intensity: 2.0 (ID: 2)

3. আল্লাহ তাদের সাহায্য করুন, আর জালিমদের ধ্বংস করুন,...
   → Emotion: fear (ID: 2)
   → Intensity: 2.0 (ID: 2)

4. আবার হরতাল করে মানুষের মধ্যে আতংক তৈরি করার পাঁয়তা...
   → Emotion: angry (ID: 0)
   → Intensity: 1.0 (ID: 1)

5. এই দৃশ্যটা দেখে আমি অবাক হয়ে গেছি।...
   → Emotion: surprise (ID: 5)
   → Intensity: 2.0 (ID: 2)

6. ভগ্যিস ও মানুষ হয়ে জন্মায়নি....
   → Emotion: fear (ID: 2)
   → Intensity: 0.0 (ID: 0)

7. ভোটের হার কম হলে দোষ, বেশি হলে দোষ, ভোটের সময় মারা...
   → Emotion: angry (ID: 0)
   → Intensity: 1.0 (ID: 1)

💾 Results saved to: infere