In [None]:
# =========================================================================================
#
#  Notebook:         Inference, XAI, and Submission
#  Competition:      Fragments of Feeling: A Contextual Emotion Contest
#  Author:           Md. Abdur Rahman
#
# =========================================================================================
#
# ### Notebook Overview
#
# > Welcome to my inference notebook! This is the second and final part of my submission.
# > Here, I'll take the 5 models I trained in the first notebook and use them to predict
# > emotions on the official test set.
# >
# > My strategy is to create an **ensemble** of these 5 models. I will average their
# > predictions to get a final, more robust result. Furthermore, I'll dive into
# > **eXplainable AI (XAI)** using the SHAP library to understand *why* my model makes
# > certain predictions. Finally, I'll generate the `submission.csv` file.
#
# =========================================================================================

# **Only change this portion of the code for the specific test set**

In [None]:

# Location of the unlabeled test CSV to run inference on. This is the only value meant to be
# edited per test set; CFG.test_path below simply copies it.
test_path = "/kaggle/input/fragments-of-feeling/test_emotions_no_labels.csv"


# **Don't touch this portion of the code**

In [None]:

# =========================================================================================
# Imports
# =========================================================================================
#
# > As always, I start by importing the necessary libraries. This is similar to my
# > training notebook, but with the addition of the `shap` library, which I'll
# > use for model interpretability.
#
# -----------------------------------------------------------------------------------------
import os
import random
import pandas as pd
import numpy as np
import torch
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments, DataCollatorWithPadding
from sklearn.model_selection import StratifiedKFold
import gc
import warnings
import shap

# I'll suppress warnings for a cleaner output.
# NOTE(review): "ignore" with no category silences *every* warning for the rest of the run,
# so deprecation or numerical warnings from later cells will not be shown either.
warnings.filterwarnings("ignore")

# =========================================================================================
# Configuration
# =========================================================================================
#
# > I'm using my `CFG` class again to keep all settings neat and tidy. The parameters
# > here must be consistent with my training setup.
#
# -----------------------------------------------------------------------------------------
class CFG:
    # Single place for every inference setting. Nothing here is tuned at inference time;
    # the values only have to agree with what the training notebook used.

    # --- Reproducibility ---
    seed = 42  # identical to the training seed

    # --- Paths ---
    # Kaggle Dataset holding the trained weights; the code below reads one
    # sub-directory per fold from it (fold_0, fold_1, ...).
    input_model_dir = "/kaggle/input/model-weights/models"
    # Original training CSV: needed for the label mapping and for the SHAP samples.
    train_path = "/kaggle/input/fragments-of-feeling/train_emotions.csv"
    # Competition-provided example of the expected submission layout.
    sample_submission_path = "/kaggle/input/fragments-of-feeling/sample_submission.csv"
    # Copied from the module-level `test_path` defined in the user-editable cell.
    test_path = test_path

    # --- Model & Tokenizer ---
    # Must be the same backbone and sequence length that were used for training.
    model_name = 'FacebookAI/roberta-large'
    max_len = 256

    # --- Inference ---
    n_splits = 5          # number of fold models in the ensemble
    eval_batch_size = 16  # per-device batch size for prediction

# =========================================================================================
# Reproducibility
# =========================================================================================
#
# > Just like in my training notebook, I'm setting a global seed to ensure my
# > results (like the SHAP sampling) are reproducible.
#
# -----------------------------------------------------------------------------------------
def seed_everything(seed):
    """
    Seed every random number generator this notebook touches.

    Covers Python's `random`, NumPy, PyTorch (CPU and CUDA), exports
    PYTHONHASHSEED, and switches cuDNN to deterministic, non-benchmarking mode.

    Args:
        seed: Integer seed applied to all generators.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # Each of these callables accepts the seed as its single argument.
    for apply_seed in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        apply_seed(seed)

    # Trade a little speed for run-to-run identical cuDNN kernels.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

# Seed all RNGs once, up front, with the value configured in CFG, before any data is
# loaded or any model is run.
seed_everything(CFG.seed)
print(f"Seeds set to {CFG.seed}")


# =========================================================================================
# Load and Preprocess Data
# =========================================================================================
#
# > Here, I'll load the data. I need the test data for predictions. You might wonder why
# > I'm also loading the training data. I need it for two reasons:
# > 1. To reconstruct the `id2label` mapping to convert numeric predictions back to emotion names.
# > 2. To get a few sample texts for my SHAP analysis later on.
#
# -----------------------------------------------------------------------------------------
print("Loading and preprocessing data...")
train_df = pd.read_csv(CFG.train_path)
test_df = pd.read_csv(CFG.test_path)


def _build_model_text(df):
    """
    Return the model input strings: `title`, the literal ' [SEP] ', then `sentence`.

    Train and test go through this one helper so the two can never drift apart.

    NOTE(review): a missing title or sentence makes the concatenation NaN, which
    `astype(str)` turns into the text 'nan'. That is kept on purpose because the input
    format has to match what the models saw during training; confirm against the
    training notebook before changing it.
    """
    return (df['title'] + ' [SEP] ' + df['sentence']).astype(str)


# --- Preprocess Train Data (for label mapping and XAI) ---
# I only need to do enough preprocessing on the training data to get what I need.
train_df['text'] = _build_model_text(train_df)
train_df = train_df.rename(columns={'emotion_int': 'label'})

# Build id -> name from the (emotion_int, emotion_final) pairs that actually occur in the data.
# Enumerating the alphabetically sorted category names would only give the right names if
# emotion_int happened to follow alphabetical order; reading the pairs is correct by construction.
names_per_id = train_df.groupby('label')['emotion_final'].nunique()
if (names_per_id > 1).any():
    print("WARNING: some emotion_int values map to more than one emotion_final name; using the first one seen.")
label_pairs = (
    train_df[['label', 'emotion_final']]
    .dropna()
    .drop_duplicates(subset='label')
    .sort_values('label')  # keeps dict order == class-index order for downstream list(id2label.values())
)
id2label = {int(idx): str(name) for idx, name in zip(label_pairs['label'], label_pairs['emotion_final'])}
label2id = {v: k for k, v in id2label.items()}
num_labels = len(id2label)

# --- Preprocess Test Data ---
# It's absolutely critical that I preprocess the test data in the *exact* same way
# as I did the training data. This means combining 'title' and 'sentence' with '[SEP]'.
test_df['text'] = _build_model_text(test_df)

print(f"Test data loaded. Number of samples: {len(test_df)}")
print("Test data columns:", test_df.columns.tolist())


# =========================================================================================
# Tokenizer and Helper Functions
# =========================================================================================
#
# > Instead of loading the tokenizer from the Hugging Face Hub, I'm loading it from
# > one of my saved model directories (fold_0). This ensures I'm using the exact same
# > tokenizer configuration (including any special tokens) that the model was trained with.
#
# -----------------------------------------------------------------------------------------
# Build the directory once so the log line and the actual load always refer to the same path
# (plain string concatenation would drop the path separator before "fold_0").
tokenizer_dir = os.path.join(CFG.input_model_dir, "fold_0")
print(f"Initializing tokenizer from a saved fold model: {tokenizer_dir}")
tokenizer = AutoTokenizer.from_pretrained(tokenizer_dir)

def tokenize_function(examples):
    """
    Tokenize a batch of examples for the model.

    Reads the 'text' field of `examples` and encodes it with the module-level
    `tokenizer`, truncating to CFG.max_len tokens. No padding is applied here.
    """
    batch_texts = examples['text']
    encoded = tokenizer(batch_texts, truncation=True, max_length=CFG.max_len)
    return encoded

# =========================================================================================
# Inference Loop (Ensemble Predictions)
# =========================================================================================
#
# > This is the core of my inference strategy. I will loop through each of my 5 saved
# > models. For each model, I'll predict on the entire test set. I'll then average the
# > raw output logits from all models. This ensembling technique often leads to more
# > stable and accurate predictions than relying on a single model.
#
# -----------------------------------------------------------------------------------------
# Running average of the per-fold logits, one row per test sample.
test_preds = np.zeros((len(test_df), num_labels))

# To be efficient, I'll tokenize the entire test dataset just once before the loop.
test_ds = Dataset.from_pandas(test_df)
tokenized_test_ds = test_ds.map(tokenize_function, batched=True, remove_columns=test_df.columns.tolist())

# Mixed precision is only valid on CUDA; requesting it on a CPU-only machine makes
# TrainingArguments raise, so enable it only when a GPU is actually present.
use_fp16 = torch.cuda.is_available()
# The collator does not depend on the fold, so build it once.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

for fold in range(CFG.n_splits):
    print("\n" + "="*50)
    print(f"==========    Inferring with Fold: {fold}    ===========")
    print("="*50)

    # Construct the path to the model for the current fold.
    fold_model_path = os.path.join(CFG.input_model_dir, f"fold_{fold}")
    print(f"Loading model from: {fold_model_path}")
    model = AutoModelForSequenceClassification.from_pretrained(fold_model_path)

    # I'll use the Hugging Face `Trainer` as a convenient wrapper to handle the prediction loop.
    inference_args = TrainingArguments(
        output_dir=f'./temp_inference_fold_{fold}',
        per_device_eval_batch_size=CFG.eval_batch_size,
        report_to="none",
        fp16=use_fp16,
    )

    trainer = Trainer(
        model=model,
        args=inference_args,
        data_collator=data_collator
    )

    # Get the predictions (logits) for the test set from this fold's model.
    test_fold_preds = trainer.predict(tokenized_test_ds).predictions
    # Fail with a readable message if the checkpoint's class count disagrees with the label map.
    if test_fold_preds.shape != test_preds.shape:
        raise ValueError(
            f"Fold {fold} produced logits of shape {test_fold_preds.shape}, "
            f"expected {test_preds.shape}"
        )
    # I'll add this fold's predictions to my master array, averaging as I go.
    test_preds += test_fold_preds / CFG.n_splits

    # Release this fold's weights before loading the next model.
    del model, trainer
    gc.collect()
    torch.cuda.empty_cache()

print("\nEnsemble predictions generated successfully.")

# =========================================================================================
# --- XAI - Model Interpretability with SHAP ---
# =========================================================================================
#
# > Now for a really interesting part! I don't just want a prediction; I want to
# > understand *why* my model is making its decisions. This is where eXplainable AI (XAI)
# > comes in. I'll use the SHAP (SHapley Additive exPlanations) library to visualize which
# > words in a sentence contributed most to the model's final emotion classification.
# > This builds trust in the model and can reveal fascinating insights.
#
# -----------------------------------------------------------------------------------------
print("\n" + "="*50)
print("  Running SHAP Analysis for Model Interpretability")
print("="*50 + "\n")

# For this analysis, I'll just use my Fold 0 model as a representative example.
xai_model_path = os.path.join(CFG.input_model_dir, "fold_0")
print(f"Loading Fold 0 model for SHAP from: {xai_model_path}")
xai_model = AutoModelForSequenceClassification.from_pretrained(xai_model_path)

# Move the model to the GPU if available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
xai_model.to(device)
# Make inference mode explicit so dropout is off for every forward pass below.
xai_model.eval()

# To get some interesting samples to explain, I'll pull from the validation set of Fold 0.
# NOTE(review): these rows are held-out for the fold-0 model only if training used the same
# splitter settings, seed and row order as here — confirm against the training notebook.
skf = StratifiedKFold(n_splits=CFG.n_splits, shuffle=True, random_state=CFG.seed)
train_df['fold'] = -1
fold_col = train_df.columns.get_loc('fold')
for f, (_, val_idx) in enumerate(skf.split(train_df, train_df['label'])):
    # split() yields *positional* indices, so write with iloc; label-based .loc is only
    # equivalent while the frame still has its default RangeIndex.
    train_df.iloc[val_idx, fold_col] = f
xai_val_df = train_df[train_df['fold'] == 0].reset_index(drop=True)

# I'll randomly select up to 5 samples to explain (fewer only if the fold is smaller than that).
n_xai_samples = min(5, len(xai_val_df))
samples_to_explain = xai_val_df.sample(n_xai_samples, random_state=CFG.seed)
sample_texts = samples_to_explain['text'].tolist()

print("Getting model predictions for the sample texts...")
inputs = tokenizer(sample_texts, padding=True, truncation=True, max_length=CFG.max_len, return_tensors="pt").to(device)
with torch.no_grad():
    logits = xai_model(**inputs).logits
# Predicted class index per sample, as a NumPy array on the host.
sample_predictions = torch.argmax(logits, dim=1).cpu().numpy()

# SHAP's `Explainer` needs a predictor function that takes text and returns model logits.
# I'm defining that wrapper function here.
def shap_predictor(texts):
    """
    Prediction callback handed to SHAP: texts in, logits out.

    Args:
        texts: A list of strings, or a NumPy array of strings (converted to a list).

    Returns:
        NumPy array with the logits `xai_model` produces for the given texts.
    """
    if isinstance(texts, np.ndarray):
        texts = texts.tolist()

    encoded = tokenizer(texts, padding=True, truncation=True, max_length=CFG.max_len, return_tensors="pt")
    encoded = encoded.to(device)

    # Pure inference: no gradients needed.
    with torch.no_grad():
        batch_logits = xai_model(**encoded).logits
    return batch_logits.cpu().numpy()

# Build the explainer from the prediction callback and the tokenizer; the class names are
# passed in the order id2label stores them.
class_names = [*id2label.values()]
explainer = shap.Explainer(shap_predictor, tokenizer, output_names=class_names)

print(f"Generating SHAP explanations for {len(sample_texts)} sample(s)...")
# SHAP queries the predictor on perturbed versions of each text to attribute the output to tokens.
shap_values = explainer(sample_texts)

# Walk the explained samples together with their true and predicted class ids.
true_label_ints = samples_to_explain['label'].tolist()
for i, (text, true_label_int, predicted_label_int) in enumerate(
    zip(sample_texts, true_label_ints, sample_predictions)
):
    true_label_name, predicted_label_name = id2label[true_label_int], id2label[predicted_label_int]

    print(f"\n--- Explaining Sample {i+1} ---")
    print(f"Text: '{text}'")
    print(f"True Emotion: '{true_label_name}'")
    print(f"Predicted Emotion: '{predicted_label_name}'")
    print(f"SHAP Plot (explaining the prediction for class '{predicted_label_name}'):")

    # Slice out sample i, all tokens, and only the predicted class; red tokens push towards
    # that class, blue tokens push away from it.
    shap.plots.text(shap_values[i, :, predicted_label_int])


# =========================================================================================
# Finalization and Submission
# =========================================================================================
#
# > The final step is to take my averaged predictions, determine the most likely class
# > for each test sample, and format it into the `submission.csv` file as required
# > by the competition.
#
# -----------------------------------------------------------------------------------------
print("\nFinalizing submission...")
# Highest averaged logit per row -> final class index.
final_predictions = test_preds.argmax(axis=1)

print(f"Length of test_df: {len(test_df)}")
print(f"Length of predictions: {len(final_predictions)}")

# Two-column frame: the sample id from the test set plus the predicted class index.
submission_df = test_df[['sentence_id']].assign(predicted_emotion_int=final_predictions)

# Write it out without the pandas index column.
submission_df.to_csv('submission.csv', index=False)

print("\nSubmission file created successfully!")
print("Submission file format check:")
print(submission_df.head())

2025-08-11 12:50:20.917263: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1754916621.125305      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1754916621.190521      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Seeds set to 42
Loading and preprocessing data...
Test data loaded. Number of samples: 4611
Test data columns: ['sentence_id', 'item_id', 'title', 'sentence', 'upvotes', 'date', 'text']
Initializing tokenizer from a saved fold model: /kaggle/input/model-weights/modelsfold_0


Map:   0%|          | 0/4611 [00:00<?, ? examples/s]


Loading model from: /kaggle/input/model-weights/models/fold_0



Loading model from: /kaggle/input/model-weights/models/fold_1



Loading model from: /kaggle/input/model-weights/models/fold_2



Loading model from: /kaggle/input/model-weights/models/fold_3



Loading model from: /kaggle/input/model-weights/models/fold_4



Ensemble predictions generated successfully.

  Running SHAP Analysis for Model Interpretability

Loading Fold 0 model for SHAP from: /kaggle/input/model-weights/models/fold_0
Getting model predictions for sample texts...
Generating SHAP explanations for 5 sample(s)...


PartitionExplainer explainer: 6it [00:17,  5.72s/it]


--- Explaining Sample 1 ---
Text: 'Being around people is torture. [SEP] No family, no friends, no career, health issues.'
True Emotion: 'emptiness'
Predicted Emotion: 'emptiness'
SHAP Plot (explaining the prediction for class 'emptiness'):






--- Explaining Sample 2 ---
Text: 'If anyone is available to talk I’d appreciate it. [SEP] If interested message me .'
True Emotion: 'emptiness'
Predicted Emotion: 'emptiness'
SHAP Plot (explaining the prediction for class 'emptiness'):



--- Explaining Sample 3 ---
Text: 'How do you deal with people telling you to give up and talking bad about your partner’s behavior as if they were being depressed on purpose ? [SEP] She called him names in the past …'
True Emotion: 'hopelessness'
Predicted Emotion: 'hopelessness'
SHAP Plot (explaining the prediction for class 'hopelessness'):



--- Explaining Sample 4 ---
Text: 'No food until the kitchen is spotless [SEP] So today she went for a 20 minute walk and expected me and my 6 year old sister to clean the whole kitchen by the time she returned.'
True Emotion: 'hopelessness'
Predicted Emotion: 'hopelessness'
SHAP Plot (explaining the prediction for class 'hopelessness'):



--- Explaining Sample 5 ---
Text: 'Why is it always me? [SEP] Everytime we argue, it’s my fault.'
True Emotion: 'hopelessness'
Predicted Emotion: 'loneliness'
SHAP Plot (explaining the prediction for class 'loneliness'):



Finalizing submission...
Length of test_df: 4611
Length of predictions: 4611

Submission file created successfully!
Submission file format check:
  sentence_id  predicted_emotion_int
0   102a69k_1                      0
1   102a69k_2                      0
2   102a69k_3                      0
3   102a69k_4                      2
4   102a69k_5                      2
