In [1]:
# ================================================
# GOOGLE COLAB SETUP - Mount Google Drive
# ================================================
# This cell only mounts Drive; the repository clone and the data copy/extraction
# are done by the cells that follow.

# Mount Google Drive at /content/drive so files under MyDrive can be read later
from google.colab import drive
drive.mount('/content/drive')
print("DONE")

Mounted at /content/drive
DONE


In [7]:
# Clone the repository
!git clone https://github.com/UmerFruit/Hate_Explain.git
%cd Hate_Explain
print("DONE")

Cloning into 'Hate_Explain'...
remote: Enumerating objects: 103, done.[K
remote: Counting objects: 100% (103/103), done.[K
remote: Compressing objects: 100% (74/74), done.[K
remote: Total 103 (delta 32), reused 99 (delta 28), pack-reused 0 (from 0)[K
Receiving objects: 100% (103/103), 2.35 MiB | 4.79 MiB/s, done.
Resolving deltas: 100% (32/32), done.
/content/Hate_Explain
DONE


In [8]:
# LOAD THE SMALL DATASET (GIVES KEY ERRORS SINCE IT DOESNT HAVE THE MOST OF THE WORDS)
# !cp /content/drive/MyDrive/glove.42B.300d.small.zip ./Data/
# Extract the zip file
# !unzip -q ./Data/glove.42B.300d.small.zip -d ./Data/
# !mv ./Data/glove.42B.300d.small.txt  ./Data/glove.42B.300d.txt

# LOAD THE FULL DATASET
print("Starting copy")
!cp /content/drive/MyDrive/glove.42B.300d.zip ./Data/
# Extract the zip file
print("Starting extraction")
!unzip -q ./Data/glove.42B.300d.zip -d ./Data/
print("Done extraction")

# Clean up zip file (if needed)
# !rm ./Data/glove.42B.300d.small.zip
# !rm ./Data/glove.42B.300d.zip

# Copy the model files
!cp /content/drive/MyDrive/word2vec.model /content/Hate_Explain/Data
!cp /content/drive/MyDrive/word2vec.model.vectors.npy /content/Hate_Explain/Data

# Check GPU availability
import torch
print(f"\nGPU Available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU Name: {torch.cuda.get_device_name(0)}")

Starting copy
Starting extraction
Done extraction

GPU Available: True
GPU Name: Tesla T4


## 1. Setup and Installation

In [4]:
# Suppress warnings for cleaner output
# NOTE: with no category given, this filter hides every warning raised after this
# point in the session, including deprecation notices from the libraries used below.
import warnings
warnings.filterwarnings('ignore')

In [11]:
# Make sure the output folders exist: 'Saved' receives the model checkpoints and
# 'explanations_dicts' receives the JSON files read by the evaluation sections.
import os

for output_dir in ('Saved', 'explanations_dicts'):
    os.makedirs(output_dir, exist_ok=True)
print("Directories created successfully!")

Directories created successfully!


In [5]:
# @title
# Install required packages (run this if not already installed)
!pip install -r requirements.txt
!python -m spacy download en_core_web_sm

Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m12.8/12.8 MB[0m [31m108.1 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m‚úî Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m‚ö† Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


## 2. Download and Prepare GloVe Embeddings

**Note:** This step is only required once. Skip it if you already have the file (for example, I copied it from the mounted Google Drive in the setup cells above). Otherwise, uncomment the download cell below to fetch it here.

In [None]:
# Download GloVe embeddings (only run if needed)
# !wget http://nlp.stanford.edu/data/glove.42B.300d.zip -P Data/
# !unzip Data/glove.42B.300d.zip -d Data/
# !rm Data/glove.42B.300d.zip
# print("GloVe embeddings downloaded!")

In [21]:
# Convert GloVe to Word2Vec format (Run if model values are not already saved)
# from gensim.models import KeyedVectors
# from gensim.scripts.glove2word2vec import glove2word2vec

# # Convert GloVe format to Word2Vec format
# print("Converting GloVe to Word2Vec format...")
# glove2word2vec('Data/glove.42B.300d.txt', 'Data/glove.42B.300d_w2v.txt')

# # Load and save in gensim format
# print("Loading and saving model (this may take a few minutes)...")
# word2vecmodel1 = KeyedVectors.load_word2vec_format('Data/glove.42B.300d_w2v.txt', binary=False)
# word2vecmodel1.save("Data/word2vec.model")

# # Clean up intermediate files
# import gc
# del word2vecmodel1
# gc.collect()

# # Remove large text files to save space
# import os
# os.remove('Data/glove.42B.300d.txt')
# os.remove('Data/glove.42B.300d_w2v.txt')
# print("Done! word2vec.model saved.")

Converting GloVe to Word2Vec format...
Loading and saving model (this may take a few minutes)...
Done! word2vec.model saved.


In [None]:
%cd /content/Hate_Explain

In [6]:
# Generate vocabulary embeddings
import sys
import os
sys.path.append('/content/Hate_Explain')

from TensorDataset.datsetSplitter import createDatasetSplit

# IMPORTANT: These parameters must match training configuration!
# Especially: variance, type_attention, max_length, num_classes
# NOTE(review): the cache path printed by this cell
# (Data/Total_data_normal_softmax_1_128_3) appears to encode exactly those values,
# so a mismatch would presumably point training at a different cache -- confirm in set_name().
params = {
    # Data parameters
    'num_classes': 3,
    'data_file': 'Data/dataset.json',
    'class_names': 'Data/classes.npy',

    # Tokenization parameters
    'bert_tokens': False,
    'max_length': 128,
    'include_special': False,

    # Attention parameters - MUST MATCH TRAINING
    'type_attention': 'softmax',
    'variance': 1,  # Changed to match training (was 5)
    'decay': False,
    'method': 'additive',
    'window': 4,
    'alpha': 0.5,
    'p_value': 0.8,

    # Label parameters
    'majority': 2,

    # Preprocessing control
    'not_recollect': True,  # Not used by createDatasetSplit

    # Other required parameters
    'random_seed': 42,
    'normalized': False
}

# FORCE REGENERATION: delete the cached directory and pickle so they are rebuilt from scratch
import os
import shutil

from Preprocess.dataCollect import set_name

# set_name() returns the pickle path for this configuration; the directory that holds
# the vocabulary and split files has the same name without the extension.
filename = set_name(params)
# Strip the extension with splitext instead of slicing off a hard-coded 7 characters,
# so the directory name stays right even if the extension ever changes.
vocab_dir = os.path.splitext(filename)[0]

if os.path.exists(vocab_dir):
    print(f"üóëÔ∏è  Removing existing directory: {vocab_dir}")
    shutil.rmtree(vocab_dir)
    print("‚úì Directory removed, will regenerate fresh files...")

# Also remove the pickle file if it exists
if os.path.exists(filename):
    print(f"üóëÔ∏è  Removing existing pickle file: {filename}")
    os.remove(filename)
    print("‚úì Pickle file removed")

print("\nüìä Generating vocabulary and embeddings...")
print(f"   Expected directory: {vocab_dir}")
print(f"   Expected pickle: {filename}")

# createDatasetSplit is imported from TensorDataset.datsetSplitter and is not defined
# in this notebook. According to the author's notes, it will:
# 1. Call collect_data() to create the main pickle file
# 2. Create train/val/test splits
# 3. Build vocabulary from word2vec embeddings
# 4. Create directory with vocab_own.pickle and split files
train, val, test, vocab_own = createDatasetSplit(params)

# Report how many samples ended up in each of the returned splits
print(f"\n‚úì Vocabulary files generated successfully!")
print(f"   Training samples: {len(train)}")
print(f"   Validation samples: {len(val)}")
print(f"   Test samples: {len(test)}")

# Verify the pickle file was created
import os
import pickle

vocab_pickle_path = f'{vocab_dir}/vocab_own.pickle'

if os.path.exists(vocab_pickle_path):
    file_size = os.path.getsize(vocab_pickle_path)
    print(f"\n‚úì Vocab pickle file created: {file_size:,} bytes ({file_size/1024/1024:.1f} MB)")
    print(f"  Location: {vocab_pickle_path}")

    # Verify it's valid by loading it back. The file was just written by this notebook,
    # so unpickling it is fine here; never unpickle files from an untrusted source.
    try:
        with open(vocab_pickle_path, 'rb') as f:
            vocab_check = pickle.load(f)
        print(f"‚úì Pickle file is valid and loadable!")
        print(f"  Vocabulary size: {len(vocab_check.vocab)}")
        print(f"  Embedding shape: {vocab_check.embeddings.shape}")
        # The loaded copy was only needed for this sanity check. The file is large
        # (see the size printed above), so release it instead of keeping it bound to
        # a global for the rest of the session.
        del vocab_check
    except Exception as e:
        print(f"‚úó ERROR: Pickle file is corrupted: {str(e)}")
else:
    print(f"\n‚úó ERROR: Vocab pickle file not found at {vocab_pickle_path}")
    if os.path.exists(vocab_dir):
        # Show what was written instead, to help diagnose the missing file
        print(f"  Directory exists, checking contents:")
        for entry in os.listdir(vocab_dir):
            filepath = os.path.join(vocab_dir, entry)
            size = os.path.getsize(filepath)
            print(f"    - {entry} ({size:,} bytes)")
    else:
        print(f"  Directory {vocab_dir} not found!")

üóëÔ∏è  Removing existing directory: Data/Total_data_normal_softmax_1_128_3
‚úì Directory removed, will regenerate fresh files...
üóëÔ∏è  Removing existing pickle file: Data/Total_data_normal_softmax_1_128_3.pickle
‚úì Pickle file removed

üìä Generating vocabulary and embeddings...
   Expected directory: Data/Total_data_normal_softmax_1_128_3
   Expected pickle: Data/Total_data_normal_softmax_1_128_3.pickle
total_data 20148


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 20148/20148 [00:57<00:00, 352.17it/s]


attention_error: 0
no_majority: 919


 17%|‚ñà‚ñã        | 2597/15383 [00:00<00:00, 13095.73it/s]

unk


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 15383/15383 [00:01<00:00, 13574.29it/s]


(22236, 300)


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 15383/15383 [00:00<00:00, 22106.83it/s]
100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1922/1922 [00:00<00:00, 21508.62it/s]
100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1924/1924 [00:00<00:00, 21665.92it/s]


total dataset size: 19229

‚úì Vocabulary files generated successfully!
   Training samples: 15383
   Validation samples: 1922
   Test samples: 1924

‚úì Vocab pickle file created: 2,417,514,432 bytes (2305.5 MB)
  Location: Data/Total_data_normal_softmax_1_128_3/vocab_own.pickle
‚úì Pickle file is valid and loadable!
  Vocabulary size: 22236
  Embedding shape: (22236, 300)


## 3. Import Dependencies and Train Model

In [6]:
# Import the training module
from manual_training_inference import *

Reading twitter - 1grams ...
Reading twitter - 2grams ...
Reading english - 1grams ...


In [8]:
# Load model parameters from JSON configuration
import json
import ast
import torch

path_file = 'best_model_json/bestModel_birnnscrat.json'
with open(path_file, mode='r') as f:
    params = json.load(f)

# Keys whose values must be integers (the JSON may store them as strings or as 'N/A')
int_keys = ['batch_size', 'num_classes', 'hidden_size', 'supervised_layer_pos',
            'num_supervised_heads', 'random_seed', 'max_length']

# Pass 1: convert string values to appropriate types
for key in params:
    if params[key] == 'True':
        params[key] = True
    elif params[key] == 'False':
        params[key] = False
    if key in int_keys and params[key] != 'N/A':
        params[key] = int(params[key])

# Pass 2: parse the class weights only after every flag has been converted. Doing this
# inside the loop above made the result depend on whether 'auto_weights' appears before
# 'weights' in the JSON, and raised KeyError when 'auto_weights' is absent.
auto_weights = params.get('auto_weights')
if auto_weights is not None and not auto_weights and isinstance(params.get('weights'), str):
    params['weights'] = ast.literal_eval(params['weights'])

# Setup device
if torch.cuda.is_available():
    device = torch.device("cuda")
    print(f'Using GPU: {torch.cuda.get_device_name(0)}')
else:
    print('WARNING: GPU not available. Using CPU (training will be slow).')
    print('Go to Runtime ‚Üí Change runtime type ‚Üí GPU')
    device = torch.device("cpu")

# Configure for Colab execution
params['logging'] = 'local'
# Keep the string in params in sync with the device actually selected ('cuda' or 'cpu')
# instead of hard-coding 'cuda', which was wrong on the CPU fallback path.
# NOTE(review): how train_model consumes params['device'] is not visible here -- confirm.
params['device'] = device.type
params['best_params'] = False

Using GPU: Tesla T4


In [9]:
# Data folder configuration: for each class count, the dataset file and the class-label file
dataset_json = 'Data/dataset.json'
dict_data_folder = {
    '2': dict(data_file=dataset_json, class_label='Data/classes_two.npy'),
    '3': dict(data_file=dataset_json, class_label='Data/classes.npy'),
}

# Configure training parameters (epochs kept low for faster testing)
params.update(variance=1, epochs=5, to_save=True)

# Fill in critical parameters only when the loaded configuration does not provide them
for option, fallback in (('auto_weights', True), ('att_lambda', 1.0), ('train_att', False)):
    params.setdefault(option, fallback)

In [10]:
# Train with 2 classes (toxic vs non-toxic)
params['num_classes'] = 2
class_files = dict_data_folder[str(params['num_classes'])]
params['data_file'] = class_files['data_file']
params['class_names'] = class_files['class_label']

# When class weights are not computed automatically, the binary run weights both classes equally
if params['auto_weights'] == False and params['num_classes'] == 2:
    params['weights'] = [1.0, 1.0]

print(f"Training {params['num_classes']}-class model...")
train_model(params, device)

Training 2-class model...
[1.2301791 0.8423818]

Training...


481it [00:19, 24.72it/s]


avg_train_loss 295.31358239060876
model previously passed
Running eval on  train ...


481it [00:00, 516.10it/s]


 Accuracy: 0.64
 Fscore: 0.63
 Precision: 0.73
 Recall: 0.69
 Roc Auc: 0.00
 Test took: 0:00:01
model previously passed
Running eval on  val ...


61it [00:00, 458.59it/s]


 Accuracy: 0.62
 Fscore: 0.61
 Precision: 0.72
 Recall: 0.67
 Roc Auc: 0.00
 Test took: 0:00:00
model previously passed
Running eval on  test ...


61it [00:00, 568.78it/s]


 Accuracy: 0.62
 Fscore: 0.61
 Precision: 0.71
 Recall: 0.67
 Roc Auc: 0.00
 Test took: 0:00:00
  Test - fscore: 0.6108, accuracy: 0.6190
  Val  - fscore: 0.6090, accuracy: 0.6186
  Train- fscore: 0.6281, accuracy: 0.6355
0.6090377004692404 0
Saving model
Saved/birnnscrat_lstm_64_2_100.pth

Training...


481it [00:11, 41.47it/s]


avg_train_loss 295.190023949637
model previously passed
Running eval on  train ...


481it [00:00, 554.16it/s]


 Accuracy: 0.71
 Fscore: 0.71
 Precision: 0.76
 Recall: 0.75
 Roc Auc: 0.00
 Test took: 0:00:01
model previously passed
Running eval on  val ...


61it [00:00, 492.21it/s]


 Accuracy: 0.66
 Fscore: 0.66
 Precision: 0.73
 Recall: 0.70
 Roc Auc: 0.00
 Test took: 0:00:00
model previously passed
Running eval on  test ...


61it [00:00, 567.93it/s]


 Accuracy: 0.67
 Fscore: 0.67
 Precision: 0.73
 Recall: 0.71
 Roc Auc: 0.00
 Test took: 0:00:00
  Test - fscore: 0.6684, accuracy: 0.6705
  Val  - fscore: 0.6606, accuracy: 0.6634
  Train- fscore: 0.7125, accuracy: 0.7133
0.6606218896307607 0.6090377004692404
Saving model
Saved/birnnscrat_lstm_64_2_100.pth

Training...


481it [00:11, 41.76it/s]


avg_train_loss 295.1041867579343
model previously passed
Running eval on  train ...


481it [00:00, 549.13it/s]


 Accuracy: 0.81
 Fscore: 0.81
 Precision: 0.81
 Recall: 0.82
 Roc Auc: 0.00
 Test took: 0:00:01
model previously passed
Running eval on  val ...


61it [00:00, 580.21it/s]


 Accuracy: 0.71
 Fscore: 0.71
 Precision: 0.73
 Recall: 0.73
 Roc Auc: 0.00
 Test took: 0:00:00
model previously passed
Running eval on  test ...


61it [00:00, 576.93it/s]


 Accuracy: 0.72
 Fscore: 0.72
 Precision: 0.73
 Recall: 0.74
 Roc Auc: 0.00
 Test took: 0:00:00
  Test - fscore: 0.7190, accuracy: 0.7193
  Val  - fscore: 0.7131, accuracy: 0.7133
  Train- fscore: 0.8085, accuracy: 0.8096
0.713133007357349 0.6606218896307607
Saving model
Saved/birnnscrat_lstm_64_2_100.pth

Training...


481it [00:11, 41.64it/s]


avg_train_loss 295.0351021939157
model previously passed
Running eval on  train ...


481it [00:00, 539.12it/s]


 Accuracy: 0.86
 Fscore: 0.86
 Precision: 0.86
 Recall: 0.87
 Roc Auc: 0.00
 Test took: 0:00:01
model previously passed
Running eval on  val ...


61it [00:00, 574.16it/s]


 Accuracy: 0.73
 Fscore: 0.73
 Precision: 0.73
 Recall: 0.74
 Roc Auc: 0.00
 Test took: 0:00:00
model previously passed
Running eval on  test ...


61it [00:00, 504.62it/s]


 Accuracy: 0.72
 Fscore: 0.72
 Precision: 0.72
 Recall: 0.73
 Roc Auc: 0.00
 Test took: 0:00:00
  Test - fscore: 0.7189, accuracy: 0.7214
  Val  - fscore: 0.7315, accuracy: 0.7336
  Train- fscore: 0.8616, accuracy: 0.8641
0.7314602264974758 0.713133007357349
Saving model
Saved/birnnscrat_lstm_64_2_100.pth

Training...


481it [00:11, 41.35it/s]


avg_train_loss 294.9944699420255
model previously passed
Running eval on  train ...


481it [00:00, 557.49it/s]


 Accuracy: 0.76
 Fscore: 0.76
 Precision: 0.81
 Recall: 0.80
 Roc Auc: 0.00
 Test took: 0:00:01
model previously passed
Running eval on  val ...


61it [00:00, 566.93it/s]


 Accuracy: 0.63
 Fscore: 0.63
 Precision: 0.71
 Recall: 0.68
 Roc Auc: 0.00
 Test took: 0:00:00
model previously passed
Running eval on  test ...


61it [00:00, 573.58it/s]


 Accuracy: 0.63
 Fscore: 0.63
 Precision: 0.72
 Recall: 0.68
 Roc Auc: 0.00
 Test took: 0:00:00
  Test - fscore: 0.6252, accuracy: 0.6315
  Val  - fscore: 0.6258, accuracy: 0.6316
  Train- fscore: 0.7612, accuracy: 0.7616
best_val_fscore 0.7314602264974758
best_test_fscore 0.7188983855650521
best_val_rocauc 0
best_test_rocauc 0
best_val_precision 0.7344066913781584
best_test_precision 0.7212201763485477
best_val_recall 0.7429125786509352
best_test_recall 0.7292451435763523


1

In [11]:
# Train with 3 classes (hatespeech, offensive, normal)
params['num_classes'] = 3
class_files = dict_data_folder[str(params['num_classes'])]
params['data_file'] = class_files['data_file']
params['class_names'] = class_files['class_label']

# No manual weight override here: the equal-weights shortcut applies only to the
# 2-class run, so a "num_classes == 2" check could never be true in this cell.
# NOTE(review): when auto_weights is False, params['weights'] may still hold the
# two-element list set for the 2-class run -- confirm train_model receives 3 weights.

print(f"Training {params['num_classes']}-class model...")
train_model(params, device)

Training 3-class model...
[1.0796857 0.8201194 1.1703163]

Training...


481it [00:11, 41.57it/s]


avg_train_loss 295.68468743401604
model previously passed
Running eval on  train ...


481it [00:00, 537.12it/s]


 Accuracy: 0.61
 Fscore: 0.58
 Precision: 0.63
 Recall: 0.58
 Roc Auc: 0.79
 Test took: 0:00:01
model previously passed
Running eval on  val ...


61it [00:00, 573.38it/s]


 Accuracy: 0.60
 Fscore: 0.56
 Precision: 0.63
 Recall: 0.56
 Roc Auc: 0.77
 Test took: 0:00:00
model previously passed
Running eval on  test ...


61it [00:00, 577.63it/s]


 Accuracy: 0.59
 Fscore: 0.55
 Precision: 0.62
 Recall: 0.55
 Roc Auc: 0.77
 Test took: 0:00:00
  Test - fscore: 0.5524, accuracy: 0.5878
  Val  - fscore: 0.5617, accuracy: 0.5963
  Train- fscore: 0.5782, accuracy: 0.6105
0.5616702167867701 0
Saving model
Saved/birnnscrat_lstm_64_3_100.pth

Training...


481it [00:11, 41.35it/s]


avg_train_loss 295.5435634938198
model previously passed
Running eval on  train ...


481it [00:00, 547.27it/s]


 Accuracy: 0.69
 Fscore: 0.67
 Precision: 0.69
 Recall: 0.67
 Roc Auc: 0.84
 Test took: 0:00:01
model previously passed
Running eval on  val ...


61it [00:00, 551.90it/s]


 Accuracy: 0.62
 Fscore: 0.60
 Precision: 0.62
 Recall: 0.60
 Roc Auc: 0.79
 Test took: 0:00:00
model previously passed
Running eval on  test ...


61it [00:00, 580.82it/s]


 Accuracy: 0.64
 Fscore: 0.61
 Precision: 0.64
 Recall: 0.61
 Roc Auc: 0.79
 Test took: 0:00:00
  Test - fscore: 0.6150, accuracy: 0.6362
  Val  - fscore: 0.5992, accuracy: 0.6223
  Train- fscore: 0.6704, accuracy: 0.6873
0.5992262854884399 0.5616702167867701
Saving model
Saved/birnnscrat_lstm_64_3_100.pth

Training...


481it [00:11, 41.67it/s]


avg_train_loss 295.4403857193469
model previously passed
Running eval on  train ...


481it [00:00, 543.34it/s]


 Accuracy: 0.74
 Fscore: 0.73
 Precision: 0.74
 Recall: 0.73
 Roc Auc: 0.89
 Test took: 0:00:01
model previously passed
Running eval on  val ...


61it [00:00, 578.77it/s]


 Accuracy: 0.65
 Fscore: 0.63
 Precision: 0.65
 Recall: 0.63
 Roc Auc: 0.81
 Test took: 0:00:00
model previously passed
Running eval on  test ...


61it [00:00, 577.81it/s]


 Accuracy: 0.65
 Fscore: 0.63
 Precision: 0.65
 Recall: 0.63
 Roc Auc: 0.81
 Test took: 0:00:00
  Test - fscore: 0.6304, accuracy: 0.6492
  Val  - fscore: 0.6292, accuracy: 0.6483
  Train- fscore: 0.7318, accuracy: 0.7435
0.6292216536100552 0.5992262854884399
Saving model
Saved/birnnscrat_lstm_64_3_100.pth

Training...


481it [00:11, 41.50it/s]


avg_train_loss 295.37425653850215
model previously passed
Running eval on  train ...


481it [00:00, 565.35it/s]


 Accuracy: 0.78
 Fscore: 0.78
 Precision: 0.78
 Recall: 0.77
 Roc Auc: 0.92
 Test took: 0:00:01
model previously passed
Running eval on  val ...


61it [00:00, 549.50it/s]


 Accuracy: 0.65
 Fscore: 0.64
 Precision: 0.65
 Recall: 0.63
 Roc Auc: 0.81
 Test took: 0:00:00
model previously passed
Running eval on  test ...


61it [00:00, 576.57it/s]


 Accuracy: 0.64
 Fscore: 0.63
 Precision: 0.64
 Recall: 0.63
 Roc Auc: 0.80
 Test took: 0:00:00
  Test - fscore: 0.6332, accuracy: 0.6435
  Val  - fscore: 0.6400, accuracy: 0.6504
  Train- fscore: 0.7758, accuracy: 0.7817
0.639963820145986 0.6292216536100552
Saving model
Saved/birnnscrat_lstm_64_3_100.pth

Training...


481it [00:11, 41.50it/s]


avg_train_loss 295.3143782585921
model previously passed
Running eval on  train ...


481it [00:00, 552.20it/s]


 Accuracy: 0.76
 Fscore: 0.75
 Precision: 0.78
 Recall: 0.74
 Roc Auc: 0.92
 Test took: 0:00:01
model previously passed
Running eval on  val ...


61it [00:00, 587.49it/s]


 Accuracy: 0.60
 Fscore: 0.57
 Precision: 0.62
 Recall: 0.57
 Roc Auc: 0.77
 Test took: 0:00:00
model previously passed
Running eval on  test ...


61it [00:00, 563.46it/s]


 Accuracy: 0.60
 Fscore: 0.57
 Precision: 0.62
 Recall: 0.56
 Roc Auc: 0.76
 Test took: 0:00:00
  Test - fscore: 0.5673, accuracy: 0.5951
  Val  - fscore: 0.5701, accuracy: 0.5968
  Train- fscore: 0.7524, accuracy: 0.7645
best_val_fscore 0.639963820145986
best_test_fscore 0.6332487108352621
best_val_rocauc 0.8071006833165059
best_test_rocauc 0.8039198584224749
best_val_precision 0.6517608919125689
best_test_precision 0.6419289825978067
best_val_recall 0.6348165172902596
best_test_recall 0.6290887648657669


1

In [12]:
# Clean up memory
# Forces a full garbage-collection pass between training and evaluation; the number
# shown as the cell output is gc.collect()'s return value.
import gc
gc.collect()

0

## 4. Testing and Evaluation

In [13]:
# Run testing scripts
# NOTE(review): the "Bias Calculation" section further down reads
# ./explanations_dicts/bestModel_birnnscrat_bias.json and skips scoring when it is
# missing. Presumably testing_for_bias.py is what writes that file, so it likely needs
# to be enabled before running that section -- confirm.
!python testing_with_rational.py birnn_scrat 100
# !python testing_for_bias.py birnn_scrat 100

2025-12-07 13:07:47.651728: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1765112867.693933   34702 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1765112867.705516   34702 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1765112867.745428   34702 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1765112867.745474   34702 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1765112867.745483   34702 computation_placer.cc:177] computation placer alr

In [14]:
# Check generated explanation files
!ls explanations_dicts

bestModel_birnnscrat_100_explanation_top5.json


---

# Bias Calculation

Based on: Borkan et al. (2019) - "Nuanced Metrics for Measuring Unintended Bias with Real Data for Text Classification"

---

In [15]:
# Import required libraries for bias calculation
from collections import Counter, defaultdict
from tqdm.notebook import tqdm
import json
import numpy as np

In [16]:
# Import data collection utilities
from Preprocess.dataCollect import get_annotated_data

In [17]:
# Configure data loading for 2-class (toxic/non-toxic)
dataset_json = 'Data/dataset.json'
dict_data_folder = {
    '2': dict(data_file=dataset_json, class_label='Data/classes_two.npy'),
    '3': dict(data_file=dataset_json, class_label='Data/classes.npy'),
}

# A fresh params dict (replacing the training configuration) with just the keys set here
class_files = dict_data_folder['2']
params = {
    'num_classes': 2,  # toxic vs non-toxic
    'data_file': class_files['data_file'],
    'class_names': class_files['class_label'],
}

# Load the annotated dataset
data_all_labelled = get_annotated_data(params)
print(f"Loaded {len(data_all_labelled)} samples")

Loaded 20148 samples


In [18]:
# Display sample data
data_all_labelled.head()

Unnamed: 0,post_id,text,annotatorid1,target1,label1,annotatorid2,target2,label2,annotatorid3,target3,label3,rationales,final_label
0,1179055004553900032_twitter,"[i, dont, think, im, getting, my, baby, them, ...",1,[None],normal,2,[None],normal,3,[None],normal,[],non-toxic
1,1179063826874032128_twitter,"[we, cannot, continue, calling, ourselves, fem...",1,[None],normal,2,[None],normal,3,[None],normal,[],non-toxic
2,1178793830532956161_twitter,"[nawt, yall, niggers, ignoring, me]",4,[African],normal,2,[None],normal,3,[African],hatespeech,[],non-toxic
3,1179088797964763136_twitter,"[<user>, i, am, bit, confused, coz, chinese, p...",1,[Asian],hatespeech,4,[Asian],offensive,3,[Asian],hatespeech,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",toxic
4,1179085312976445440_twitter,"[this, bitch, in, whataburger, eating, a, burg...",4,"[Caucasian, Women]",hatespeech,2,"[Women, Caucasian]",hatespeech,3,"[Women, Caucasian]",offensive,"[[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",toxic


In [19]:
def generate_target_information(dataset):
    """Derive each post's target communities by majority vote over its annotators.

    Args:
        dataset: DataFrame with a 'post_id' column and list-valued 'target1',
            'target2' and 'target3' columns (one list of community names each).

    Returns:
        A tuple ``(final_target_output, all_communities_selected)``:
        - final_target_output: ``defaultdict(list)`` mapping post_id to the
          communities that occur more than once across the three lists, or
          ``['None']`` when no community does.
        - all_communities_selected: flat list of every community recorded
          (including the 'None' placeholders), in row order.
    """
    final_target_output = defaultdict(list)
    all_communities_selected = []

    for _, row in dataset.iterrows():
        post_id = row['post_id']

        # Tally how often each community is named across the three annotators
        votes = Counter(row['target1'] + row['target2'] + row['target3'])

        # Keep only the communities named more than once
        agreed = [name for name, count in votes.items() if count > 1]
        if agreed:
            final_target_output[post_id].extend(agreed)
            all_communities_selected.extend(agreed)

        # Nothing recorded for this post so far: mark it with the 'None' placeholder
        if post_id not in final_target_output:
            final_target_output[post_id].append('None')
            all_communities_selected.append('None')

    return final_target_output, all_communities_selected

In [20]:
# Generate target information
target_information, all_communities_selected = generate_target_information(data_all_labelled)

In [21]:
# Get top 10 communities for bias calculation
community_count_dict = Counter(all_communities_selected)

# Leave 'None' and 'Other' out of the ranking
for excluded_label in ('None', 'Other'):
    community_count_dict.pop(excluded_label, None)

# Keep just the names of the 10 most frequent communities
top_communities = community_count_dict.most_common(10)
list_selected_community = [name for name, _ in top_communities]
print(f"Top 10 communities: {list_selected_community}")

Top 10 communities: ['African', 'Islam', 'Jewish', 'Homosexual', 'Women', 'Refugee', 'Arab', 'Caucasian', 'Asian', 'Hispanic']


In [22]:
# Filter target information to only include top 10 communities.
# Posts left with no selected community map to None instead of an empty list.
selected_communities = set(list_selected_community)
final_target_information = {
    post_id: (list(set(communities) & selected_communities) or None)
    for post_id, communities in target_information.items()
}

In [23]:
# Add target category column to dataset
data_all_labelled['final_target_category'] = data_all_labelled['post_id'].map(final_target_information)

In [24]:
# Load test split IDs and filter data
with open('./Data/post_id_divisions.json', 'r') as fp:
    post_id_dict = json.load(fp)

data_all_labelled_bias = data_all_labelled[data_all_labelled['post_id'].isin(post_id_dict['test'])]
print(f"Test samples for bias evaluation: {len(data_all_labelled_bias)}")

Test samples for bias evaluation: 1924


In [25]:
from sklearn.metrics import roc_auc_score

# Bias score file mapping for the trained model: display name -> file name of the
# per-sample prediction file, looked up under parent_path below
bias_score_file_mapping = {
    'BiRNN-Attn': 'bestModel_birnnscrat_bias.json',
}

# Folder that holds the prediction files named above
parent_path = './explanations_dicts/'
# Method names accepted by bias_evaluation_metric
method_list = ['subgroup', 'bpsn', 'bnsp']
# Independent copy of the selected communities list
community_list = list(list_selected_community)

In [26]:
def convert_to_score(label_name, label_dict):
    """Turn a predicted label and its score dict into a toxicity score.

    Args:
        label_name: The predicted class name; looked up as a key in ``label_dict``.
        label_dict: Mapping from class name to the score given to that class.

    Returns:
        ``1 - label_dict[label_name]`` when the predicted label is 'non-toxic',
        otherwise ``label_dict[label_name]`` unchanged.
    """
    predicted_score = label_dict[label_name]
    return 1 - predicted_score if label_name == 'non-toxic' else predicted_score


def bias_evaluation_metric(dataset, method, community):
    """Split post ids into positive/negative groups for one bias metric.

    Rows whose 'final_target_category' is None are ignored. For the remaining
    rows, "in community" means ``community`` is in the row's
    'final_target_category', and "toxic" means 'final_label' is not 'non-toxic'.

    Args:
        dataset: DataFrame with 'post_id', 'final_label' and
            'final_target_category' columns.
        method: One of 'subgroup', 'bpsn' or 'bnsp':
            - 'subgroup': in-community posts only; toxic -> positive,
              non-toxic -> negative.
            - 'bpsn': in-community non-toxic -> negative; out-of-community
              toxic -> positive.
            - 'bnsp': in-community toxic -> positive; out-of-community
              non-toxic -> negative.
            Any other value prints 'Incorrect method selected!' for every
            row that is not skipped and selects nothing.
        community: Community name to evaluate.

    Returns:
        ``{'positiveID': [...], 'negativeID': [...]}`` with post ids in row order.
    """
    positive_ids, negative_ids = [], []

    for _, row in dataset.iterrows():
        targets = row['final_target_category']
        if targets is None:
            continue

        in_group = community in targets
        toxic = row['final_label'] != 'non-toxic'

        # Decide which list (if any) this post belongs to under the chosen method
        bucket = None
        if method == 'subgroup':
            if in_group:
                bucket = positive_ids if toxic else negative_ids
        elif method == 'bpsn':
            if in_group and not toxic:
                bucket = negative_ids
            elif toxic and not in_group:
                bucket = positive_ids
        elif method == 'bnsp':
            if in_group and toxic:
                bucket = positive_ids
            elif not (in_group or toxic):
                bucket = negative_ids
        else:
            print('Incorrect method selected!')

        if bucket is not None:
            bucket.append(row['post_id'])

    return {'positiveID': positive_ids, 'negativeID': negative_ids}

In [27]:
# Calculate bias scores
final_bias_dictionary = defaultdict(lambda: defaultdict(dict))

for each_model in tqdm(bias_score_file_mapping, desc="Processing models"):
    total_data = {}
    filepath = parent_path + bias_score_file_mapping[each_model]

    # Check if file exists
    if not os.path.exists(filepath):
        print(f"Warning: {filepath} not found. Run testing scripts first.")
        continue

    with open(filepath) as fp:
        for line in fp:
            data = json.loads(line)
            total_data[data['annotation_id']] = data

    for each_method in method_list:
        for each_community in community_list:
            community_data = bias_evaluation_metric(data_all_labelled_bias, each_method, each_community)
            truth_values = []
            prediction_values = []

            label_to_value = {'toxic': 1.0, 'non-toxic': 0.0}

            for each in community_data['positiveID']:
                if each in total_data:
                    truth_values.append(label_to_value[total_data[each]['ground_truth']])
                    prediction_values.append(convert_to_score(
                        total_data[each]['classification'],
                        total_data[each]['classification_scores']
                    ))

            for each in community_data['negativeID']:
                if each in total_data:
                    truth_values.append(label_to_value[total_data[each]['ground_truth']])
                    prediction_values.append(convert_to_score(
                        total_data[each]['classification'],
                        total_data[each]['classification_scores']
                    ))

            if len(truth_values) > 0 and len(set(truth_values)) > 1:
                roc_output_value = roc_auc_score(truth_values, prediction_values)
                final_bias_dictionary[each_model][each_method][each_community] = roc_output_value

Processing models:   0%|          | 0/1 [00:00<?, ?it/s]



In [29]:
# Calculate generalized mean of bias scores: (mean(auc ** p)) ** (1 / p) with p = power_value
power_value = -5
num_communities = len(community_list)

print("\nBias Scores (Generalized Mean):")
print("=" * 50)
for each_model, method_scores in final_bias_dictionary.items():
    for each_method, community_scores in method_scores.items():
        temp_value = [pow(auc, power_value) for auc in community_scores.values()]
        if len(temp_value) > 0:
            # Average over the communities that actually have an AUC. Dividing by the
            # full community count would treat every community without a score as if
            # it contributed 0 to the sum, which inflates the result for a negative power.
            score = pow(np.sum(temp_value) / len(temp_value), 1 / power_value)
            print(f"{each_model} | {each_method}: {score:.4f}")
            if len(temp_value) < num_communities:
                print(f"  (computed over {len(temp_value)} of {num_communities} communities; "
                      f"the others had no AUC)")


Bias Scores (Generalized Mean):


---

# Calculate Explainability

Based on: DeYoung et al. (2020) - "ERASER: A Benchmark to Evaluate Rationalized NLP Models"

---

In [30]:
# Import required libraries
import json
from tqdm.notebook import tqdm
import more_itertools as mit
import os

In [31]:
# Import preprocessing utilities
from Preprocess.dataCollect import get_annotated_data
from Preprocess.spanMatcher import returnMask
from transformers import BertTokenizer

In [32]:
# Load 3-class dataset for explainability
# Dataset file and label-encoder file, keyed by the number of target classes.
dict_data_folder = {
    '2': {'data_file': 'Data/dataset.json', 'class_label': 'Data/classes_two.npy'},
    '3': {'data_file': 'Data/dataset.json', 'class_label': 'Data/classes.npy'},
}

params = {'num_classes': 3}  # hatespeech, offensive, normal
selected_paths = dict_data_folder[str(params['num_classes'])]
params['data_file'] = selected_paths['data_file']
params['class_names'] = selected_paths['class_label']

data_all_labelled = get_annotated_data(params)
print(f"Loaded {len(data_all_labelled)} samples for explainability evaluation")

Loaded 20148 samples for explainability evaluation


In [33]:
# Configure tokenization parameters
# These settings are handed to returnMask together with the tokenizer chosen below.
params_data = dict(
    include_special=False,
    bert_tokens=False,  # Set True for BERT models
    type_attention='softmax',
    set_decay=0.1,
    majority=2,
    max_length=128,
    variance=10,
    window=4,
    alpha=0.5,
    p_value=0.8,
    method='additive',
    decay=False,
    normalized=False,
    not_recollect=True,
)

# Initialize tokenizer: no tokenizer object is needed unless BERT tokens are requested
if not params_data['bert_tokens']:
    print('Using standard tokenizer...')
    tokenizer = None
else:
    print('Loading BERT tokenizer...')
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=False)

Using standard tokenizer...


In [34]:
def get_training_data(data):
    """Collect per-post tokens and annotator masks for every decided post.

    Iterates over the rows of ``data`` (anything exposing ``iterrows()`` and
    ``len()``), skips rows whose ``final_label`` is ``'undecided'`` and, for the
    remaining rows, calls ``returnMask`` with the notebook-level ``params_data``
    and ``tokenizer``.

    Returns:
        A list with one entry per kept row, shaped as
        ``[post_id, final_label, tokens, attention_masks, [label1, label2, label3]]``.
    """
    print(f'Processing {len(data)} samples...')
    collected = []

    progress = tqdm(data.iterrows(), total=len(data))
    for _, record in progress:
        final_label = record['final_label']
        post_id = record['post_id']
        annotator_labels = [record[column] for column in ('label1', 'label2', 'label3')]

        # Posts without an agreed label carry no usable target.
        if final_label == 'undecided':
            continue

        tokens_all, attention_masks = returnMask(record, params_data, tokenizer)
        collected.append([post_id, final_label, tokens_all, attention_masks, annotator_labels])

    return collected

In [35]:
# Process training data
training_data = get_training_data(data_all_labelled)
print(f"Processed {len(training_data)} valid samples")

Processing 20148 samples...


  0%|          | 0/20148 [00:00<?, ?it/s]

Processed 19229 valid samples


In [36]:
def find_ranges(iterable):
    """Collapse runs of consecutive numbers into range markers.

    Yields the bare value for a run of length one, and a ``(first, last)``
    tuple (both inclusive) for longer runs.
    """
    for run in mit.consecutive_groups(iterable):
        members = list(run)
        yield members[0] if len(members) == 1 else (members[0], members[-1])


def get_evidence(post_id, anno_text, explanations):
    """Build ERASER-style evidence records from a binary token mask.

    Args:
        post_id: Identifier stored as ``docid`` in every record.
        anno_text: Token sequence the mask refers to.
        explanations: Per-token flags; positions equal to 1 are rationale tokens.

    Returns:
        One dict per contiguous run of flagged positions, with ``start_token``
        inclusive, ``end_token`` exclusive and ``text`` holding the covered
        tokens joined by single spaces.
    """
    flagged = sorted(position for position, flag in enumerate(explanations) if flag == 1)
    evidence = []

    for span in list(find_ranges(flagged)):
        if isinstance(span, int):
            # Single flagged token.
            start = span
            end = span + 1
        elif len(span) != 2:
            print('Error in span processing')
            continue
        else:
            # Inclusive (first, last) pair -> half-open [start, end).
            start = span[0]
            end = span[1] + 1

        covered_text = ' '.join(map(str, anno_text[start:end]))
        evidence.append({
            "docid": post_id,
            "end_sentence": -1,
            "end_token": end,
            "start_sentence": -1,
            "start_token": start,
            "text": covered_text
        })

    return evidence


def convert_to_eraser_format(dataset, method, save_split, save_path, id_division):
    """Convert dataset to ERASER benchmark format.

    Args:
        dataset: Iterable of rows indexed as ``[post_id, post_class, tokens,
            per_annotator_masks, ...]``. Rows whose class is ``'normal'`` are skipped.
        method: How the annotators' masks are combined. Only ``'union'`` (a token is
            a rationale if any annotator marked it) is implemented.
        save_split: When true, write ``train.jsonl`` / ``val.jsonl`` / ``test.jsonl``
            and one file per post under ``docs/`` inside ``save_path``.
        save_path: Output directory; only used when ``save_split`` is true.
        id_division: Mapping with ``'train'``, ``'val'`` and ``'test'`` collections of
            post ids; only used when ``save_split`` is true.

    Returns:
        List of annotation dicts, one per non-normal row.

    Raises:
        ValueError: If a non-normal row has to be converted with an unsupported
            ``method``.
    """
    final_output = []
    split_names = ('train', 'val', 'test')
    split_files = {}
    split_ids = {}

    try:
        if save_split:
            os.makedirs(save_path, exist_ok=True)
            os.makedirs(os.path.join(save_path, 'docs'), exist_ok=True)
            for split_name in split_names:
                split_files[split_name] = open(os.path.join(save_path, f'{split_name}.jsonl'), 'w')
            # Sets give constant-time membership tests; the JSON file provides lists.
            split_ids = {split_name: set(id_division[split_name]) for split_name in split_names}

        for eachrow in dataset:
            post_id = eachrow[0]
            post_class = eachrow[1]
            anno_text_list = eachrow[2]

            if post_class == 'normal':
                continue

            explanations = [list(each_explain) for each_explain in eachrow[3]]

            # Union of explanations from all annotators
            if method == 'union':
                final_explanation = [int(any(each)) for each in zip(*explanations)]
            else:
                # Fail with a clear message instead of an unbound-variable error.
                raise ValueError(f"Unsupported method {method!r}; only 'union' is implemented")

            temp = {
                'annotation_id': post_id,
                'classification': post_class,
                'evidences': [get_evidence(post_id, list(anno_text_list), final_explanation)],
                'query': "What is the class?",
                'query_type': None
            }
            final_output.append(temp)

            if save_split:
                # Save document
                with open(os.path.join(save_path, 'docs', post_id), 'w') as fp:
                    fp.write(' '.join([str(x) for x in list(anno_text_list)]))

                # Save to the first split that lists this post (train, then val, then test)
                for split_name in split_names:
                    if post_id in split_ids[split_name]:
                        split_files[split_name].write(json.dumps(temp) + '\n')
                        break
    finally:
        # Close whatever was opened, even if a row failed half-way through.
        for split_fp in split_files.values():
            split_fp.close()

    return final_output

In [37]:
# Load data splits
# Parse the post-id partition file into `id_division`.
with open('./Data/post_id_divisions.json') as fp:
    id_division = json.loads(fp.read())

In [38]:
# Create evaluation directory
# exist_ok=True keeps this cell safe to re-run when the directory is already there.
os.makedirs('./Data/Evaluation/Model_Eval', exist_ok=True)

In [39]:
# Convert to ERASER format
# Rationales are merged across annotators and the split files are written to disk.
method, save_split = 'union', True
save_path = './Data/Evaluation/Model_Eval/'

output_eraser = convert_to_eraser_format(
    dataset=training_data,
    method=method,
    save_split=save_split,
    save_path=save_path,
    id_division=id_division,
)
print(f"Converted {len(output_eraser)} samples to ERASER format")

Converted 11415 samples to ERASER format


In [40]:
# List generated files
# The docs/ folder and the three split files written by convert_to_eraser_format should show up here.
!ls Data/Evaluation/Model_Eval/

docs  test.jsonl  train.jsonl  val.jsonl


In [41]:
# Run ERASER metrics
# The benchmark script is only launched when the explanation file exists;
# otherwise the cell prints a hint instead of failing.
# NOTE(review): the --results path in the shell command repeats `explanation_file`
# relative to eraserbenchmark/ (the command cd's there first) -- keep the two in sync.
# NOTE(review): --score_file presumably is where metrics.py writes the scores that the
# next cell reads from ./model_explain_output.json -- confirm against metrics.py.
explanation_file = './explanations_dicts/bestModel_birnnscrat_100_explanation_top5.json'
if os.path.exists(explanation_file):
    !cd eraserbenchmark && PYTHONPATH=./:$PYTHONPATH python rationale_benchmark/metrics.py \
        --split test \
        --data_dir ../Data/Evaluation/Model_Eval \
        --results ../explanations_dicts/bestModel_birnnscrat_100_explanation_top5.json \
        --score_file ../model_explain_output.json
else:
    print(f"Explanation file not found: {explanation_file}")
    print("Run testing_with_rational.py first.")

  6039 MainThread Error in instances: 0 instances fail validation: set()
  9510 MainThread No sentence level predictions detected, skipping sentence-level diagnostic
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
{'classification_scores': {'accuracy': 0.5779334500875657,
                           'aopc_thresholds': None,
                           'comprehensiveness': np.float64(0.3071190950170407),
                           'comprehensiveness_aopc': None,
                           'comprehensiveness_aopc_points': None,
                           'comprehensiveness_entropy': np.float64(0.16702580912683954),
                           'comprehensiveness_kl': np.float64(0.8087778324712184),
                           'prf': {'accuracy': 0.5779334500875657,
                                   'hatespeech': {'f1-score'

In [42]:
# Print explainability results
# Reads the score file and reports the plausibility and faithfulness numbers,
# or says so when the file is missing.
output_file = './model_explain_output.json'
if not os.path.exists(output_file):
    print(f"Output file not found: {output_file}")
else:
    with open(output_file) as fp:
        output_data = json.loads(fp.read())

    print('\n' + '=' * 50)
    print('EXPLAINABILITY RESULTS')
    print('=' * 50)

    # Plausibility block
    print('\nPlausibility:')
    print(f"  IOU F1:   {output_data['iou_scores'][0]['macro']['f1']:.4f}")
    print(f"  Token F1: {output_data['token_prf']['instance_macro']['f1']:.4f}")
    print(f"  AUPRC:    {output_data['token_soft_metrics']['auprc']:.4f}")

    # Faithfulness block
    print('\nFaithfulness:')
    print(f"  Comprehensiveness: {output_data['classification_scores']['comprehensiveness']:.4f}")
    print(f"  Sufficiency:       {output_data['classification_scores']['sufficiency']:.4f}")


EXPLAINABILITY RESULTS

Plausibility:
  IOU F1:   0.2224
  Token F1: 0.5040
  AUPRC:    0.8412

Faithfulness:
  Comprehensiveness: 0.3071
  Sufficiency:       0.0432


---

## Summary

This notebook demonstrates:
1. **Model Training**: Training the BiRNN-SCRAT model for hate speech detection
2. **Bias Evaluation**: Computing subgroup, BPSN, and BNSP bias metrics
3. **Explainability Evaluation**: Computing plausibility and faithfulness metrics

---