# Load the Dataset

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Bengali test split of the modified hate-speech dataset (Kaggle input mount).
ben_df_folder = "/kaggle/input/modified-hate-speech-bengali-hindi/bengali-hate-speech/"
ben_test_csv = f"{ben_df_folder}bengali-hate-speech_test.csv"
ben_df_test = pd.read_csv(ben_test_csv)

In [3]:
# Hindi test split of the modified hate-speech dataset (Kaggle input mount).
hin_df_folder = "/kaggle/input/modified-hate-speech-bengali-hindi/hindi-hate-speech/"
hin_test_csv = f"{hin_df_folder}hindi-hate-speech_test.csv"
hin_df_test = pd.read_csv(hin_test_csv)

In [4]:
# Row counts of each test split. len() counts every row, whereas
# Series.count() skips NaN entries in 'text'; ben_df_size is later used as the
# offset that splits the merged predictions back into Bengali and Hindi, so it
# must be the true number of rows.
ben_df_size = len(ben_df_test)
hin_df_size = len(hin_df_test)

Find out the available devices

In [5]:
import torch

In [6]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

# Load the Required Model

In [7]:
from transformers import pipeline

2024-05-10 19:24:11.545806: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-05-10 19:24:11.545916: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-05-10 19:24:11.682999: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [8]:
# Text-classification pipeline backed by the fine-tuned Bengali/Hindi
# hate-speech checkpoint, placed on the device selected above.
pipe = pipeline(
    task="text-classification",
    model="kingshukroy/twhin-bert-base-hate-speech-ben-hin",
    device=device,
)

config.json:   0%|          | 0.00/934 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.12G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/964 [00:00<?, ?B/s]

## Get the features

In [9]:
# Label names in the order stored in the model config's label2id mapping.
features = list(pipe.model.config.label2id)
num_features = len(features)
(features, num_features)

(['defamation', 'hate', 'non-hate', 'violence', 'vulgar'], 5)

# Create a Labels Column in Each DataFrame

In [10]:
def _row_to_label_vector(row):
    """Return the row's values for each name in `features`, in that order."""
    return [row[name] for name in features]


# Attach the per-row label vector to both test frames.
for frame in (ben_df_test, hin_df_test):
    frame['labels'] = frame.drop(columns=['text']).apply(_row_to_label_vector, axis=1)

# Merge the Two DataFrames

In [11]:
# Stack Bengali rows first, then Hindi rows, with a fresh 0..n-1 index.
merged_df_test = pd.concat((ben_df_test, hin_df_test), ignore_index=True)

# Convert the DataFrames into PyTorch Datasets

In [12]:
from torch.utils.data import Dataset

In [13]:
class TextDatasetWrapper(Dataset):
    """Expose one text column of a DataFrame as a map-style PyTorch Dataset.

    Args:
        df: DataFrame holding the texts.
        text_col: Name of the column to read from (default ``'text'``).
    """

    def __init__(self, df, text_col='text'):
        self.df = df
        self.__text_col__ = text_col

    def __getitem__(self, index):
        """Return the text stored at integer position ``index``.

        Positional (``.iloc``) access is used so the wrapper works for frames
        whose index is not 0..n-1 (filtered, shuffled, or concatenated
        frames). An out-of-range position raises ``IndexError``.
        """
        return self.df[self.__text_col__].iloc[index]

    def __len__(self):
        """Return the number of rows in the wrapped DataFrame."""
        return len(self.df)
    
# One text-only dataset per test frame: Bengali, Hindi, and the merged set.
ben_dataset_text, hin_dataset_text, merged_dataset_text = (
    TextDatasetWrapper(frame)
    for frame in (ben_df_test, hin_df_test, merged_df_test)
)

# Get the predicted Labels

In [14]:
# Tokenizer keyword arguments forwarded to the pipeline call:
# pad/truncate every input to 256 tokens.
tokenize_args = dict(
    padding="max_length",
    max_length=256,
    truncation=True,
)

In [15]:
from tqdm.auto import tqdm

In [16]:
def get_labels_from_scores(scores_list, threshold=0.5, labels=None):
    """Turn one sample's per-label scores into a 0/1 indicator list.

    Args:
        scores_list: Iterable of dicts, each with a ``'label'`` and a
            ``'score'`` key (one dict per label).
        threshold: A label is marked 1 only when its score is strictly
            greater than this value.
        labels: Label names fixing the order of the output. Defaults to the
            module-level ``features`` list.

    Returns:
        A list of ints (1 or 0), one per entry in ``labels``.

    Raises:
        KeyError: If a requested label is missing from ``scores_list``.
    """
    if labels is None:
        labels = features
    # A plain dict lookup replaces building and reshaping a DataFrame for
    # every single prediction.
    score_by_label = {entry["label"]: entry["score"] for entry in scores_list}
    return [1 if score_by_label[name] > threshold else 0 for name in labels]

In [17]:
# Score every text in the merged dataset (all label scores via top_k=None)
# and threshold each result into a 0/1 label vector.
merged_predictions = pipe(merged_dataset_text, batch_size=32, top_k=None, **tokenize_args)
merged_pred_labels = [
    get_labels_from_scores(pred)
    for pred in tqdm(merged_predictions, total=len(merged_dataset_text))
]
# The merged frame lists Bengali rows first, so split at the Bengali row count.
ben_pred_labels = merged_pred_labels[:ben_df_size]
hin_pred_labels = merged_pred_labels[ben_df_size:]

  0%|          | 0/4868 [00:00<?, ?it/s]

# Performance Metrics for the Predictions

In [18]:
import sklearn.metrics as matrices

In [19]:
def show_report(actual_labels, y_pred):
    """Print summary metrics and a per-class report for multi-label output.

    Args:
        actual_labels: Ground-truth label rows, one entry per name in
            ``features`` (presumably 0/1 indicators - confirm against the
            dataset columns).
        y_pred: Predicted label rows in the same layout.

    Note:
        The ROC AUC printed here is computed from whatever is passed as
        ``y_pred``; when those are thresholded 0/1 decisions rather than
        scores, the value reflects a single operating point.
    """
    # Shared zero-division policy so every metric treats empty classes alike
    # and none of them emits an undefined-metric warning on its own.
    zero_division_value = 0
    print('Accuracy Score: ', matrices.accuracy_score(actual_labels, y_pred))
    print('F1 Score: ', matrices.f1_score(actual_labels, y_pred, average='micro',
                                          zero_division=zero_division_value))
    print('ROC AUC: ', matrices.roc_auc_score(actual_labels, y_pred, average='micro'))
    print('Hamming Loss: ', matrices.hamming_loss(actual_labels, y_pred))
    print('Jaccard Score: ', matrices.jaccard_score(actual_labels, y_pred, average='micro',
                                                    zero_division=zero_division_value))
    print('Zero-One Loss:', matrices.zero_one_loss(actual_labels, y_pred))

    print('\nClassification Report :-\n',
          matrices.classification_report(
              actual_labels,
              y_pred,
              output_dict=False,
              target_names=features,
              zero_division=zero_division_value,
          )
         )

## Performance Metrics on Bengali Dataset

In [20]:
# Bengali split: ground-truth label vectors vs. the first slice of predictions.
ben_actual_labels = ben_df_test['labels'].tolist()
show_report(ben_actual_labels, ben_pred_labels)

Accuracy Score:  0.7471228615863141
F1 Score:  0.8188891870136837
ROC AUC:  0.881683922226718
Hamming Loss:  0.08398133748055987
Jaccard Score:  0.6933212176283507
Zero-One Loss: 0.25287713841368586

Classification Report :-
               precision    recall  f1-score   support

  defamation       0.69      0.76      0.72       828
        hate       0.64      0.58      0.61       364
    non-hate       0.95      0.93      0.94      1548
    violence       0.75      0.76      0.76       482
      vulgar       0.87      0.80      0.83       511

   micro avg       0.82      0.82      0.82      3733
   macro avg       0.78      0.77      0.77      3733
weighted avg       0.82      0.82      0.82      3733
 samples avg       0.82      0.83      0.82      3733



## Performance Metrics on Hindi Dataset

In [21]:
# Hindi split: ground-truth label vectors vs. the remaining predictions.
hin_actual_labels = hin_df_test['labels'].tolist()
show_report(hin_actual_labels, hin_pred_labels)

Accuracy Score:  0.7513611615245009
F1 Score:  0.7864010989010989
ROC AUC:  0.8628545175202422
Hamming Loss:  0.07525710828796128
Jaccard Score:  0.6479909451046972
Zero-One Loss: 0.24863883847549906

Classification Report :-
               precision    recall  f1-score   support

  defamation       0.47      0.28      0.35       169
        hate       0.53      0.50      0.51       234
    non-hate       0.98      0.97      0.97       873
    violence       0.00      0.00      0.00         0
      vulgar       0.59      0.61      0.60       219

   micro avg       0.81      0.77      0.79      1495
   macro avg       0.51      0.47      0.49      1495
weighted avg       0.79      0.77      0.78      1495
 samples avg       0.66      0.66      0.65      1495



## Performance Metrics on Merged Dataset
Merged Dataset = Bengali Dataset + Hindi Dataset

In [22]:
# Merged set: all ground-truth label vectors vs. all predictions.
merged_actual_labels = merged_df_test['labels'].tolist()
show_report(merged_actual_labels, merged_pred_labels)

Accuracy Score:  0.7485620377978636
F1 Score:  0.8097626857032606
ROC AUC:  0.876778286446135
Hamming Loss:  0.0810188989317995
Jaccard Score:  0.6803371697195656
Zero-One Loss: 0.25143796220213643

Classification Report :-
               precision    recall  f1-score   support

  defamation       0.66      0.68      0.67       997
        hate       0.60      0.55      0.57       598
    non-hate       0.96      0.94      0.95      2421
    violence       0.75      0.76      0.76       482
      vulgar       0.78      0.74      0.76       730

   micro avg       0.82      0.80      0.81      5228
   macro avg       0.75      0.74      0.74      5228
weighted avg       0.82      0.80      0.81      5228
 samples avg       0.77      0.77      0.76      5228

