# **HATESPEECH MULTILABEL CLASSIFICATION**

In [30]:
tokenizer = AutoTokenizer.from_pretrained("indobenchmark/indobert-base-p1")

def tokenize_text(text, max_length=500):
    encoded = tokenizer(text.to_list(), padding=True, truncation=True, max_length=max_length, return_tensors='tf')
    token_ids = encoded['input_ids']
    attention_mask = encoded['attention_mask']

    return token_ids, attention_mask

tokenizer_config.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.53k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/229k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

In [31]:
multitrain_token_ids, multitrain_attention_mask = tokenize_text(df_multi_train['text'])
multival_token_ids, multival_attention_mask = tokenize_text(df_multi_val['text'])
multitest_token_ids, multitest_attention_mask = tokenize_text(df_multi_test['text'])

In [32]:
label_multi_train = tf.convert_to_tensor(df_multi_train.iloc[:, 1:].values, dtype=tf.float32)
label_multi_val = tf.convert_to_tensor(df_multi_val.iloc[:, 1:].values, dtype=tf.float32)
label_multi_test = tf.convert_to_tensor(df_multi_test.iloc[:, 1:].values, dtype=tf.float32)

In [33]:
formatted_multi_train = tf.data.Dataset.from_tensor_slices(({"input_ids":multitrain_token_ids, "attention_mask":multitrain_attention_mask}, label_multi_train))
formatted_multi_val = tf.data.Dataset.from_tensor_slices(({"input_ids":multival_token_ids, "attention_mask":multival_attention_mask}, label_multi_val))
formatted_multi_test = tf.data.Dataset.from_tensor_slices(({"input_ids":multitest_token_ids, "attention_mask":multitest_attention_mask}, label_multi_test))

In [34]:
batch_size = 8
formatted_multi_train = formatted_multi_train.shuffle(buffer_size=len(df_multi_train)).batch(batch_size)
formatted_multi_val = formatted_multi_val.batch(batch_size)
formatted_multi_test = formatted_multi_test.batch(batch_size)

## **FINE-TUNING IndoBERT**

In [35]:
model = TFAutoModelForSequenceClassification.from_pretrained("indobenchmark/indobert-base-p1", num_labels=label_multi_train.shape[1])

tf_model.h5:   0%|          | 0.00/656M [00:00<?, ?B/s]

All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [36]:
optimizer = Adam(learning_rate=3e-5)
loss = BinaryCrossentropy(from_logits=True)

In [39]:
tolerance = 3
min_delta = 1e-4
num_epoch = 4
wait = 0
best_loss = float('inf')

for epoch in range(num_epoch):
    print(f"Epoch {epoch+1}/{num_epoch}")
    print("="*30)
    train_loss_total = 0
    train_step = 0

    for (x_batch_train, y_batch_train) in formatted_multi_train:
      with tf.GradientTape() as tape:
        logits = model(x_batch_train, training=True).logits
        loss_value = loss(y_batch_train, logits)

      grads = tape.gradient(loss_value, model.trainable_variables)
      optimizer.apply_gradients(zip(grads, model.trainable_variables))

      train_loss_total += loss_value
      train_step += 1

      if train_step % 100 == 0:
        print(f"Training loss (step {train_step}): {loss_value:.4f}")

      train_loss_avg = train_loss_total / train_step

      val_loss_total = 0

    for x_batch_val, y_batch_val in formatted_multi_test:
      val_logits = model(x_batch_val, training=False).logits
      val_loss_value = loss(y_batch_val, val_logits)

      val_loss_total += val_loss_value

    val_loss_avg = val_loss_total / len(formatted_multi_test)
    print("="*30)
    print(f"Training loss: {train_loss_avg:.4f}")
    print(f"Val loss: {val_loss_avg:.4f}")

    if val_loss_avg < best_loss - min_delta:
      best_loss = val_loss_avg
      wait = 0
      print(f"Loss berkurang!")
    else:
      wait += 1
      print(f"Loss meningkat! {wait}/{tolerance}")

    if wait >= tolerance:
      print("Early stopping.....")
      break

Epoch 1/4
Training loss (step 100): 0.2916
Training loss (step 200): 0.2612
Training loss (step 300): 0.3227
Training loss (step 400): 0.2785
Training loss (step 500): 0.2315
Training loss (step 600): 0.3148
Training loss (step 700): 0.1709
Training loss (step 800): 0.1271
Training loss (step 900): 0.2547
Training loss (step 1000): 0.1347
Training loss (step 1100): 0.3693
Training loss (step 1200): 0.2529
Training loss (step 1300): 0.2546
Training loss (step 1400): 0.2386
Training loss (step 1500): 0.1522
Training loss (step 1600): 0.1242
Training loss (step 1700): 0.2895
Training loss (step 1800): 0.1161
Training loss: 0.2314
Val loss: 0.1867
Loss berkurang!
Epoch 2/4
Training loss (step 100): 0.0904
Training loss (step 200): 0.0592
Training loss (step 300): 0.0431
Training loss (step 400): 0.0918
Training loss (step 500): 0.0795
Training loss (step 600): 0.4166
Training loss (step 700): 0.1247
Training loss (step 800): 0.1662
Training loss (step 900): 0.1050
Training loss (step 1000)

## **EVALUATION**

In [40]:
threshold = 0.5

preds = model.predict(formatted_multi_test)
probs = tf.sigmoid(preds.logits).numpy()
preds_label = (probs > threshold).astype(int)

true_labels = []
for _, label in formatted_multi_test:
  true_labels.extend(label.numpy())

true_labels = np.array(true_labels)

target_names = ['abusive', 'hs_individual', 'hs_group', 'hs_religion',
       'hs_race', 'hs_other']

report = classification_report(true_labels, preds_label, target_names=target_names, zero_division=0)
accuracy = accuracy_score(true_labels, preds_label)

print(report)
print(f"Accuracy: {accuracy}")

               precision    recall  f1-score   support

      abusive       0.95      0.93      0.94       285
hs_individual       0.86      0.74      0.80       191
     hs_group       0.75      0.83      0.79       104
  hs_religion       0.78      0.96      0.86        49
      hs_race       0.71      0.94      0.81        31
     hs_other       0.91      0.77      0.83       191

    micro avg       0.87      0.84      0.86       851
    macro avg       0.83      0.86      0.84       851
 weighted avg       0.88      0.84      0.86       851
  samples avg       0.47      0.46      0.46       851

Accuracy: 0.8145161290322581


## **SAVE MODEL AND TOKENIZER**

In [41]:
model.save_pretrained('hs_multilabel_bert')
tokenizer.save_pretrained('hs_multilabel_bert')

shutil.make_archive('hs_multilabel_bert', 'zip', 'hs_multilabel_bert')
files.download('hs_multilabel_bert.zip')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>