# Comparison of IndoBERT and DistilBERT

In [1]:
# Uncomment on a fresh runtime to install the required packages:
# %pip install -q transformers datasets

In [2]:
# Colab-only: mount Google Drive so the model directories under
# /content/drive/MyDrive can be read by the loading cells below.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
import random
import numpy as np
import pandas as pd
import timeit
import torch
from torch import optim
import torch.nn.functional as F
from tqdm import tqdm

from transformers import BertForSequenceClassification, BertConfig, BertTokenizer
from transformers import DistilBertForSequenceClassification, DistilBertConfig, DistilBertTokenizer

## Benchmarking

In [4]:
# Directory on the mounted Drive holding the IndoBERT tokenizer and weights.
indobert_path = "/content/drive/MyDrive/Models/indobert"

# Tokenizer and sequence classifier are both restored from that directory;
# the classifier is then placed on the CPU, where the timing is done.
tokenizer = BertTokenizer.from_pretrained(indobert_path)
model = BertForSequenceClassification.from_pretrained(indobert_path)
model = model.to("cpu")

In [5]:
# Parameter count reported by the IndoBERT model; reused later as its
# entry in the "Size" column of the comparison table.
size_ib = model.num_parameters()
print(f"IndoBERT params: {size_ib}")

IndoBERT params: 124443651


In [6]:
# Directory on the mounted Drive holding the DistilBERT tokenizer and weights.
distilbert_path = "/content/drive/MyDrive/Models/distilbert"

# Same loading recipe as for IndoBERT, using the DistilBERT classes, so the
# two models are compared on the same device (CPU).
tokenizer_distil = DistilBertTokenizer.from_pretrained(distilbert_path)
model_distil = DistilBertForSequenceClassification.from_pretrained(distilbert_path)
model_distil = model_distil.to("cpu")

In [7]:
# Parameter count reported by the DistilBERT model; reused later as its
# entry in the "Size" column of the comparison table.
size_db = model_distil.num_parameters()
print(f"DistilBERT params: {size_db}")

DistilBERT params: 68090883


In [8]:
def benchmark(f, name="", warmup=10, number=100):
    """Measure the mean wall-clock latency of a zero-argument callable.

    Args:
        f: Zero-argument callable to time.
        name: Label printed in front of the measurement.
        warmup: Number of untimed calls made first, so one-off start-up
            costs do not end up in the measurement.
        number: Number of timed calls the mean is taken over (at least 1).

    Returns:
        Mean time per call in milliseconds.

    Raises:
        ValueError: If ``number`` is smaller than 1.
    """
    if number < 1:
        raise ValueError(f"number must be >= 1, got {number}")

    # Untimed warm-up calls.
    for _ in range(warmup):
        f()

    # timeit returns the total for all `number` calls; convert to a
    # per-call mean and then from seconds to milliseconds.
    seconds_per_iter = timeit.timeit(f, number=number) / number
    ms_per_iter = seconds_per_iter * 1000
    print(
        f"{name}:",
        f"{ms_per_iter:.3f} ms",
    )

    return ms_per_iter

In [9]:
# Single sample sentence used as the benchmark input for both models.
text = 'Bahagia hatiku melihat pernikahan putri sulungku yang cantik jelita'

# Each model gets the sentence encoded by its own tokenizer, reshaped to a
# batch of one (1, sequence_length) and moved to that model's device.
inputs = torch.LongTensor(tokenizer.encode(text)).view(1, -1).to(model.device)

inputs_distil = (
    torch.LongTensor(tokenizer_distil.encode(text))
    .view(1, -1)
    .to(model_distil.device)
)

In [10]:
# Time one forward pass of each model on the same sentence.
# Gradient tracking is switched off for the duration of the measurement:
# only inference latency is of interest, and with autograd enabled every
# forward pass would also record a graph for a backward pass that never runs.
with torch.no_grad():
    speed_ib = benchmark(lambda: model(inputs), "IndoBERT")
    speed_db = benchmark(lambda: model_distil(inputs_distil), "DistilBERT")

IndoBERT: 182.401 ms
DistilBERT: 72.668 ms


In [11]:
# Read IndoBERT's saved evaluation metrics and discard the row labelled 0,
# keeping the remaining row for the comparison table.
# NOTE(review): the reason row 0 is dropped is not visible here - confirm
# against the notebook that wrote this CSV.
df_indo = pd.read_csv("/content/indobert_performance.csv").drop(0)
df_indo

Unnamed: 0,Accuracy,Precision,Recall,F1
1,0.916,0.91558,0.875811,0.890512


In [12]:
# Read DistilBERT's saved evaluation metrics and discard the row labelled 0,
# keeping the remaining row for the comparison table.
# NOTE(review): the reason row 0 is dropped is not visible here - confirm
# against the notebook that wrote this CSV.
df_distil = pd.read_csv("/content/distilbert_performance.csv").drop(0)
df_distil

Unnamed: 0,Accuracy,Precision,Recall,F1
1,0.89,0.886538,0.854852,0.866431


In [13]:
# Stack the two metric tables (IndoBERT first, DistilBERT second), renumber
# the rows from 0, then label each row with the name of its model.
df_perf = (
    pd.concat([df_indo, df_distil], axis=0)
    .reset_index(drop=True)
    .rename(index={0: 'IndoBERT', 1: 'DistilBERT'})
)
df_perf

Unnamed: 0,Accuracy,Precision,Recall,F1
IndoBERT,0.916,0.91558,0.875811,0.890512
DistilBERT,0.89,0.886538,0.854852,0.866431


In [14]:
# Append the measured parameter counts and per-call latencies (milliseconds,
# as returned by benchmark) in the same row order: IndoBERT, DistilBERT.
df_perf = df_perf.assign(**{
    'Size': [size_ib, size_db],
    'Inference Time': [speed_ib, speed_db],
})
df_perf

Unnamed: 0,Accuracy,Precision,Recall,F1,Size,Inference Time
IndoBERT,0.916,0.91558,0.875811,0.890512,124443651,182.401327
DistilBERT,0.89,0.886538,0.854852,0.866431,68090883,72.667718


In [28]:
# Save the combined comparison table (row labels included) to a CSV file
# in the current working directory.
df_perf.to_csv("comparison.csv")

## Conclusion

In [27]:
# Headline numbers for the summary text below.

# Accuracy gap in percentage points (IndoBERT minus DistilBERT). Each cell is
# read with a single .loc[row, column] lookup rather than chained indexing.
acc_diff = round(
    (df_perf.loc['IndoBERT', 'Accuracy'] - df_perf.loc['DistilBERT', 'Accuracy']) * 100,
    2,
)
# DistilBERT's parameter count expressed as a percentage of IndoBERT's.
size_diff = round(size_db / size_ib * 100, 2)
# How many times longer one IndoBERT call takes compared to one DistilBERT call.
time_diff = round(speed_ib / speed_db, 2)

# The accuracy gap is a one-directional drop measured in percentage points,
# so it is reported as such rather than as a "±" tolerance in percent.
print(f"""
DistilBERT may have lower accuracy (by {acc_diff} percentage points) but its size is only {size_diff}% of IndoBERT's.
At the same time, DistilBERT gives {time_diff}x faster inference time than IndoBERT.

So, if you are not bothered by the slightly decreased performance of the model and are more concerned with the inference speed of the model,
then using DistilBERT can be a good alternative.
""")


DistilBERT may have lower accuracy (± 2.6%) but its size is only 54.72% of IndoBERT's.
At the same time, DistilBERT gives 2.51x faster inference time than IndoBERT.

So, if you are not bothered by the slightly decreased performance of the model and are more concerned with the inference speed of the model,
then using DistilBERT can be a good alternative.

