In [1]:
# Libraries
import tensorflow
import torch
import torch.nn as nn
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader, ConcatDataset
from transformers import AutoTokenizer, BertTokenizer 

import pandas as pd
import time
import numpy as np
import matplotlib.pyplot as plt

import seaborn as sn
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

from ABSA_SentimentMultiEmiten.model.bert import bert_ABSA
from ABSA_SentimentMultiEmiten.data.dataset import dataset_ABSA

2023-06-19 22:59:20.981267: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1


In [2]:
# Menentukan device yang akan digunakan untuk melakukan komputasi
DEVICE = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")

print("Tensorflow\t: ", tensorflow.__version__)
print("Torch\t\t: ", torch.__version__)
print("Device\t\t: ", DEVICE)
print("GPU\t\t: ", torch.cuda.get_device_name())
print("CUDA\t\t: ", torch.version.cuda)

Tensorflow	:  2.4.1
Torch		:  1.1.0
Device		:  cuda:1
GPU		:  Tesla T4
CUDA		:  9.0.176


In [3]:
bs = 16
lr = 0.00002

In [4]:
# Inisialisasi pre-trained model IndoBERT
model_name = "bert-multilingual-2.pkl"
pretrained_model_name = "bert-base-multilingual-uncased"
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name)
model_ABSA = bert_ABSA(pretrained_model_name)
model_ABSA.to(DEVICE)
optimizer_ABSA = torch.optim.Adam(model_ABSA.parameters(), lr=lr)

In [5]:
# Function untuk load model
def load_model(model, path):
    model.load_state_dict(torch.load(path), strict=False)
    return model

In [6]:
# Mengubah dataset menjadi beberapa mini-batch
def create_mini_batch(samples):
    ids_tensors = [s[1] for s in samples]
    ids_tensors = pad_sequence(ids_tensors, batch_first=True)

    segments_tensors = [s[2] for s in samples]
    segments_tensors = pad_sequence(segments_tensors, batch_first=True)

    label_ids = torch.stack([s[3] for s in samples])
    
    masks_tensors = torch.zeros(ids_tensors.shape, dtype=torch.long)
    masks_tensors = masks_tensors.masked_fill(ids_tensors != 0, 1)

    return ids_tensors, segments_tensors, masks_tensors, label_ids

In [7]:
# Function untuk pengujian model
def test_model_ABSA(loader):
    pred = []
    truth = []
    
    with torch.no_grad():
        # Pengulangan setiap mini-batch
        for data in loader:
            ids_tensors, segments_tensors, masks_tensors, label_ids = data
            ids_tensors = ids_tensors.to(DEVICE)
            segments_tensors = segments_tensors.to(DEVICE)
            masks_tensors = masks_tensors.to(DEVICE)

            outputs = model_ABSA(ids_tensors, None, masks_tensors=masks_tensors, segments_tensors=segments_tensors)
            
            _, predictions = torch.max(outputs, dim=1)

            pred += list([int(i) for i in predictions])
            truth += list([int(i) for i in label_ids])

    return truth, pred

## Load Data Testing Unbalance

In [8]:
# Inisialisasi dataset eksperiment max
emiten_max = dataset_ABSA(pd.read_csv("data/data_eksperimen_kalimat/data_lama/data_experiment_max.csv"), tokenizer)
test_max = ConcatDataset([emiten_max])
loader_max = DataLoader(test_max, batch_size=bs, collate_fn=create_mini_batch, shuffle = True)

# Inisialisasi dataset eksperiment avg
emiten_avg = dataset_ABSA(pd.read_csv("data/data_eksperimen_kalimat/data_lama/data_experiment_avg.csv"), tokenizer)
test_avg = ConcatDataset([emiten_avg])
loader_avg = DataLoader(test_avg, batch_size=bs, collate_fn=create_mini_batch, shuffle = True)

# Inisialisasi dataset eksperiment min
emiten_min = dataset_ABSA(pd.read_csv("data/data_eksperimen_kalimat/data_lama/data_experiment_min.csv"), tokenizer)
test_min = ConcatDataset([emiten_min])
loader_min = DataLoader(test_min, batch_size=bs, collate_fn=create_mini_batch, shuffle = True)

## Load Data Testing Balance

In [9]:
# Inisialisasi dataset eksperiment max
emiten_max_b = dataset_ABSA(pd.read_csv("data/data_eksperimen_kalimat/data_lama/data_experiment_max_balance.csv"), tokenizer)
test_max_b = ConcatDataset([emiten_max_b])
loader_max_b = DataLoader(test_max_b, batch_size=bs, collate_fn=create_mini_batch, shuffle = True)

# Inisialisasi dataset eksperiment avg
emiten_avg_b = dataset_ABSA(pd.read_csv("data/data_eksperimen_kalimat/data_lama/data_experiment_avg_balance.csv"), tokenizer)
test_avg_b = ConcatDataset([emiten_avg_b])
loader_avg_b = DataLoader(test_avg_b, batch_size=bs, collate_fn=create_mini_batch, shuffle = True)

# Inisialisasi dataset eksperiment min
emiten_min_b = dataset_ABSA(pd.read_csv("data/data_eksperimen_kalimat/data_lama/data_experiment_min_balance.csv"), tokenizer)
test_min_b = ConcatDataset([emiten_min_b])
loader_min_b = DataLoader(test_min_b, batch_size=bs, collate_fn=create_mini_batch, shuffle = True)

In [10]:
model_ABSA = load_model(model_ABSA, model_name)

## Classification Report Unbalance

In [11]:
# Classification report eksperiment max
x, y = test_model_ABSA(loader_max)
print(classification_report(x, y, target_names=[str(i) for i in range(3)]))

              precision    recall  f1-score   support

           0       0.96      0.99      0.98      1687
           1       0.91      0.96      0.93       465
           2       1.00      0.96      0.98      2662

    accuracy                           0.97      4814
   macro avg       0.95      0.97      0.96      4814
weighted avg       0.98      0.97      0.97      4814



In [12]:
# Classification report eksperiment avg
x, y = test_model_ABSA(loader_avg)
print(classification_report(x, y, target_names=[str(i) for i in range(3)]))

              precision    recall  f1-score   support

           0       0.91      0.96      0.93       917
           1       0.73      0.97      0.84      1058
           2       0.98      0.81      0.89      2292

    accuracy                           0.88      4267
   macro avg       0.87      0.91      0.89      4267
weighted avg       0.90      0.88      0.88      4267



In [13]:
# Classification report eksperiment min
x, y = test_model_ABSA(loader_min)
print(classification_report(x, y, target_names=[str(i) for i in range(3)]))

              precision    recall  f1-score   support

           0       0.89      0.94      0.91       828
           1       0.71      0.96      0.81      1040
           2       0.97      0.76      0.85      1887

    accuracy                           0.85      3755
   macro avg       0.86      0.88      0.86      3755
weighted avg       0.88      0.85      0.85      3755



## Classification Report Balance

In [14]:
# Classification report eksperiment max balanced
x, y = test_model_ABSA(loader_max_b)
print(classification_report(x, y, target_names=[str(i) for i in range(3)]))

              precision    recall  f1-score   support

           0       0.94      1.00      0.97       465
           1       0.98      0.96      0.97       465
           2       0.99      0.96      0.98       465

    accuracy                           0.97      1395
   macro avg       0.97      0.97      0.97      1395
weighted avg       0.97      0.97      0.97      1395



In [15]:
# Classification report eksperiment avg balanced
x, y = test_model_ABSA(loader_avg_b)
print(classification_report(x, y, target_names=[str(i) for i in range(3)]))

              precision    recall  f1-score   support

           0       0.96      0.96      0.96       916
           1       0.84      0.97      0.90       916
           2       0.95      0.81      0.87       916

    accuracy                           0.91      2748
   macro avg       0.92      0.91      0.91      2748
weighted avg       0.92      0.91      0.91      2748



In [16]:
# Classification report eksperiment min balanced
x, y = test_model_ABSA(loader_min_b)
print(classification_report(x, y, target_names=[str(i) for i in range(3)]))

              precision    recall  f1-score   support

           0       0.95      0.94      0.94       824
           1       0.79      0.96      0.87       824
           2       0.95      0.75      0.84       824

    accuracy                           0.88      2472
   macro avg       0.89      0.88      0.88      2472
weighted avg       0.89      0.88      0.88      2472

