In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
from sklearn.model_selection import KFold
from torch.utils.data import DataLoader, TensorDataset
import random
import os

In [5]:
# Load data
# X = np.load('./#SKRIPSI/bert_embedding.npy')
# y_df = pd.read_csv('./#SKRIPSI/preprocessed_data.csv')
X = X = np.load('/content/drive/MyDrive/Colab Notebooks/Tugas Akhir/bert_embedding.npy')
y = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Tugas Akhir/preprocessed_data.csv')
y = y.drop(columns=['Tweet']).values

In [6]:
print(X.shape)
print(y.shape)

(12961, 40, 768)
(12961, 9)


In [7]:
np.set_printoptions(threshold=np.inf, linewidth=200, suppress=True)

# Melihat bentuk dari sampel pertama
print("\nShape dari X[0]:", X[0].shape)

# Melihat sampel pertama
print("\nIsi sampel (X[0]):")
print(X[0])


Shape dari X[0]: (40, 768)

Isi sampel (X[0]):
[[-0.07174455 -0.1491953  -0.31003416 -0.6153574   0.0674201  -1.0181938  -1.3296492  -1.5492475   0.48776466 -0.20918894  0.99915457  0.46860045  0.12667312  0.29971194  0.29753327  0.19196038
  -0.7386304  -0.04318139  0.29143348 -0.58470696 -0.46940207 -0.38292825 -1.0309865  -0.5081094  -1.6950365  -0.7309096  -0.3230784  -0.75085545  0.05023831 -0.32015264 -0.67560744  0.15424562
   0.03465622  0.43284646  1.3821334  -0.22751714 -0.8356764  -0.75247306 -0.02994846 -1.2008178  -0.19937031 -0.00033454 -1.3182765  -0.44962794 -0.10140744  1.2245622  -0.130944    0.34099567
   0.26472566 -0.7957868   0.19444121 -0.38784325  0.05679796 -0.8797441  -0.15784419 -0.5843624  -0.5060142   0.3718733  -1.0917821   0.01265593  0.8011     -0.52780014  0.01851491  0.5996008
   3.8245645  -0.20697229  0.85990137  0.03054521 -0.63246375 -0.73152286  0.47793612  0.24416474 -0.7967539  -2.5776925  -1.0459813  -1.2778983   1.282125   -1.2184938  -0.4499

In [8]:
# Melihat sampel pertama
print("\nIsi sampel pertama (y[0]):")
print(y[0])

# Melihat bentuk dari sampel pertama
print("\nShape dari y[0]:", y[0].shape)


Isi sampel pertama (y[0]):
[1 1 1 0 0 0 0 0 1]

Shape dari y[0]: (9,)


In [21]:
# Split data into training and test sets
X_trainval, X_test, y_trainval, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [22]:
print(X_trainval.shape)
print(y_trainval.shape)
print(X_test.shape)
print(y_test.shape)

(10368, 40, 768)
(10368, 9)
(2593, 40, 768)
(2593, 9)


In [23]:
class BiGRUModel(nn.Module):
    def __init__(self, units):
        super(BiGRUModel, self).__init__()
        self.gru = nn.GRU(input_size=768, hidden_size=units, batch_first=True, bidirectional=True)
        self.fc = nn.Linear(units * 2, 9)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        _, h = self.gru(x)
        h_concat = torch.cat((h[0], h[1]), dim=1)
        out = self.fc(h_concat)
        return self.sigmoid(out)

def train_model(model, optimizer, criterion, train_loader, val_loader, epochs, device):
    model.to(device)
    for epoch in range(epochs):
        model.train()
        for xb, yb in train_loader:
            xb, yb = xb.to(device), yb.to(device)
            optimizer.zero_grad()
            preds = model(xb).squeeze()
            loss = criterion(preds, yb.float())
            loss.backward()
            optimizer.step()

    # Evaluation
    def evaluate(loader):
        model.eval()
        losses, accs = [], []
        with torch.no_grad():
            for xb, yb in loader:
                xb, yb = xb.to(device), yb.to(device)
                preds = model(xb).squeeze()
                loss = criterion(preds, yb.float())
                preds_binary = (preds > 0.5).float()
                acc = (preds_binary == yb).float().mean().item()
                losses.append(loss.item())
                accs.append(acc)
        return np.mean(losses), np.mean(accs)

    train_loss, train_acc = evaluate(train_loader)
    val_loss, val_acc = evaluate(val_loader)

    return train_loss, train_acc, val_loss, val_acc

In [24]:
# Convert training data to tensors
X_trainval_tensor = torch.tensor(X_trainval, dtype=torch.float32)
y_trainval_tensor = torch.tensor(y_trainval, dtype=torch.float32)

In [25]:
# Hyperparameter space
search_space = {
    'epochs': [20, 30, 40],
    'units': [10, 20, 30, 40, 50],
    'learning_rate': [5e-1, 1e-1, 1e-2],
    'batch_size': [128, 192, 256]
}

In [26]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
n_iterations = 5
results = []

kf = KFold(n_splits=5, shuffle=True, random_state=42)

print("Mulai Random Search Tuning...\n")

for i in range(n_iterations):
    params = {
        'epochs': random.choice(search_space['epochs']),
        'units': random.choice(search_space['units']),
        'learning_rate': random.choice(search_space['learning_rate']),
        'batch_size': random.choice(search_space['batch_size'])
    }

    fold_train_losses, fold_val_losses = [], []
    fold_train_accs, fold_val_accs = [], []

    print(f"\nIterasi {i+1}/{n_iterations} - Params: {params}")

    for fold, (train_index, val_index) in enumerate(kf.split(X_trainval_tensor), 1):
        X_train_fold = X_trainval_tensor[train_index]
        y_train_fold = y_trainval_tensor[train_index]
        X_val_fold = X_trainval_tensor[val_index]
        y_val_fold = y_trainval_tensor[val_index]

        train_loader = DataLoader(TensorDataset(X_train_fold, y_train_fold), batch_size=params['batch_size'], shuffle=True)
        val_loader = DataLoader(TensorDataset(X_val_fold, y_val_fold), batch_size=params['batch_size'])

        model = BiGRUModel(params['units'])
        optimizer = torch.optim.Adam(model.parameters(), lr=params['learning_rate'])
        criterion = nn.BCELoss()

        train_loss, train_acc, val_loss, val_acc = train_model(
            model, optimizer, criterion, train_loader, val_loader, params['epochs'], device)

        print(f"  Fold {fold} >> Train Acc: {train_acc:.4f} | Train Loss: {train_loss:.4f} | "
              f"Val Acc: {val_acc:.4f} | Val Loss: {val_loss:.4f}")

        fold_train_losses.append(train_loss)
        fold_train_accs.append(train_acc)
        fold_val_losses.append(val_loss)
        fold_val_accs.append(val_acc)

    avg_train_loss = np.mean(fold_train_losses)
    avg_train_acc = np.mean(fold_train_accs)
    avg_val_loss = np.mean(fold_val_losses)
    avg_val_acc = np.mean(fold_val_accs)

    print(f"  >> Avg Train Acc: {avg_train_acc:.4f} | Avg Train Loss: {avg_train_loss:.4f} | "
          f"Avg Val Acc: {avg_val_acc:.4f} | Avg Val Loss: {avg_val_loss:.4f}")

    results.append({
        'iteration': i+1,
        'params': params,
        'train_acc': avg_train_acc,
        'train_loss': avg_train_loss,
        'val_acc': avg_val_acc,
        'val_loss': avg_val_loss
    })

Mulai Random Search Tuning...


Iterasi 1/5 - Params: {'epochs': 20, 'units': 40, 'learning_rate': 0.1, 'batch_size': 192}
  Fold 1 >> Train Acc: 0.8621 | Train Loss: 0.3170 | Val Acc: 0.8567 | Val Loss: 0.3276
  Fold 2 >> Train Acc: 0.8673 | Train Loss: 0.3031 | Val Acc: 0.8589 | Val Loss: 0.3221
  Fold 3 >> Train Acc: 0.8596 | Train Loss: 0.3124 | Val Acc: 0.8485 | Val Loss: 0.3294
  Fold 4 >> Train Acc: 0.8725 | Train Loss: 0.2868 | Val Acc: 0.8625 | Val Loss: 0.3119
  Fold 5 >> Train Acc: 0.8771 | Train Loss: 0.2790 | Val Acc: 0.8744 | Val Loss: 0.2888
  >> Avg Train Acc: 0.8677 | Avg Train Loss: 0.2997 | Avg Val Acc: 0.8602 | Avg Val Loss: 0.3159

Iterasi 2/5 - Params: {'epochs': 20, 'units': 10, 'learning_rate': 0.5, 'batch_size': 128}
  Fold 1 >> Train Acc: 0.8242 | Train Loss: 0.4204 | Val Acc: 0.8208 | Val Loss: 0.4201
  Fold 2 >> Train Acc: 0.8220 | Train Loss: 0.4160 | Val Acc: 0.8190 | Val Loss: 0.4210
  Fold 3 >> Train Acc: 0.7977 | Train Loss: 0.4204 | Val Acc: 0.7975 | V

In [27]:
# Sorted best hyperparameters
results_sorted = sorted(results, key=lambda x: x['val_acc'], reverse=True)
print("\n5 Kombinasi Hyperparameter Terbaik berdasarkan Val Acc:")
for r in results_sorted[:5]:
    print(f"Iterasi {r['iteration']} - Val Acc: {r['val_acc']:.4f} - Params: {r['params']}")


5 Kombinasi Hyperparameter Terbaik berdasarkan Val Acc:
Iterasi 5 - Val Acc: 0.8613 - Params: {'epochs': 30, 'units': 30, 'learning_rate': 0.1, 'batch_size': 128}
Iterasi 1 - Val Acc: 0.8602 - Params: {'epochs': 20, 'units': 40, 'learning_rate': 0.1, 'batch_size': 192}
Iterasi 4 - Val Acc: 0.8569 - Params: {'epochs': 30, 'units': 20, 'learning_rate': 0.1, 'batch_size': 256}
Iterasi 2 - Val Acc: 0.8065 - Params: {'epochs': 20, 'units': 10, 'learning_rate': 0.5, 'batch_size': 128}
Iterasi 3 - Val Acc: 0.7861 - Params: {'epochs': 30, 'units': 50, 'learning_rate': 0.5, 'batch_size': 128}


In [28]:
# Retrain on full training set
best_params = results_sorted[0]['params']
print(f"\nMelatih ulang model dengan hyperparameter terbaik: {best_params}\n")
model = BiGRUModel(best_params['units'])
optimizer = torch.optim.Adam(model.parameters(), lr=best_params['learning_rate'])
criterion = nn.BCELoss()

train_loader = DataLoader(TensorDataset(X_trainval_tensor, y_trainval_tensor), batch_size=best_params['batch_size'], shuffle=True)

model.to(device)
for epoch in range(best_params['epochs']):
    model.train()
    epoch_losses = []
    epoch_accuracies = []
    for xb, yb in train_loader:
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad()
        preds = model(xb).squeeze()
        loss = criterion(preds, yb.float())
        loss.backward()
        optimizer.step()

        epoch_losses.append(loss.item())
        preds_binary = (preds > 0.5).float()
        acc = (preds_binary == yb).float().mean().item()
        epoch_accuracies.append(acc)

    avg_loss = np.mean(epoch_losses)
    avg_acc = np.mean(epoch_accuracies)
    print(f"Epoch {epoch+1}/{best_params['epochs']} - Loss: {avg_loss:.4f} | Accuracy: {avg_acc:.4f}")


Melatih ulang model dengan hyperparameter terbaik: {'epochs': 30, 'units': 30, 'learning_rate': 0.1, 'batch_size': 128}

Epoch 1/30 - Loss: 0.3925 | Accuracy: 0.8231
Epoch 2/30 - Loss: 0.3514 | Accuracy: 0.8472
Epoch 3/30 - Loss: 0.3359 | Accuracy: 0.8537
Epoch 4/30 - Loss: 0.3327 | Accuracy: 0.8549
Epoch 5/30 - Loss: 0.3259 | Accuracy: 0.8556
Epoch 6/30 - Loss: 0.3324 | Accuracy: 0.8510
Epoch 7/30 - Loss: 0.3216 | Accuracy: 0.8585
Epoch 8/30 - Loss: 0.3145 | Accuracy: 0.8601
Epoch 9/30 - Loss: 0.3103 | Accuracy: 0.8634
Epoch 10/30 - Loss: 0.3107 | Accuracy: 0.8649
Epoch 11/30 - Loss: 0.3096 | Accuracy: 0.8656
Epoch 12/30 - Loss: 0.3152 | Accuracy: 0.8619
Epoch 13/30 - Loss: 0.3111 | Accuracy: 0.8634
Epoch 14/30 - Loss: 0.3105 | Accuracy: 0.8660
Epoch 15/30 - Loss: 0.3101 | Accuracy: 0.8636
Epoch 16/30 - Loss: 0.3036 | Accuracy: 0.8644
Epoch 17/30 - Loss: 0.3028 | Accuracy: 0.8657
Epoch 18/30 - Loss: 0.2994 | Accuracy: 0.8694
Epoch 19/30 - Loss: 0.3024 | Accuracy: 0.8667
Epoch 20/30 -

In [29]:
# Simpan model
torch.save(model, 'Bi-GRU.pt')

In [30]:
# Salin Bi-GRU ke google drive
import shutil
import os

def get_unique_filename(base_path, filename):
    """Mengembalikan nama file unik jika file sudah ada di base_path."""
    name, ext = os.path.splitext(filename)
    counter = 1
    new_filename = filename
    while os.path.exists(os.path.join(base_path, new_filename)):
        new_filename = f"{name}({counter}){ext}"
        counter += 1
    return os.path.join(base_path, new_filename)

# Path tujuan
destination_folder = "/content/drive/MyDrive/Colab Notebooks/Tugas Akhir"
destination_file = get_unique_filename(destination_folder, "Bi-GRU.pt")

# Salin file ke Google Drive
shutil.copy("Bi-GRU.pt", destination_file)

print(f"File berhasil disalin sebagai: {os.path.basename(destination_file)}")


File berhasil disalin sebagai: Bi-GRU(3).pt


In [31]:
# Convert test data to tensors for Evaluation
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)
test_loader = DataLoader(TensorDataset(X_test_tensor, y_test_tensor), batch_size=best_params['batch_size'])

In [32]:
# Evaluation
print("\nMengevaluasi model yang sudah dilatih...\n")
model.eval()
test_losses = []
test_accs = []
with torch.no_grad():
    for xb, yb in test_loader:
        xb, yb = xb.to(device), yb.to(device)
        preds = model(xb).squeeze()
        loss = criterion(preds, yb.float())
        test_losses.append(loss.item())
        preds_binary = (preds > 0.5).float()
        acc = (preds_binary == yb).float().mean().item()
        test_accs.append(acc)
mean_test_loss = np.mean(test_losses)
mean_test_acc = np.mean(test_accs)
print(f"Test Loss: {mean_test_loss:.4f}")
print(f"Test Accuracy: {mean_test_acc:.4f}")


Mengevaluasi model yang sudah dilatih...

Test Loss: 0.3087
Test Accuracy: 0.8698
