In [None]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.models as models
from tqdm import tqdm
from torch.utils.data import DataLoader, Dataset
from PIL import Image
from sklearn.model_selection import train_test_split

In [None]:
# Fix the RNG seeds (NumPy and PyTorch) so repeated runs draw the same numbers
seed = 42
np.random.seed(seed)
torch.manual_seed(seed)

# Run on the GPU when CUDA is available, otherwise on the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Hyper-parameters shared by the experiments in this notebook
img_size = (380, 380)   # (height, width) every image is resized to
batch_size, epochs = 16, 50
learning_rate = 1e-4

In [None]:
# Helper that indexes the class folders into a DataFrame
def load_dataset(path, folders):
    """Index every file under ``path/<folder>`` and label it by folder position.

    Args:
        path: Root directory that contains the class folders.
        folders: Sequence of sub-folder names. The position of a folder in
            this sequence becomes the integer label of the files inside it.

    Returns:
        pandas.DataFrame with the columns ``imgpath`` (full path of a file)
        and ``labels`` (index of its folder in ``folders``). Rows are ordered
        by folder, then by file name.

    Raises:
        FileNotFoundError: If one of the folders does not exist
            (propagated from ``os.listdir``).
    """
    records = []
    for label, folder in enumerate(folders):
        folderpath = os.path.join(path, folder)
        # os.listdir returns entries in arbitrary order; sorting keeps the row
        # order -- and therefore any seeded split of this frame -- identical
        # from run to run and machine to machine.
        for name in sorted(os.listdir(folderpath)):
            filepath = os.path.join(folderpath, name)
            # Skip sub-directories and other non-file entries: they cannot be
            # opened as images later on.
            if os.path.isfile(filepath):
                records.append((filepath, label))
    return pd.DataFrame(records, columns=['imgpath', 'labels'])

In [None]:
# Dataset root (Kaggle input directory) and the class folders.
# load_dataset labels files by folder position: 'judi_resized' -> 0, 'non-judi_resized' -> 1
path = '/kaggle/input/gamblingsitesid-img2/gamblingsitesid'
folders = ['judi_resized', 'non-judi_resized']

# Build the (imgpath, labels) DataFrame covering both folders
dataset = load_dataset(path, folders)

In [None]:
# Split the dataset into train (80%), validation (10%) and test (10%).
# Both splits are stratified on the label column and use the shared seed.
train_df, temp_df = train_test_split(dataset, train_size=0.8, stratify=dataset['labels'], random_state=seed)
val_df, test_df = train_test_split(temp_df, train_size=0.5, stratify=temp_df['labels'], random_state=seed)

In [None]:
# Image pipeline: resize -> random horizontal flip -> brightness jitter
# -> tensor conversion -> per-channel normalization
_channel_mean = [0.485, 0.456, 0.406]
_channel_std = [0.229, 0.224, 0.225]

_pipeline_steps = [
    transforms.Resize(img_size),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2),
    transforms.ToTensor(),
    transforms.Normalize(_channel_mean, _channel_std),
]
transform = transforms.Compose(_pipeline_steps)

In [None]:
# PyTorch dataset backed by the (imgpath, labels) DataFrame
class ImageDataset(Dataset):
    """Dataset that lazily loads images from the paths stored in a DataFrame.

    ``dataframe`` must provide an ``imgpath`` column (file path) and a
    ``labels`` column. Rows are addressed by position (``iloc``), so the
    frame's index labels do not matter.
    """

    def __init__(self, dataframe, transform=None):
        # The frame is kept by reference (no copy); `transform` is an optional
        # callable applied to each loaded PIL image.
        self.transform = transform
        self.dataframe = dataframe

    def __len__(self):
        """Return the number of rows (samples) in the frame."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at position ``idx``."""
        row = self.dataframe.iloc[idx]
        # Force three channels regardless of the file's colour mode
        image = Image.open(row['imgpath']).convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image, row['labels']

# Validation and test images must be preprocessed deterministically: random
# flips / brightness jitter would make the reported metrics (and the
# early-stopping signal) change between evaluations of the same model.
# Keep these steps in sync with the non-random steps of the training `transform`.
eval_transform = transforms.Compose([
    transforms.Resize(img_size),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Only the training split is augmented
train_data = ImageDataset(train_df, transform=transform)
val_data = ImageDataset(val_df, transform=eval_transform)
test_data = ImageDataset(test_df, transform=eval_transform)

# Only the training loader is shuffled; evaluation order stays fixed
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

In [None]:
# Model factory
def build_model(freeze_base=False, use_pretrained=True):
    """Create an EfficientNet-B4 with a one-output sigmoid head on ``device``.

    Args:
        freeze_base: When True, the parameters of ``model.features`` are
            excluded from gradient updates; the replacement classifier stays
            trainable.
        use_pretrained: When True, load ``EfficientNet_B4_Weights.DEFAULT``;
            otherwise the network is randomly initialised.

    Returns:
        The model, moved to the notebook-level ``device``. Its forward pass
        ends in a sigmoid, so it outputs one value in [0, 1] per sample.
    """
    if use_pretrained:
        weights = models.EfficientNet_B4_Weights.DEFAULT
    else:
        weights = None
    net = models.efficientnet_b4(weights=weights)

    if freeze_base:
        for frozen_param in net.features.parameters():
            frozen_param.requires_grad = False

    # Replace the stock classifier with a small binary head; its input width
    # is taken from the linear layer of the original classifier.
    head_in = net.classifier[1].in_features
    head_layers = [
        nn.Linear(head_in, 512),
        nn.ReLU(),
        nn.Dropout(0.3),
        nn.Linear(512, 1),
        nn.Sigmoid(),
    ]
    net.classifier = nn.Sequential(*head_layers)
    return net.to(device)

In [None]:
def _run_epoch(model, loader, criterion, optimizer=None, desc=None):
    """Run one pass over ``loader``; train when ``optimizer`` is given, else evaluate.

    Returns:
        ``(mean_loss, accuracy)`` where the loss is averaged per sample, so a
        smaller final batch does not skew the result.
    """
    is_train = optimizer is not None
    model.train(is_train)
    # Send batches to wherever the model lives instead of relying on a global
    model_device = next(model.parameters()).device
    batches = tqdm(loader, desc=desc, leave=True) if is_train else loader

    loss_sum, correct, total = 0.0, 0, 0
    with torch.set_grad_enabled(is_train):
        for images, labels in batches:
            images = images.to(model_device)
            labels = labels.float().to(model_device)

            if is_train:
                optimizer.zero_grad()
            outputs = model(images).squeeze(1)
            loss = criterion(outputs, labels)
            if is_train:
                loss.backward()
                optimizer.step()

            batch_n = labels.size(0)
            # criterion returns the batch mean; re-weight by batch size
            loss_sum += loss.item() * batch_n
            predicted = (outputs > 0.5).float()
            correct += (predicted == labels).sum().item()
            total += batch_n

            if is_train:
                batches.set_postfix(loss=loss.item(), acc=correct / total)

    denom = max(total, 1)  # an empty loader yields (0.0, 0.0) instead of dividing by zero
    return loss_sum / denom, correct / denom


def train_model(model, train_loader, val_loader, epochs=50, lr=0.0001, patience=5,
                checkpoint_path='best_model.pth'):
    """Train a sigmoid-output binary classifier with early stopping on validation loss.

    The weights with the lowest validation loss seen so far are saved to
    ``checkpoint_path``. Training stops once the validation loss has failed to
    improve for ``patience`` consecutive epochs.

    Args:
        model: Network whose output, after ``squeeze(1)``, is one probability per sample.
        train_loader: Iterable of ``(images, labels)`` training batches.
        val_loader: Iterable of ``(images, labels)`` validation batches.
        epochs: Maximum number of epochs.
        lr: Learning rate for the Adam optimizer.
        patience: Number of non-improving epochs tolerated before stopping.
        checkpoint_path: File that receives the best ``state_dict``.

    Returns:
        dict with the keys ``loss``, ``val_loss``, ``accuracy`` and
        ``val_accuracy``; each maps to a list holding one value per completed
        epoch (including the epoch that triggered early stopping).
    """
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    best_loss = float('inf')  # lowest validation loss so far
    best_acc = 0.0            # highest validation accuracy so far
    counter = 0               # epochs since the validation loss last improved
    history = {'loss': [], 'val_loss': [], 'accuracy': [], 'val_accuracy': []}

    for epoch in range(epochs):
        train_loss, train_acc = _run_epoch(
            model, train_loader, criterion,
            optimizer=optimizer, desc=f"Epoch {epoch+1}/{epochs}",
        )
        val_loss, val_acc = _run_epoch(model, val_loader, criterion)

        print(f"Epoch {epoch+1}/{epochs} | Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f} | Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f}")

        # Record the epoch before any early-stopping break so the last epoch is kept
        history['loss'].append(train_loss)
        history['accuracy'].append(train_acc)
        history['val_loss'].append(val_loss)
        history['val_accuracy'].append(val_acc)
        best_acc = max(best_acc, val_acc)

        # Checkpoint on the lowest validation loss
        if val_loss < best_loss:
            best_loss = val_loss
            torch.save(model.state_dict(), checkpoint_path)
            counter = 0
        else:
            counter += 1
            print(f"Early Stopping Counter: {counter}/{patience}")

        if counter >= patience:
            print("Early stopping triggered. Training stopped.")
            break

    print(f"Best Validation Accuracy: {best_acc:.4f}")
    return history

In [None]:
# Evaluation helper
def evaluate_model(model, test_loader, checkpoint_path='best_model.pth'):
    """Load the saved checkpoint into ``model`` and report accuracy on ``test_loader``.

    Args:
        model: Network whose output, after ``squeeze(1)``, is one probability
            per sample. Its weights are overwritten by the checkpoint.
        test_loader: Iterable of ``(images, labels)`` batches.
        checkpoint_path: ``state_dict`` file to load before evaluating.

    Returns:
        Fraction of samples whose thresholded prediction (> 0.5) equals the label.

    Raises:
        ValueError: If ``test_loader`` yields no samples.
    """
    # Evaluate on whatever device the model already lives on. map_location lets
    # a checkpoint saved on a GPU be loaded on a CPU-only machine.
    model_device = next(model.parameters()).device
    state_dict = torch.load(checkpoint_path, map_location=model_device)
    model.load_state_dict(state_dict)
    model.eval()

    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(model_device)
            labels = labels.float().to(model_device)
            outputs = model(images).squeeze(1)
            predicted = (outputs > 0.5).float()
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

    if total == 0:
        raise ValueError("test_loader yielded no samples; cannot compute accuracy")

    accuracy = correct / total
    print(f"Test Accuracy: {accuracy:.4f}")
    return accuracy

In [None]:
# Experiment 1: train from scratch (no pretrained weights)
print("Eksperimen 1: Tanpa Pretrained Model")
model_no_pretrain = build_model(use_pretrained=False)
# Keep the training result under the name the plotting cell expects, and pass
# the notebook-level hyper-parameters so editing the config cell takes effect.
history_no_pretrain = train_model(
    model_no_pretrain, train_loader, val_loader,
    epochs=epochs, lr=learning_rate,
)
evaluate_model(model_no_pretrain, test_loader)

In [None]:
# Experiment 2: pretrained backbone frozen, only the classifier head is trained
print("\nEksperimen 2: Fine-tuning Fully Connected Layer")
model_freeze_base = build_model(freeze_base=True, use_pretrained=True)
# Keep the training result under the name the plotting cell expects, and pass
# the notebook-level hyper-parameters so editing the config cell takes effect.
history_freeze_base = train_model(
    model_freeze_base, train_loader, val_loader,
    epochs=epochs, lr=learning_rate,
)
evaluate_model(model_freeze_base, test_loader)

In [None]:
# Experiment 3: pretrained weights, whole network fine-tuned
print("\nEksperimen 3: Fine-tuning Seluruh Jaringan")
model_fine_tune_all = build_model(freeze_base=False, use_pretrained=True)
# Keep the training result under the name the plotting cell expects, and pass
# the notebook-level hyper-parameters so editing the config cell takes effect.
history_fine_tune_all = train_model(
    model_fine_tune_all, train_loader, val_loader,
    epochs=epochs, lr=learning_rate,
)
evaluate_model(model_fine_tune_all, test_loader)

In [None]:
import matplotlib.pyplot as plt

# Plot helper for the per-epoch training curves
def plot_training_history(history, title):
    """Plot train/validation accuracy and loss side by side.

    Args:
        history: Either a dict with the keys ``accuracy``, ``val_accuracy``,
            ``loss`` and ``val_loss`` (one value per epoch), or an object that
            exposes such a dict on a ``.history`` attribute.
        title: Prefix used for both subplot titles.
    """
    # The training loop in this notebook collects its history in a plain dict;
    # the previous `.history[...]` access only works for objects that wrap the
    # dict. Accept both forms.
    records = getattr(history, 'history', history)

    panels = [
        ('Accuracy', (('accuracy', 'Train Accuracy'), ('val_accuracy', 'Validation Accuracy'))),
        ('Loss', (('loss', 'Train Loss'), ('val_loss', 'Validation Loss'))),
    ]

    fig, ax = plt.subplots(1, 2, figsize=(15, 5))
    for axis, (metric, series) in zip(ax, panels):
        for key, label in series:
            values = records[key]
            # 1-based x axis so the ticks match the "Epoch n/N" training log
            axis.plot(range(1, len(values) + 1), values, label=label)
        axis.set_title(f'{title} - {metric}')
        axis.set_xlabel('Epoch')
        axis.set_ylabel(metric)
        axis.legend()
        axis.grid(alpha=0.2)

    plt.show()

# Plot the results of experiment 1: no pretrained weights
plot_training_history(history_no_pretrain, "No Pretrained Model")

# Plot the results of experiment 2: only the fully connected head is fine-tuned
plot_training_history(history_freeze_base, "Freeze Base Model")

# Plot the results of experiment 3: the whole network is fine-tuned
plot_training_history(history_fine_tune_all, "Fine-tune All Layers")