# PRESENSI MAHASISWA DENGAN FACE RECOGNITION

In [10]:
import os
import time
import copy
import random

import numpy as np
import torch
import torch.nn as nn
from torch.optim import AdamW
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models

from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns

In [11]:
# ==========================
# CONFIGURATION
# ==========================

# --- Data locations -------------------------------------------------
# DATASET_DIR is expected to hold a train/ and a val/ sub-folder.
DATASET_DIR = "dataset/split"
TRAIN_DIR, VAL_DIR = (
    os.path.join(DATASET_DIR, split_name) for split_name in ("train", "val")
)

# --- Reproducibility and output -------------------------------------
SEED = 42
MODEL_NAME = "efficientnet_b1.pth"

# --- Data loading ---------------------------------------------------
INPUT_SIZE = 224   # must match the size of the cropped face images
BATCH_SIZE = 16
NUM_WORKERS = 0    # can be raised on a machine with a strong CPU

# --- Optimisation ---------------------------------------------------
NUM_EPOCHS = 50
LEARNING_RATE = 1e-4
WEIGHT_DECAY = 1e-4
FINE_TUNE_MODE = "none"  # options: "none", "partial", "full"

In [12]:
# ==========================
# UTILITAS
# ==========================

def set_seed(seed=42):
    """Seed every random number generator used here and pin cuDNN settings.

    Args:
        seed: Integer seed handed to Python's ``random``, NumPy, and PyTorch
            (both the default generator and all CUDA generators).
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # Disable cuDNN autotuning and request deterministic kernels.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True


def ensure_dir(path):
    """Create directory *path*, including missing parents, if it is absent.

    Uses ``exist_ok=True`` instead of a separate existence check so that the
    call cannot fail when the directory appears between the check and the
    creation.

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: If *path* already exists but is not a directory.
    """
    os.makedirs(path, exist_ok=True)

In [13]:
# ==========================
# DATASET & DATALOADER
# ==========================

def get_dataloaders(input_size=224, batch_size=16, num_workers=2):
    """Build the training and validation DataLoaders from TRAIN_DIR / VAL_DIR.

    Training images are resized and lightly augmented (horizontal flip,
    small rotation, brightness/contrast jitter); validation images are only
    resized. Both are converted to tensors and normalized with the ImageNet
    mean/std.

    Args:
        input_size: Side length (pixels) images are resized to.
        batch_size: Batch size used by both loaders.
        num_workers: Number of DataLoader worker processes.

    Returns:
        Tuple ``(train_loader, val_loader, class_names)`` where
        ``class_names`` is the list of class folder names found in TRAIN_DIR.

    Raises:
        ValueError: If the class folders of TRAIN_DIR and VAL_DIR differ.
            Label indices are derived per dataset from its folder names, so a
            mismatch would make validation labels disagree with training
            labels.
    """
    # Shared tail of both pipelines: tensor conversion + ImageNet statistics.
    to_normalized_tensor = [
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],  # ImageNet mean
            std=[0.229, 0.224, 0.225],   # ImageNet std
        ),
    ]

    # Training pipeline with augmentation.
    train_transform = transforms.Compose([
        transforms.Resize((input_size, input_size)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomRotation(degrees=10),
        transforms.ColorJitter(brightness=0.2, contrast=0.2),
        *to_normalized_tensor,
    ])

    # Validation pipeline (no augmentation).
    val_transform = transforms.Compose([
        transforms.Resize((input_size, input_size)),
        *to_normalized_tensor,
    ])

    train_dataset = datasets.ImageFolder(TRAIN_DIR, transform=train_transform)
    val_dataset = datasets.ImageFolder(VAL_DIR, transform=val_transform)

    if val_dataset.classes != train_dataset.classes:
        raise ValueError(
            f"Folder kelas di train dan val tidak sama: {TRAIN_DIR} vs {VAL_DIR}"
        )

    # Pinned host memory only helps host-to-GPU copies; requesting it without
    # CUDA just produces a warning, so enable it only when CUDA is available.
    pin_memory = torch.cuda.is_available()

    train_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
        pin_memory=pin_memory,
    )

    val_loader = DataLoader(
        val_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
        pin_memory=pin_memory,
    )

    class_names = train_dataset.classes  # class folder names (student NIM)
    return train_loader, val_loader, class_names

In [14]:
# ==========================
# EfficientNet-B1 pre-trained
# ==========================

def build_model(num_classes):
    """Create an ImageNet-pretrained EfficientNet-B1 with a new classifier head.

    Which backbone parameters are trainable is controlled by the module-level
    ``FINE_TUNE_MODE`` (case-insensitive):

    * ``"none"``    - the whole backbone stays frozen.
    * ``"partial"`` - parameters whose name contains ``features.7`` or
      ``features.8`` are unfrozen.
    * ``"full"``    - every parameter is trainable.

    In all modes the final linear layer is replaced by a fresh
    ``nn.Linear(in_features, num_classes)``, which is trainable.

    Args:
        num_classes: Number of output classes for the new classifier layer.

    Returns:
        The configured model.

    Raises:
        ValueError: If ``FINE_TUNE_MODE`` is not one of the supported values.
    """
    # Validate the mode first so a typo fails before any weights are loaded.
    mode = FINE_TUNE_MODE.lower()
    if mode not in ("none", "partial", "full"):
        raise ValueError(f"FINE_TUNE_MODE tidak dikenal: {FINE_TUNE_MODE}. Gunakan 'none', 'partial', atau 'full'.")

    # Prefer the weights-enum API; fall back to the legacy ``pretrained`` flag
    # only when the enum is missing. Other errors (e.g. a failed download) are
    # no longer swallowed.
    try:
        weights = models.EfficientNet_B1_Weights.IMAGENET1K_V1
    except AttributeError:
        model = models.efficientnet_b1(pretrained=True)
    else:
        model = models.efficientnet_b1(weights=weights)

    # ------- 1) Start with every layer frozen -------
    for param in model.parameters():
        param.requires_grad = False

    # ------- 2) Apply the fine-tuning mode -------
    if mode == "none":
        # No fine-tuning: the backbone stays frozen; only the classifier
        # created below will be trained.
        print("[INFO] Fine-tuning mode: NONE (hanya classifier yang dilatih)")

    elif mode == "partial":
        # Partial fine-tuning: unfreeze the last feature blocks.
        print("[INFO] Fine-tuning mode: PARTIAL (unfreeze block terakhir)")
        for name, param in model.named_parameters():
            if "features.7" in name or "features.8" in name:
                param.requires_grad = True

    else:  # mode == "full"
        # Full fine-tuning: every parameter may be updated.
        print("[INFO] Fine-tuning mode: FULL (semua layer dilatih)")
        for param in model.parameters():
            param.requires_grad = True

    # ------- 3) Replace the final classifier so the output size is num_classes -------
    in_features = model.classifier[1].in_features
    model.classifier[1] = nn.Linear(in_features, num_classes)

    return model


In [15]:
def train_one_epoch(model, dataloader, criterion, optimizer, device):
    """Run one optimisation pass over *dataloader* and report its metrics.

    Args:
        model: Network to train; switched to training mode.
        dataloader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable applied to ``(outputs, labels)``.
        optimizer: Optimizer stepped once per batch.
        device: Device the batches are moved to.

    Returns:
        ``(epoch_loss, epoch_acc)``: the sample-weighted mean loss and the
        fraction of correctly classified samples.
    """
    model.train()
    loss_sum, hits, seen = 0.0, 0, 0

    for batch_x, batch_y in dataloader:
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)

        optimizer.zero_grad()
        logits = model(batch_x)
        batch_loss = criterion(logits, batch_y)
        batch_loss.backward()
        optimizer.step()

        # Weight the batch loss by its size so the epoch mean is per-sample.
        count = batch_x.size(0)
        predicted = logits.max(dim=1).indices
        loss_sum += batch_loss.item() * count
        hits += (predicted == batch_y).sum().item()
        seen += count

    return loss_sum / seen, hits / seen


def evaluate(model, dataloader, criterion, device):
    """Compute loss and accuracy over *dataloader* without updating the model.

    Args:
        model: Network to evaluate; switched to evaluation mode.
        dataloader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable applied to ``(outputs, labels)``.
        device: Device the batches are moved to.

    Returns:
        ``(epoch_loss, epoch_acc)``: the sample-weighted mean loss and the
        fraction of correctly classified samples.
    """
    model.eval()
    loss_sum, hits, seen = 0.0, 0, 0

    with torch.no_grad():
        for batch_x, batch_y in dataloader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)

            logits = model(batch_x)
            batch_loss = criterion(logits, batch_y)
            predicted = logits.max(dim=1).indices

            # Weight the batch loss by its size so the mean is per-sample.
            count = batch_x.size(0)
            loss_sum += batch_loss.item() * count
            hits += (predicted == batch_y).sum().item()
            seen += count

    return loss_sum / seen, hits / seen

In [16]:
# ==========================
# VISUALISASI TRAINING
# ==========================

def _save_curve(epochs, series, ylabel, title, path):
    """Plot each ``(values, legend_label)`` pair against *epochs* and save to *path*."""
    plt.figure(figsize=(8, 6))
    for values, legend_label in series:
        plt.plot(epochs, values, label=legend_label)
    plt.xlabel("Epoch")
    plt.ylabel(ylabel)
    plt.title(title)
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.savefig(path)
    plt.close()


def plot_training_curves(history, output_dir="models"):
    """Save the loss and accuracy curves of a training run as PNG files.

    NOTE: this file defines ``plot_training_curves`` a second time further
    down; when both definitions are executed, the later one takes effect.

    Args:
        history: Dict with the per-epoch lists ``"train_loss"``,
            ``"val_loss"``, ``"train_acc"`` and ``"val_acc"``.
        output_dir: Directory receiving ``loss_curve.png`` and
            ``accuracy_curve.png``. It is created when missing.
    """
    # savefig does not create directories, so make sure the target exists.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    epochs = range(1, len(history["train_loss"]) + 1)

    # Loss figure
    loss_path = os.path.join(output_dir, "loss_curve.png")
    _save_curve(
        epochs,
        [(history["train_loss"], "Train Loss"), (history["val_loss"], "Val Loss")],
        "Loss",
        "Training & Validation Loss",
        loss_path,
    )
    print(f"Grafik loss disimpan ke: {loss_path}")

    # Accuracy figure
    acc_path = os.path.join(output_dir, "accuracy_curve.png")
    _save_curve(
        epochs,
        [(history["train_acc"], "Train Acc"), (history["val_acc"], "Val Acc")],
        "Accuracy",
        "Training & Validation Accuracy",
        acc_path,
    )
    print(f"Grafik akurasi disimpan ke: {acc_path}")

In [17]:
# ==========================
# EVALUASI LENGKAP (CM + F1)
# ==========================

def evaluate_full_metrics(model, dataloader, class_names, device, output_dir="models"):
    """Print and save the confusion matrix and classification report.

    Runs *model* over *dataloader*, then writes ``confusion_matrix.png`` and
    ``classification_report.txt`` into *output_dir*.

    Args:
        model: Trained network; switched to evaluation mode.
        dataloader: Iterable yielding ``(inputs, labels)`` batches, with
            labels being integer indices into *class_names*.
        class_names: Class names, ordered by label index.
        device: Device the inputs are moved to.
        output_dir: Directory receiving the output files. It is created when
            missing.
    """
    # savefig / open do not create directories, so make sure the target exists.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    model.eval()
    all_labels = []
    all_preds = []

    with torch.no_grad():
        for inputs, labels in dataloader:
            outputs = model(inputs.to(device))
            _, preds = torch.max(outputs, 1)

            # Labels are only collected, so they do not need to go to `device`.
            all_labels.extend(labels.tolist())
            all_preds.extend(preds.tolist())

    all_labels = np.array(all_labels)
    all_preds = np.array(all_preds)

    # Pin the label set to every known class. Otherwise a class that appears
    # neither in the ground truth nor in the predictions is dropped, which
    # misaligns the heatmap ticks and makes target_names fail.
    label_ids = list(range(len(class_names)))

    # Confusion matrix
    cm = confusion_matrix(all_labels, all_preds, labels=label_ids)
    print("\n=== Confusion Matrix (angka) ===")
    print(cm)

    # Confusion matrix heatmap
    plt.figure(figsize=(10, 8))
    sns.heatmap(
        cm,
        annot=True,
        fmt="d",
        cmap="Blues",
        xticklabels=class_names,
        yticklabels=class_names
    )
    plt.xlabel("Predicted")
    plt.ylabel("True")
    plt.title("Confusion Matrix")
    plt.tight_layout()

    cm_path = os.path.join(output_dir, "confusion_matrix.png")
    plt.savefig(cm_path)
    plt.close()
    print(f"Confusion matrix disimpan ke: {cm_path}")

    # Remaining metrics (precision, recall, F1, support, accuracy).
    # zero_division=0 reports undefined metrics as 0 without emitting a
    # warning for every class that was never predicted.
    print("\n=== Classification Report (Precision, Recall, F1) ===")
    report = classification_report(
        all_labels,
        all_preds,
        labels=label_ids,
        target_names=class_names,
        zero_division=0,
    )
    print(report)

    # Also store the report as a text file (explicit encoding for portability).
    report_path = os.path.join(output_dir, "classification_report.txt")
    with open(report_path, "w", encoding="utf-8") as f:
        f.write(report)
    print(f"Classification report disimpan ke: {report_path}")

In [18]:
# ==========================
# VISUALISASI TRAINING
# ==========================

def _save_curve(epochs, series, ylabel, title, path):
    """Plot each ``(values, legend_label)`` pair against *epochs* and save to *path*."""
    plt.figure(figsize=(8, 6))
    for values, legend_label in series:
        plt.plot(epochs, values, label=legend_label)
    plt.xlabel("Epoch")
    plt.ylabel(ylabel)
    plt.title(title)
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.savefig(path)
    plt.close()


def plot_training_curves(history, output_dir="models"):
    """Save the loss and accuracy curves of a training run as PNG files.

    NOTE: a function with this name is also defined earlier in this file;
    when both definitions are executed, this later one takes effect.

    Args:
        history: Dict with the per-epoch lists ``"train_loss"``,
            ``"val_loss"``, ``"train_acc"`` and ``"val_acc"``.
        output_dir: Directory receiving ``loss_curve.png`` and
            ``accuracy_curve.png``. It is created when missing.
    """
    # savefig does not create directories, so make sure the target exists.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    epochs = range(1, len(history["train_loss"]) + 1)

    # Loss figure
    loss_path = os.path.join(output_dir, "loss_curve.png")
    _save_curve(
        epochs,
        [(history["train_loss"], "Train Loss"), (history["val_loss"], "Val Loss")],
        "Loss",
        "Training & Validation Loss",
        loss_path,
    )
    print(f"Grafik loss disimpan ke: {loss_path}")

    # Accuracy figure
    acc_path = os.path.join(output_dir, "accuracy_curve.png")
    _save_curve(
        epochs,
        [(history["train_acc"], "Train Acc"), (history["val_acc"], "Val Acc")],
        "Accuracy",
        "Training & Validation Accuracy",
        acc_path,
    )
    print(f"Grafik akurasi disimpan ke: {acc_path}")

In [19]:
# ==========================
# MAIN TRAINING LOOP
# ==========================

def main():
    """Train the classifier end to end and write all artefacts to ``models/``.

    Steps: seed the RNGs, build the data loaders and the model, train for
    ``NUM_EPOCHS`` epochs while remembering the weights with the highest
    validation accuracy, then restore those weights, save the training
    curves, the confusion matrix / classification report, and finally the
    checkpoint (state dict plus class names).

    Returns early, after printing a message, when the train/val folders are
    missing.
    """
    set_seed(SEED)

    device = "cpu"

    # Abort when the expected dataset folders are not both present.
    if not (os.path.isdir(TRAIN_DIR) and os.path.isdir(VAL_DIR)):
        print(f"Folder train/val tidak ditemukan di: {DATASET_DIR}")
        return

    loaders = get_dataloaders(
        input_size=INPUT_SIZE,
        batch_size=BATCH_SIZE,
        num_workers=NUM_WORKERS,
    )
    train_loader, val_loader, class_names = loaders

    num_classes = len(class_names)
    print(f"Jumlah kelas (NIM): {num_classes}")
    print("Daftar kelas:", class_names)

    model = build_model(num_classes=num_classes).to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)

    # Scheduler: multiply the learning rate by 0.1 every 10 epochs.
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

    best_val_acc = 0.0
    best_model_wts = copy.deepcopy(model.state_dict())

    history = {
        metric: [] for metric in ("train_loss", "train_acc", "val_loss", "val_acc")
    }

    start_time = time.time()

    for epoch_no in range(1, NUM_EPOCHS + 1):
        print(f"\nEpoch {epoch_no}/{NUM_EPOCHS}")
        print("-" * 40)

        train_loss, train_acc = train_one_epoch(
            model, train_loader, criterion, optimizer, device
        )
        val_loss, val_acc = evaluate(model, val_loader, criterion, device)

        scheduler.step()

        epoch_metrics = (
            ("train_loss", train_loss),
            ("train_acc", train_acc),
            ("val_loss", val_loss),
            ("val_acc", val_acc),
        )
        for metric, value in epoch_metrics:
            history[metric].append(value)

        print(f"Train Loss: {train_loss:.4f}  |  Train Acc: {train_acc:.4f}")
        print(f"Val   Loss: {val_loss:.4f}  |  Val   Acc: {val_acc:.4f}")

        # Keep a copy of the weights with the best validation accuracy so far.
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_model_wts = copy.deepcopy(model.state_dict())
            print(f"==> Model terbaik diperbarui (val_acc = {best_val_acc:.4f})")

    total_time = time.time() - start_time
    print("\n===============================")
    print("Training selesai")
    print(f"Waktu total: {total_time/60:.2f} menit")
    print(f"Akurasi validasi terbaik: {best_val_acc:.4f}")
    print("===============================")

    # Restore the best weights before producing any artefact.
    model.load_state_dict(best_model_wts)

    # Make sure the output folder exists.
    ensure_dir("models")

    # Training curves
    print("\nMembuat grafik training (loss & accuracy)...")
    plot_training_curves(history, output_dir="models")

    # Full evaluation on the validation data (confusion matrix, precision,
    # recall, F1).
    print("\nMelakukan evaluasi lengkap pada data validasi...")
    evaluate_full_metrics(model, val_loader, class_names, device, output_dir="models")

    # Checkpoint: weights plus the class names.
    model_path = os.path.join("models", MODEL_NAME)
    checkpoint = {
        "model_state_dict": model.state_dict(),
        "class_names": class_names,
    }
    torch.save(checkpoint, model_path)
    print(f"\nModel terbaik disimpan ke: {model_path}")


if __name__ == "__main__":
    main()

Jumlah kelas (NIM): 70
Daftar kelas: ['119140141', '120140156', '121140135', '122140001', '122140005', '122140006', '122140008', '122140009', '122140010', '122140012', '122140016', '122140018', '122140027', '122140038', '122140039', '122140043', '122140055', '122140056', '122140076', '122140077', '122140087', '122140095', '122140098', '122140100', '122140101', '122140103', '122140104', '122140107', '122140116', '122140117', '122140118', '122140119', '122140122', '122140127', '122140129', '122140130', '122140132', '122140134', '122140135', '122140137', '122140138', '122140140', '122140141', '122140144', '122140145', '122140150', '122140152', '122140153', '122140155', '122140156', '122140160', '122140163', '122140164', '122140165', '122140169', '122140170', '122140171', '122140172', '122140173', '122140182', '122140187', '122140198', '122140202', '122140207', '122140208', '122140209', '122140219', '122140222', '122140236', '122140239']
[INFO] Fine-tuning mode: NONE (hanya classifier yang dilatih)

  type_true = type_of_target(y_true, input_name="y_true")
  type_pred = type_of_target(y_pred, input_name="y_pred")
  ys_types = set(type_of_target(x) for x in ys)
  ys_types = set(type_of_target(x) for x in ys)



=== Confusion Matrix (angka) ===
[[0 0 0 ... 0 0 0]
 [0 1 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 1 0]
 [0 0 0 ... 0 0 0]]
Confusion matrix disimpan ke: models\confusion_matrix.png

=== Classification Report (Precision, Recall, F1) ===
              precision    recall  f1-score   support

   119140141       0.00      0.00      0.00         1
   120140156       1.00      1.00      1.00         1
   121140135       0.00      0.00      0.00         1
   122140001       0.00      0.00      0.00         1
   122140005       0.00      0.00      0.00         1
   122140006       0.00      0.00      0.00         1
   122140008       0.00      0.00      0.00         1
   122140009       0.00      0.00      0.00         1
   122140010       0.00      0.00      0.00         1
   122140012       1.00      1.00      1.00         1
   122140016       0.00      0.00      0.00         1
   122140018       1.00      1.00      1.00         1
   122140027       0.00      0

  type_true = type_of_target(y_true, input_name="y_true")
  type_pred = type_of_target(y_pred, input_name="y_pred")
  ys_types = set(type_of_target(x) for x in ys)
  ys_types = set(type_of_target(x) for x in ys)
  type_true = type_of_target(y_true, input_name="y_true")
  type_pred = type_of_target(y_pred, input_name="y_pred")
  ys_types = set(type_of_target(x) for x in ys)
  ys_types = set(type_of_target(x) for x in ys)
  type_true = type_of_target(y_true, input_name="y_true")
  type_pred = type_of_target(y_pred, input_name="y_pred")
  ys_types = set(type_of_target(x) for x in ys)
  ys_types = set(type_of_target(x) for x in ys)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  type_true = type_of_target(y_true, input_name="y_true")
  type_pred = type_of_target(y_pred, input_name="y_pred")
  ys_types = set(type_of_target(x) for x in ys)
  ys_types = set(type_of_target(x) for x in ys)
  type_true = type_of_target(y_true, input_name="y_true")
  type_pred = typ