## MODEL PREPROCESSING

In [None]:
import pandas as pd
from PIL import Image
import random
import numpy as np
import cv2 as cv
import matplotlib.pyplot as plt
import os



# Directory that the relative image paths stored in the CSV are joined onto.
gray_image_path = "./"
# CSV index of the images; code in this file reads its `file_path`, `score`,
# `width` and `height` columns.
file_path = "./data/gray_data.csv"

# Alternative locations (presumably a mounted Google Drive on Colab -- confirm):
# gray_image_path = "/content/drive/MyDrive/datacompvis/grayimage/"
# file_path = "/content/drive/MyDrive/datacompvis/gray_data.csv"

df = pd.read_csv(filepath_or_buffer=file_path)

def flip(image, orientation=1):
    """Return `image` mirrored by ``cv.flip``.

    Args:
        image: Image array to mirror.
        orientation: Flip code forwarded unchanged to ``cv.flip`` (default 1).

    Raises:
        ValueError: If `image` is None.
    """
    if image is not None:
        return cv.flip(image, orientation)
    raise ValueError("Image must not be None")

def rotate(image, angle=15):
    """Rotate `image` by `angle` degrees around its center.

    The output canvas keeps the input's width and height and the scale
    factor is fixed at 1.0.

    Args:
        image: Array whose first two dimensions are (height, width).
        angle: Rotation angle in degrees, passed to ``cv.getRotationMatrix2D``.

    Returns:
        The result of ``cv.warpAffine`` for that rotation.
    """
    height, width = image.shape[:2]
    pivot = (width // 2, height // 2)
    rotation = cv.getRotationMatrix2D(pivot, angle, 1.0)
    return cv.warpAffine(image, rotation, (width, height))

def add_noise(image):
    """Return a copy of `image` with zero-mean Gaussian noise (sigma 10) added.

    The noise is added in floating point and, for integer images, the sum is
    rounded and clipped to the dtype's valid range before casting back.
    Casting the raw noise to ``uint8`` first (as was done previously) wraps
    every negative sample to a value near 255, which saturates about half of
    the pixels to white instead of producing symmetric noise.

    Args:
        image: NumPy image array (e.g. ``uint8`` grayscale).

    Returns:
        A new array with the same shape and dtype as `image`.
    """
    noise = np.random.normal(0, 10, image.shape)
    noisy = image.astype(np.float64) + noise
    if np.issubdtype(image.dtype, np.integer):
        limits = np.iinfo(image.dtype)
        noisy = np.clip(np.rint(noisy), limits.min, limits.max)
    return noisy.astype(image.dtype)

def brighten(image, value=30):
    """Brighten `image` through ``cv.convertScaleAbs``.

    Args:
        image: Image array to brighten.
        value: Offset passed as ``beta``; ``alpha`` is fixed at 1.

    Returns:
        The array produced by ``cv.convertScaleAbs``.
    """
    brightened = cv.convertScaleAbs(image, alpha=1, beta=value)
    return brightened

def resize(np_img, w = 512, h = 512):
    """Resize `np_img` to `w` x `h` pixels using linear interpolation.

    Args:
        np_img: Image array to resize.
        w: Target width (default 512).
        h: Target height (default 512).

    Returns:
        The resized array from ``cv.resize``.
    """
    target_size = (w, h)
    return cv.resize(np_img, dsize=target_size, interpolation=cv.INTER_LINEAR)

def Proccess(df = df, idx = None, biar_rapih = False):
    """Load one image listed in `df` and build several OpenCV views of it.

    Args:
        df: DataFrame with `file_path` and `score` columns. Defaults to the
            module-level `df` as bound when this function is defined.
        idx: Positional row index of the image to load. When None, a random
            valid row is drawn.
        biar_rapih: When True, the informational prints are skipped.

    Returns:
        tuple: ``(images, titles)`` where `images` is
        ``[original, gaussian-blurred, canny edges, binary threshold,
        contours drawn on a black canvas]`` and `titles` holds the matching
        display names.
    """
    if idx is None:
        # randrange excludes the upper bound; randint(0, len(df)) could
        # return len(df) itself and raise IndexError on the lookup below.
        idx = random.randrange(len(df))

    sample_image_name = df['file_path'].iloc[idx]
    sample_image_path = os.path.join(gray_image_path, sample_image_name)
    score = df['score'].iloc[idx]

    # Close the file handle as soon as the pixels are copied into NumPy.
    with Image.open(sample_image_path) as pil_image:
        img_np = np.array(pil_image)

    if not biar_rapih:
        print(f"image idx: {idx} | score: {score} | filename: {sample_image_name}")
        print(f"image size: {img_np.shape}")

    # Black canvas with the image's shape, used to draw the contours on.
    blank = np.zeros(shape=img_np.shape, dtype='uint8')

    gaussBlur = cv.GaussianBlur(img_np, (175, 175), 0.3)
    canny = cv.Canny(gaussBlur, 100, 100)

    _, tresh = cv.threshold(img_np, 125, 255, cv.THRESH_BINARY)

    # Contours are extracted from the Canny edge map, not from the raw image.
    contours, _ = cv.findContours(canny, cv.RETR_LIST, cv.CHAIN_APPROX_SIMPLE)
    cv.drawContours(blank, contours, -1, 255, 1)

    images = [img_np, gaussBlur, canny, tresh, blank]
    titles = ['Gray', 'Gaussian Blur', 'Canny', 'Threshold', 'Contours Drawn']

    return images, titles

def ShowImage(images, titles):
    """Display `images` in a three-column grid, each with its title.

    The grid has two rows for up to six images (the original fixed layout)
    and grows by whole rows beyond that, so longer lists no longer make
    ``plt.subplot`` fail on an out-of-range index.

    Args:
        images: Sequence of image arrays. 2-D arrays are drawn with a gray
            colormap; anything else is converted with ``cv.COLOR_BGR2RGB``
            first (assumes BGR channel order -- TODO confirm for arrays that
            were loaded through PIL).
        titles: Sequence of titles, indexed in parallel with `images`.
    """
    n_cols = 3
    # Ceiling division, but never fewer than the original two rows.
    n_rows = max(2, -(-len(images) // n_cols))

    plt.figure(figsize=(15, 8))

    for i, image in enumerate(images):
        plt.subplot(n_rows, n_cols, i + 1)
        if image.ndim == 2:  # grayscale
            plt.imshow(image, cmap='gray')
        else:  # colour image
            plt.imshow(cv.cvtColor(image, cv.COLOR_BGR2RGB))
        plt.title(titles[i])
        plt.axis('off')

    plt.tight_layout()
    plt.show()

## Splitting Before Augmenting

### Data Splitting

In [None]:
from torch.utils.data import random_split

# Dataset sizes: 75% of the rows for training, the remainder for validation.
total_samples = len(df)
train_size = int(total_samples * 0.75)
val_size = total_samples - train_size

# Randomly split the DataFrame rows. Despite their names, `train_df` and
# `val_df` are the objects returned by random_split, not DataFrames.
train_df, val_df = random_split(df, lengths=[train_size, val_size])

# Take the row indices from each Subset (falling back to the object itself
# when it has no `indices` attribute).
train_indices = getattr(train_df, 'indices', train_df)
val_indices = getattr(val_df, 'indices', val_df)

# Flat lists of file paths and labels taken from df, indexed by row position.
file_paths, scores = df['file_path'].tolist(), df['score'].tolist()



### Data Augmentation

In [None]:
# Empty accumulators for the training images and their labels.
gray_np, y_actual = [], []

# Simple augmentation function
def augment_pipeline(image):
    """Return `image` followed by six augmented variants of it.

    Order of the returned list: original, ``flip(image)``,
    ``flip(image, -1)``, noisy copy, then rotations by 15, 30 and 60 degrees.
    """
    variants = [image, flip(image), flip(image, -1), add_noise(image)]
    variants.extend(rotate(image, angle=degrees) for degrees in (15, 30, 60))
    return variants

# TRAIN SPLIT FIRST: each training image contributes itself plus all of its
# augmented variants, every one carrying the source image's label.
for i in train_indices:
    image_path = os.path.join(gray_image_path, file_paths[i])
    image = resize(np.array(Image.open(image_path)))
    print(image.shape)
    label = scores[i]

    augmented = augment_pipeline(image)
    gray_np.extend(augmented)
    y_actual.extend([label] * len(augmented))

# VALIDATION SET: resized only, no augmentation.
val_np, val_labels = [], []

for i in val_indices:
    image_path = os.path.join(gray_image_path, file_paths[i])
    image = resize(np.array(Image.open(image_path)))
    print(image.shape)
    val_np.append(image)
    val_labels.append(scores[i])

# Stack the image lists into NumPy arrays (the label lists stay plain lists).
gray_np = np.asarray(gray_np)
val_np = np.asarray(val_np)

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image

def show_augmented_images_3x3(image):
    """Plot `image` in grayscale next to six augmented variants of it.

    Despite the function name, the figure is a 2x4 grid: seven panels show
    images (original, two flips, noise, rotations by 15/30/60 degrees) and
    the eighth stays empty. All axes are hidden. Brightening is not part of
    the displayed augmentations.

    Args:
        image: Array accepted by ``Image.fromarray``; it is converted to
            single-channel mode ``'L'`` before augmenting.
    """
    # Convert to grayscale.
    gray = np.array(Image.fromarray(image).convert('L'))

    # Panels in display order.
    panels = [gray, flip(gray), flip(gray, -1), add_noise(gray)]
    panels += [rotate(gray, degrees) for degrees in (15, 30, 60)]

    fig, axes = plt.subplots(2, 4, figsize=(12, 6))
    # zip stops at the shorter sequence, so only the first seven axes get an image.
    for ax, panel in zip(axes.flat, panels):
        ax.imshow(panel, cmap='gray')
    for ax in axes.flat:
        ax.axis('off')
    plt.tight_layout()
    plt.show()


In [None]:
# Take a single image as an example.
# The path is assembled with os.path.join: the previous literal used
# backslash separators (Windows-only) and contained the invalid escape
# sequence "\d", which newer Python versions warn about.
image_path = os.path.join('data', 'datasets', '5.png')
image = resize(np.array(Image.open(image_path)))  # resize() must already be defined
show_augmented_images_3x3(image)

In [None]:
# Training set: image array shape, then number of labels (one value per line).
print(gray_np.shape, len(y_actual), sep="\n")

In [None]:
# Validation set: image array shape, then number of labels (one value per line).
print(val_np.shape, len(val_labels), sep="\n")

## Processing Data

In [None]:
# Extremes of the image dimensions recorded in the CSV.
widths, heights = df['width'].tolist(), df['height'].tolist()

max_w, min_w = max(widths), min(widths)
max_h, min_h = max(heights), min(heights)

print(f"max-width: {max_w} | min-width: {min_w} | max-height: {max_h} | min-height: {min_h}")

In [None]:
# Visual sanity check: process and display three randomly chosen images.
# Note that `images` and `titles` stay bound at module level to the lists
# returned by the last call.
for i in range(3):
    images, titles = Proccess()
    ShowImage(images, titles)

In [None]:
# The number of training images and the number of labels should match.
print(len(gray_np), len(y_actual))

In [None]:
import torch
from torch.utils.data import Dataset

class CannyContourDataset(Dataset):
    """Dataset that serves single-channel images as float tensors with integer labels.

    Args:
        images: Indexable collection of 2-D NumPy image arrays.
        labels: Indexable collection of class labels, parallel to `images`.
        transform: Optional callable applied to the image tensor before it
            is returned. Previously this argument was stored but never used.
    """

    def __init__(self, images, labels, transform=None):
        self.images = images
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of images."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for position `idx`.

        The image is divided by 255 (assumes 8-bit pixel values -- confirm
        against the data) and gets a leading channel axis, giving a float32
        tensor of shape ``[1, H, W]``. The label is an int64 tensor.
        """
        image = self.images[idx].astype(np.float32) / 255.0  # normalize to [0, 1]
        image = image[np.newaxis, ...]  # add the channel dimension -> [1, H, W]

        image = torch.tensor(image, dtype=torch.float32)
        label = torch.tensor(self.labels[idx], dtype=torch.long)

        if self.transform is not None:
            image = self.transform(image)

        return image, label

In [None]:
from torch.utils.data import DataLoader, random_split

# The train/validation split was already made on the row indices before
# augmentation, so each split gets its own dataset here instead of calling
# random_split on one combined dataset.
train_dataset = CannyContourDataset(images=gray_np, labels=y_actual)
val_dataset = CannyContourDataset(images=val_np, labels=val_labels)

# Training batches are shuffled and the incomplete last batch is dropped;
# validation keeps the DataLoader defaults for both.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=32,
    shuffle=True,
    drop_last=True,
)
val_loader = DataLoader(dataset=val_dataset, batch_size=32)

print(len(train_dataset), train_size, val_size)



In [None]:
import torch.nn as nn
import torch.nn.functional as F

class SimpleCNN(nn.Module):
    """CNN with three conv/pool stages followed by eight fully connected layers.

    NOTE: a second class named ``SimpleCNN`` is defined further down in this
    file and rebinds the name, so this version is only used if that later
    definition is not executed.

    Args:
        num_classes: Size of the output layer.
        input_size: ``(height, width)`` of the single-channel input images.
            The default matches the 512x512 output of ``resize``. The first
            linear layer is sized from it; the previous hard-coded
            ``64 * 64 * 512`` did not match what three 2x poolings of a
            512x512 image produce (``64 * 64 * 64``), so the forward pass
            failed with a shape mismatch.
    """

    def __init__(self, num_classes=5, input_size=(512, 512)):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 16, 3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)
        self.conv3 = nn.Conv2d(32, 64, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)

        # Three 2x2 poolings halve each spatial dimension three times
        # (floor division), and the last conv has 64 output channels.
        height, width = input_size
        flat_features = 64 * (height // 8) * (width // 8)

        self.fc1 = nn.Linear(flat_features, 512)
        self.fc2 = nn.Linear(512, 1024)
        self.fc3 = nn.Linear(1024, 512)
        # NOTE(review): 526 is kept from the original; possibly a typo -- confirm.
        self.fc4 = nn.Linear(512, 526)
        self.fc5 = nn.Linear(526, 128)
        self.fc6 = nn.Linear(128, 64)
        self.fc7 = nn.Linear(64, 32)
        self.fc8 = nn.Linear(32, num_classes)

        # Dropout with probability 0.5 (adjustable); applied after fc1-fc3 only.
        self.dropout = nn.Dropout(p=0.5)

    def forward(self, x):
        """Map a batch of shape ``(B, 1, H, W)`` to logits of shape ``(B, num_classes)``."""
        x = self.pool(F.relu(self.conv1(x)))  # (B, 16, H/2, W/2)
        x = self.pool(F.relu(self.conv2(x)))  # (B, 32, H/4, W/4)
        x = self.pool(F.relu(self.conv3(x)))  # (B, 64, H/8, W/8)
        x = x.view(x.size(0), -1)             # flatten to (B, 64 * H/8 * W/8)

        x = self.dropout(F.relu(self.fc1(x)))
        x = self.dropout(F.relu(self.fc2(x)))
        x = self.dropout(F.relu(self.fc3(x)))
        x = F.relu(self.fc4(x))
        x = F.relu(self.fc5(x))
        x = F.relu(self.fc6(x))
        x = F.relu(self.fc7(x))
        x = self.fc8(x)
        return x


In [None]:
import numpy as np
from collections import Counter
import torch

# Count how many training samples each class has.
label_counts = Counter(y_actual)
total_samples = sum(label_counts.values())

# Class weight = total / (number of classes * samples in that class).
# A class with no samples is treated as having one, to avoid dividing by zero.
num_classes = 5
class_weights = [
    total_samples / (num_classes * label_counts.get(class_id, 1))
    for class_id in range(num_classes)
]


print("Class Weights:", class_weights)

In [None]:
import torch.nn as nn
import torch.nn.functional as F

class SimpleCNN(nn.Module):
    """Two conv/batch-norm/pool stages followed by a batch-normalized MLP head.

    The first linear layer expects 524288 (= 32 * 128 * 128) flattened
    features, which is what two 2x poolings of a 512x512 single-channel
    input yield with 32 channels.

    Args:
        num_classes: Size of the final output layer.
    """

    def __init__(self, num_classes=5):
        super().__init__()

        # Convolutional stages (each is followed by 2x2 max pooling in forward).
        self.conv1 = nn.Conv2d(1, 16, 3, padding=1)
        self.bn1 = nn.BatchNorm2d(16)
        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)
        self.bn2 = nn.BatchNorm2d(32)
        self.pool = nn.MaxPool2d(2, 2)

        # Fully connected head; every hidden layer has its own BatchNorm1d.
        self.fc1 = nn.Linear(32 * 128 * 128, 512)
        self.bn3 = nn.BatchNorm1d(512)
        self.fc2 = nn.Linear(512, 512)
        self.bn4 = nn.BatchNorm1d(512)
        self.fc3 = nn.Linear(512, 256)
        self.bn5 = nn.BatchNorm1d(256)
        self.fc4 = nn.Linear(256, 128)
        self.bn6 = nn.BatchNorm1d(128)
        self.fc5 = nn.Linear(128, 64)
        self.bn7 = nn.BatchNorm1d(64)
        self.fc6 = nn.Linear(64, 32)
        self.bn8 = nn.BatchNorm1d(32)

        # Output layer (no normalization or activation).
        self.fc7 = nn.Linear(32, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(B, num_classes)`` for a batch ``x``."""
        x = self.pool(F.relu(self.bn1(self.conv1(x))))     # (B, 16, H/2, W/2)
        x = self.pool(F.sigmoid(self.bn2(self.conv2(x))))  # (B, 32, H/4, W/4)

        x = x.view(x.size(0), -1)

        hidden_stack = (
            (self.fc1, self.bn3),
            (self.fc2, self.bn4),
            (self.fc3, self.bn5),
            (self.fc4, self.bn6),
            (self.fc5, self.bn7),
            (self.fc6, self.bn8),
        )
        for linear, norm in hidden_stack:
            x = F.relu(norm(linear(x)))

        return self.fc7(x)


In [None]:
import torch

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
model = SimpleCNN().to(device)

# NOTE(review): `class_weights` is computed earlier in this file but is not
# passed to the loss here -- confirm whether an unweighted loss is intended.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)
# Halve the learning rate after 3 epochs without improvement of the monitored
# (minimized) metric. The `verbose=True` argument was dropped because it is
# deprecated since PyTorch 2.2; read the current learning rate from
# `optimizer.param_groups[0]['lr']` when it needs to be logged.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=3, factor=0.5)

In [None]:
# Show the shape of one training batch. At this point in the file `images`
# still refers to the Python list returned by Proccess(), which has no
# `.shape`, so a real batch is pulled from the loader instead.
sample_images, sample_labels = next(iter(train_loader))
print(sample_images.shape)

In [None]:
import torch
import torch.nn as nn

# Early-stopping hyperparameters
patience = 10        # epochs without improvement tolerated before stopping
best_val_acc = 0.0
counter = 0

for epoch in range(100):  # adjust the number of epochs as needed
    # ---- TRAINING ----
    model.train()
    running_loss = 0.0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        outputs = model(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    avg_train_loss = running_loss / len(train_loader)
    print("=======================================================")
    print(f"Epoch {epoch+1}, Loss: {avg_train_loss:.4f}")

    # ---- VALIDATION ----
    model.eval()
    val_loss = 0.0
    correct = 0  # reset every epoch
    total = 0

    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    avg_val_loss = val_loss / len(val_loader)
    # Step the scheduler on the same per-batch average that is reported
    # below, rather than on the un-normalized sum over batches.
    scheduler.step(avg_val_loss)
    val_acc = 100 * correct / total
    print(f"Validation Loss: {avg_val_loss:.4f}, Accuracy: {val_acc:.2f}%")

    # ---- EARLY STOPPING ----
    # Checkpointing and the patience counter only start once `epoch > 10`.
    # NOTE(review): a best accuracy reached during those first epochs is
    # therefore never saved -- confirm that this warm-up is intended.
    if epoch > 10:
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            counter = 0
            torch.save(model.state_dict(), 'best_model_hafidh_ver.pt')
            print("Validation accuracy improved. Model saved.")
        else:
            counter += 1
            print(f"No improvement. EarlyStopping counter: {counter}/{patience}")
            if counter >= patience:
                print("Early stopping triggered.")
                break

In [None]:
# Restore the best checkpoint written during training. Despite its name,
# `loaded_model` holds a state_dict, not a model object. `map_location`
# remaps the stored tensors onto the active device, so a checkpoint saved
# on a GPU can also be loaded on a CPU-only machine.
loaded_model = torch.load('best_model_hafidh_ver.pt', map_location=device)
model.load_state_dict(loaded_model)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns

all_labels = []
all_predictions = []
class_names = ['0', '1', '2', '3' ,'4']
# Explicit label ids keep the confusion matrix and the report at one row per
# class even when a class never occurs in the validation data; without them
# the report raises because `target_names` no longer matches the labels found.
label_ids = list(range(len(class_names)))

model.eval()
correct = 0
total = 0

# Collect predictions over the whole validation set without tracking gradients.
with torch.no_grad():
    for images, labels in val_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        all_labels.extend(labels.cpu().tolist())
        all_predictions.extend(predicted.cpu().tolist())

cm = confusion_matrix(all_labels, all_predictions, labels=label_ids)
print(f"Akurasi Validasi: {100 * correct / total:.2f}%")
plt.figure(figsize=(8,6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()  # was `plt.show` without parentheses, which never called it

# Classification report
print("\nClassification Report :")
print(classification_report(all_labels, all_predictions, labels=label_ids, target_names=class_names))