<a href="https://colab.research.google.com/github/azmiak/Kuliah-Deep-Learning/blob/main/UTS%20/%20FishImgDataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras.preprocessing.image import DirectoryIterator
from tensorflow.keras import layers, models
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.utils import Sequence
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, roc_curve, auc
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from collections import Counter
import cv2
from google.colab import drive
import zipfile

In [3]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Path to zip file
train_zip_path = '/content/drive/MyDrive/FishImgDataset-20250503T153940Z-001.zip/FishImgDataset/train'
val_zip_path = '/content/drive/MyDrive/FishImgDataset-20250503T153940Z-001.zip/FishImgDataset/val'
test_zip_path = '/content/drive/MyDrive/FishImgDataset-20250503T153940Z-001.zip/FishImgDataset/test'

# Extract zip to temporary folder
train_dir = '/content/train'
val_dir = '/content/val'
test_dir = '/content/test'

with zipfile.ZipFile(train_zip_path, 'r') as zip_ref:
    zip_ref.extractall(train_dir)
with zipfile.ZipFile(val_zip_path, 'r') as zip_ref:
    zip_ref.extractall(val_dir)
with zipfile.ZipFile(test_zip_path, 'r') as zip_ref:
    zip_ref.extractall(test_dir)

# Build DataFrame
def create_dataframe_from_dir(directory):
    image_paths = []
    labels = []
    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith(('.png', '.jpg', '.jpeg')):
                path = os.path.join(root, file)
                label = os.path.basename(root)
                image_paths.append(path)
                labels.append(label)
    return pd.DataFrame({'filepath': image_paths, 'label': labels})

train_df = create_dataframe_from_dir(train_dir)
val_df = create_dataframe_from_dir(val_dir)
test_df = create_dataframe_from_dir(test_dir)

print(f"Train: {train_df.shape}")
print(f"Val: {val_df.shape}")
print(f"Test: {test_df.shape}")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


NotADirectoryError: [Errno 20] Not a directory: '/content/drive/MyDrive/FishImgDataset-20250503T153940Z-001.zip/FishImgDataset/train'

In [None]:
# Label Encoding
le = LabelEncoder()

train_df['label_encoded'] = le.fit_transform(train_df['label'])
val_df['label_encoded'] = le.transform(val_df['label'])
test_df['label_encoded'] = le.transform(test_df['label'])

print("Contoh Label Encoding TRAIN:")
print(train_df[['label', 'label_encoded']].head())

# One-Hot Encoding
num_classes = len(le.classes_)
train_df['label_onehot'] = train_df['label_encoded'].apply(lambda x: to_categorical(x, num_classes=num_classes))
val_df['label_onehot'] = val_df['label_encoded'].apply(lambda x: to_categorical(x, num_classes=num_classes))
test_df['label_onehot'] = test_df['label_encoded'].apply(lambda x: to_categorical(x, num_classes=num_classes))

print("\nContoh One-Hot Encoding TRAIN:")
print(train_df[['label', 'label_onehot']].head())

In [None]:
# Mencari kategori/kelas ikan dari nama folder
fish_categories = os.listdir(train_dir)
num_classes = len(fish_categories)
print(f"Kategori ikan yang ditemukan: {fish_categories}")
print(f"Jumlah kelas: {num_classes}")

In [None]:
IMG_SIZE = (128, 128)  # Atur sesuai ukuran gambar dataset-mu
INPUT_SHAPE = IMG_SIZE + (3, )

# CNN model
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)),
    layers.MaxPooling2D((2, 2)),

    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),

    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),

    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(31, activation='softmax')
])

# Compile model
model.compile(optimizer='adam',
                 loss='categorical_crossentropy',
                 metrics=['accuracy'])

# Summary
model.summary()

In [None]:
print(train_df.columns)
print(train_df.head())
print(train_df.columns)

In [None]:
IMG_SIZE = (128, 128)
BATCH_SIZE = 32

# ImageDataGenerator for training
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# ImageDataGenerator for validation
val_datagen = ImageDataGenerator(rescale=1./255)

# Generator
train_gen = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    x_col='filepath',
    y_col='label',
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

val_gen = val_datagen.flow_from_dataframe(
    dataframe=val_df,
    x_col='filepath',
    y_col='label',
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

test_gen = test_datagen.flow_from_dataframe(
    dataframe=test_df,
    x_col='filepath',
    y_col='label',
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=False
)

train_dir = '/content/drive/MyDrive/dataset/train'
val_dir = '/content/drive/MyDrive/dataset/val'

history_tf = model.fit(
    train_gen,
    epochs=10,
    validation_data=val_gen
)

In [None]:
# Setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_pt = CNNModel(num_classes=31).to(device)

# Loss & Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model_pt.parameters(), lr=0.001)

print(model_pt)

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

train_dir = '/content/train'
val_dir = '/content/val'
test_dir = '/content/test'

# Transform dataset
transform = transforms.Compose([
    transforms.Resize(IMG_SIZE),
    transforms.ToTensor(),
])

# Dataset & Dataloader
train_data = datasets.ImageFolder(train_dir, transform=transform)
val_data = datasets.ImageFolder(val_dir, transform=transform)
test_data = datasets.ImageFolder(test_dir, transform=transform)

train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
val_loader = DataLoader(val_data, batch_size=32, shuffle=False)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False)

num_classes = len(train_data.classes)
print(f"Jumlah kelas: {num_classes}")

# CNN Model
class SimpleCNN(nn.Module):
    def __init__(self, num_classes):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.fc1 = nn.Linear(128 * (IMG_SIZE[0]//8) * (IMG_SIZE[1]//8), 128)
        self.fc2 = nn.Linear(128, num_classes)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = torch.flatten(x, 1)
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.fc2(x)
        return x

# Model & Optimizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_pt = SimpleCNN(num_classes).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_pt.parameters(), lr=0.001)

# Training Loop
for epoch in range(10):
    model_pt.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model_pt(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch [{epoch+1}/10] - Loss: {running_loss/len(train_loader):.4f}")

In [None]:
import torch

model_pt.eval()
all_preds = []
all_labels = []

with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model_pt(images)

        _, preds = torch.max(outputs, 1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

In [None]:
# Calculate Metrics
accuracy = accuracy_score(all_labels, all_preds)
precision = precision_score(all_labels, all_preds, average='macro')
recall = recall_score(all_labels, all_preds, average='macro')
f1 = f1_score(all_labels, all_preds, average='macro')

print(f'Accuracy: {accuracy:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1-Score: {f1:.4f}')

In [None]:
from sklearn.preprocessing import label_binarize

n_classes = len(test_loader.dataset.classes)

# Reset list
all_probs = []

with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        outputs = model_pt(images)
        probs = torch.softmax(outputs, dim=1)
        all_probs.extend(probs.cpu().numpy())

# Binarize labels for ROC-AUC multiclass
all_labels_bin = label_binarize(all_labels, classes=range(n_classes))
all_probs = np.array(all_probs)

auc_macro = roc_auc_score(all_labels_bin, all_probs, average="macro", multi_class="ovr")
print(f'ROC AUC (macro): {auc_macro:.4f}')

In [None]:
cm = confusion_matrix(all_labels, all_preds)

plt.figure(figsize=(12, 10))
sns.heatmap(cm, annot=True, fmt='d', cmap='OrRed')
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()