APPROACH THAT WE USED IN THIS RESEARCH

In [None]:
import tensorflow as tf
import numpy as np
import cv2
import os

# Dataset locations and preprocessing settings
train_dir = "../data/final/train"
test_dir = "../data/final/test"
batch_size = 4
img_size = (224, 224)  # size handed to cv2.resize in load_image

# Names of all .tif files found directly inside the training directory
image_files = [name for name in os.listdir(train_dir) if name.endswith(".tif")]

# The class id is the text before the first underscore of the file name
# (e.g. "001_1.tif" -> "001"). Sorted ids are numbered 0, 1, 2, ...
class_labels = sorted({name.split("_")[0] for name in image_files})
class_to_index = dict(zip(class_labels, range(len(class_labels))))

# Function to load and preprocess images
def load_image(filename, dir_path, class_to_index):
    """Load one image file and return it with its integer class label.

    The image is read as grayscale, resized to the module-level ``img_size``,
    divided by 255 and replicated along a new last axis so it has 3 channels.

    Args:
        filename: Image file name; the text before the first "_" is the class id.
        dir_path: Directory that contains ``filename``.
        class_to_index: Mapping from class id (str) to integer label.

    Returns:
        A tuple ``(img, label)``: the preprocessed image array and the integer
        label looked up in ``class_to_index``.

    Raises:
        OSError: If OpenCV could not read the file (missing or undecodable).
        KeyError: If the class id from the file name is not in ``class_to_index``.
    """
    img_path = os.path.join(dir_path, filename)
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the path rather than later inside cv2.resize.
        raise OSError(f"Could not read image file: {img_path}")

    img = cv2.resize(img, img_size)
    img = img / 255.0
    img = np.expand_dims(img, axis=-1)   # add a trailing channel axis
    img = np.repeat(img, 3, axis=-1)     # copy the gray channel 3 times

    class_id = filename.split("_")[0]
    try:
        label = class_to_index[class_id]
    except KeyError:
        raise KeyError(
            f"Class id {class_id!r} (from file {filename!r}) is not in class_to_index"
        ) from None

    return img, label

# Load all images into memory
dataset = [load_image(f, train_dir, class_to_index) for f in image_files]
if not dataset:
    # Without this check, the unpacking below fails with an obscure
    # "not enough values to unpack" error.
    raise ValueError(f"No .tif images found in {train_dir}")

# Split into image and label arrays
X, y = zip(*dataset)
X = np.array(X, dtype=np.float32)
y = np.array(y, dtype=np.int32)

# Create tf.data.Dataset.
# Shuffle individual samples first and batch afterwards: batching before
# shuffling only reorders fixed batches, so the same images would always
# end up in the same batch.
tf_dataset = tf.data.Dataset.from_tensor_slices((X, y))
tf_dataset = tf_dataset.shuffle(len(y)).batch(batch_size)

print(f"✅ Loaded {len(y)} images into a TensorFlow dataset!")

In [None]:
# Re-scan the training directory and rebuild the class-id -> index mapping
train_files = [name for name in os.listdir(train_dir) if name.endswith(".tif")]
train_labels = sorted({name.split("_")[0] for name in train_files})
class_to_index = dict(zip(train_labels, range(len(train_labels))))

# Load every training image together with its label
train_data = [load_image(name, train_dir, class_to_index) for name in train_files]
train_images, train_targets = zip(*train_data)
X_train = np.array(train_images, dtype=np.float32)
y_train = np.array(train_targets, dtype=np.int32)

# Wrap the arrays in a tf.data.Dataset: shuffle the samples, then batch them
tf_dataset = (
    tf.data.Dataset.from_tensor_slices((X_train, y_train))
    .shuffle(len(y_train))
    .batch(batch_size)
)

print(f"✅ Loaded {len(y_train)} training images into TensorFlow dataset")

In [None]:
# Names of all .tif files found directly inside the test directory
test_image_files = [f for f in os.listdir(test_dir) if f.endswith(".tif")]

# Load and preprocess the test images, using the same class mapping as training
test_dataset = [load_image(f, test_dir, class_to_index) for f in test_image_files]

# Split into image and label arrays
X_test, y_test = zip(*test_dataset)
X_test = np.array(X_test, dtype=np.float32)
y_test = np.array(y_test, dtype=np.int32)

# Create tf.data.Dataset for testing (batched only, no shuffling)
tf_test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test))
tf_test_dataset = tf_test_dataset.batch(batch_size)
# The message previously said "training images" although this is the test set.
print(f"✅ Loaded {len(y_test)} test images into TensorFlow dataset")

In [None]:
# Random augmentation pipeline for the training images
data_augmentation = tf.keras.Sequential([
    tf.keras.layers.RandomRotation(0.05),
    tf.keras.layers.RandomZoom(0.1),
    # load_image scales pixels to [0, 1], so the layer must be told that range;
    # its default value_range is (0, 255), which makes the brightness shift
    # far larger than the whole pixel range of these images.
    tf.keras.layers.RandomBrightness(factor=0.1, value_range=(0.0, 1.0)),
    tf.keras.layers.RandomContrast(0.1),
])

def augment(image, label):
    """Apply the random augmentation pipeline to ``image``; pass ``label`` through.

    ``training=True`` is passed explicitly so the random layers are active
    when called from ``Dataset.map``.
    """
    return data_augmentation(image, training=True), label

# tf_dataset is already batched at this point, so augmentation runs per batch
tf_dataset = tf_dataset.map(augment)

In [None]:
import tensorflow as tf
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, Dropout, Input, Conv2D, MaxPooling2D

# Number of classes, taken from the label mapping built while loading the data
# so the output layer always matches the labels (80 for classes 001-080).
num_classes = len(class_to_index)

# Input shape (grayscale images are replicated to 3 channels for VGG16)
input_shape = (224, 224, 3)

# Load VGG16 with pre-trained ImageNet weights, without its classifier head
base_model = VGG16(weights="imagenet", include_top=False, input_shape=input_shape)

# Freeze the base model (VGG16) to retain pre-trained weights
base_model.trainable = False

# Add a small classification head on top of VGG16
x = base_model.output
x = Flatten()(x)
x = Dense(256, activation="relu")(x)
x = Dropout(0.3)(x)
output = Dense(num_classes, activation="softmax")(x)

# Define model
model = Model(inputs=base_model.input, outputs=output)

# Compile model; labels are integer class indices, hence the sparse loss
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

# Print summary
model.summary()

In [None]:
from tensorflow.keras.callbacks import ReduceLROnPlateau
# Mark the last 16 layers of the VGG16 base as trainable for fine-tuning
for vgg_layer in base_model.layers[-16:]:
    vgg_layer.trainable = True

# Compile again so the trainable change takes effect, with a small learning rate
fine_tune_optimizer = tf.keras.optimizers.Adam(1e-5)
model.compile(
    optimizer=fine_tune_optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Halve the learning rate (down to at most 1e-7) when val_loss has not
# improved for 3 epochs
lr_scheduler = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=3,
    min_lr=1e-7,
    verbose=1,
)

In [None]:
# Train for 30 epochs; tf_test_dataset is used as the validation data
model.fit(
    x=tf_dataset,
    epochs=30,
    validation_data=tf_test_dataset,
    callbacks=[lr_scheduler],
)

In [None]:
# Continue training the same model for 4 more epochs
model.fit(
    x=tf_dataset,
    epochs=4,
    validation_data=tf_test_dataset,
    callbacks=[lr_scheduler],
)

In [None]:
# Continue training the same model for 15 more epochs
model.fit(
    x=tf_dataset,
    epochs=15,
    validation_data=tf_test_dataset,
    callbacks=[lr_scheduler],
)

In [None]:
# Another 15 epochs on the same model, same validation data and callback
model.fit(
    x=tf_dataset,
    epochs=15,
    validation_data=tf_test_dataset,
    callbacks=[lr_scheduler],
)

In [None]:
# Evaluate the model on the test dataset and print loss and accuracy.
# Values noted by the author for earlier runs (presumably test accuracy):
#   60 epochs -> 0.9301, 69 epochs -> 0.9462, 88 epochs -> 0.9624
test_loss, test_acc = model.evaluate(tf_test_dataset)
print(f"Test Accuracy: {test_acc:.4f}")
print(f"Test Loss: {test_loss:.4f}")

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Gather the test images and labels back out of the batched dataset.
# The loop variables are deliberately not named `x` / `y`, so the module-level
# `x` and `y` defined in earlier cells are not overwritten.
test_batches = list(tf_test_dataset)
X_test = np.concatenate([images.numpy() for images, _ in test_batches], axis=0)
y_test = np.concatenate([labels.numpy() for _, labels in test_batches], axis=0)

# Predict class probabilities and take the most likely class per image
y_pred_probs = model.predict(X_test, batch_size=8)
y_pred = np.argmax(y_pred_probs, axis=1)

# Classification Report & Confusion Matrix
print("\nClassification Report:")
print(classification_report(y_test, y_pred, digits=4))

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import numpy as np

# Collect all true labels and predicted labels for the test set.
# tf_test_dataset is batched but not shuffled, so both passes see the
# samples in the same order.
y_true = np.concatenate([labels.numpy() for _, labels in tf_test_dataset], axis=0)
# One predict call over the whole dataset instead of one call per batch;
# predict uses only the inputs of each (inputs, labels) element.
y_pred = np.argmax(model.predict(tf_test_dataset), axis=1)

# Build the confusion matrix
cm = confusion_matrix(y_true, y_pred)

# Visualise it as a heatmap with the counts written in each cell
plt.figure(figsize=(12, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap="Blues", linewidths=0.5, linecolor='gray')
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.title("Confusion Matrix")
plt.show()

In [None]:
import tensorflow as tf
# Report how many GPUs TensorFlow can see
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))

# Report how many layers the VGG16 base model has
base_layer_count = len(base_model.layers)
print(base_layer_count)