In [8]:
# ==============================================================================
# Cell 1: Setup - Importing Necessary Libraries
# ==============================================================================
import os
import numpy as np
import tensorflow as tf
from google.colab import drive
from PIL import Image
import matplotlib.pyplot as plt
import random

from tensorflow.keras.layers import Input, TimeDistributed, LSTM, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.optimizers import Adam

print("TensorFlow Version:", tf.__version__)


TensorFlow Version: 2.18.0


In [9]:
# ==============================================================================
# Cell 2: Mount Google Drive
# ==============================================================================
try:
    drive.mount('/content/drive')
except Exception as e:
    print(f"Drive already mounted or error: {e}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [10]:
# ==============================================================================
# Cell 3: Configuration and Hyperparameters
# !!! IMPORTANT: Update DATA_DIR to point to your dataset location. !!!
# ==============================================================================
# --- Data Configuration ---
DATA_DIR = '/content/drive/MyDrive/MODEL_RM_BARU_2/dataset/'
TRAIN_DIR = os.path.join(DATA_DIR, 'train')
VALID_DIR = os.path.join(DATA_DIR, 'valid')
TEST_DIR = os.path.join(DATA_DIR, 'test')

# --- Model Hyperparameters ---
IMAGE_SIZE = (224, 224)
SEQUENCE_LENGTH = 5
BATCH_SIZE = 8 # Reduced batch size to start
EPOCHS_PHASE_1 = 10
EPOCHS_PHASE_2 = 20
LEARNING_RATE_PHASE_1 = 1e-3
LEARNING_RATE_PHASE_2 = 1e-5

In [11]:
# ==============================================================================
# Cell 4: NEW Memory-Efficient Data Generator
# This part is rewritten to load data on-the-fly, preventing RAM crashes.
# ==============================================================================
def get_file_paths_and_labels(base_dir):
    """Gets a list of all file paths and their corresponding labels."""
    all_files = []
    all_labels = []
    class_names = [d for d in os.listdir(base_dir) if os.path.isdir(os.path.join(base_dir, d))]

    for class_name in class_names:
        label = 0 if class_name.lower() == 'normal' else 1
        class_dir = os.path.join(base_dir, class_name)
        image_files = sorted([os.path.join(class_dir, f) for f in os.listdir(class_dir)])

        for i in range(len(image_files) - SEQUENCE_LENGTH + 1):
            sequence_files = image_files[i:i+SEQUENCE_LENGTH]
            all_files.append(sequence_files)
            all_labels.append(label)

    return all_files, all_labels

# Get the lists of file paths and labels
train_files, train_labels = get_file_paths_and_labels(TRAIN_DIR)
val_files, val_labels = get_file_paths_and_labels(VALID_DIR)
test_files, test_labels = get_file_paths_and_labels(TEST_DIR)

# Shuffle the training data
temp_list = list(zip(train_files, train_labels))
random.shuffle(temp_list)
train_files, train_labels = zip(*temp_list)

print(f"Found {len(train_files)} training sequences.")
print(f"Found {len(val_files)} validation sequences.")
print(f"Found {len(test_files)} test sequences.")

def preprocess_image(file_path):
    """Loads and preprocesses a single image."""
    img = tf.io.read_file(file_path)
    img = tf.image.decode_png(img, channels=1) # Read as grayscale
    img = tf.image.resize(img, IMAGE_SIZE)
    img = tf.cast(img, tf.float32) / 255.0
    img = tf.image.grayscale_to_rgb(img) # Convert to 3 channels for EfficientNet
    return img

def data_generator(file_list, label_list):
    """Yields sequences and labels."""
    for files, label in zip(file_list, label_list):
        sequence_images = [preprocess_image(f) for f in files]
        yield tf.stack(sequence_images), tf.constant(label, dtype=tf.int32)

def create_dataset(files, labels, is_training=True):
    """Creates a tf.data.Dataset from the generator."""
    dataset = tf.data.Dataset.from_generator(
        lambda: data_generator(files, labels),
        output_signature=(
            tf.TensorSpec(shape=(SEQUENCE_LENGTH, *IMAGE_SIZE, 3), dtype=tf.float32),
            tf.TensorSpec(shape=(), dtype=tf.int32)
        )
    )
    if is_training:
        dataset = dataset.shuffle(buffer_size=1000)
    dataset = dataset.batch(BATCH_SIZE)
    dataset = dataset.prefetch(buffer_size=tf.data.AUTOTUNE)
    return dataset

# Create the final datasets
train_dataset = create_dataset(train_files, train_labels)
val_dataset = create_dataset(val_files, val_labels, is_training=False)
test_dataset = create_dataset(test_files, test_labels, is_training=False)


Found 597 training sequences.
Found 56 validation sequences.
Found 299 test sequences.


In [12]:
# ==============================================================================
# Cell 5: Build the Hybrid EfficientNet-LSTM Model (No Changes Here)
# ==============================================================================
print("\n--- Building Hybrid-EffNet-LSTM Model ---")
input_shape = (SEQUENCE_LENGTH, IMAGE_SIZE[0], IMAGE_SIZE[1], 3)
img_sequence_input = Input(shape=input_shape)
base_model = EfficientNetB0(include_top=False, weights='imagenet', pooling='avg')
base_model.trainable = False
encoded_sequence = TimeDistributed(base_model)(img_sequence_input)
context_vector = LSTM(128, dropout=0.3)(encoded_sequence)
x = Dropout(0.5)(context_vector)
x = Dense(64, activation='relu')(x)
output = Dense(1, activation='sigmoid')(x)
model = Model(img_sequence_input, output)
model.summary()



--- Building Hybrid-EffNet-LSTM Model ---


In [None]:
# ==============================================================================
# Cell 6: Train the Model using the New Datasets
# ==============================================================================
print("\n--- Starting Training: Phase 1 (Feature Extraction) ---")
model.compile(optimizer=Adam(learning_rate=LEARNING_RATE_PHASE_1),
              loss='binary_crossentropy',
              metrics=['accuracy'])
history_phase1 = model.fit(
    train_dataset,
    epochs=EPOCHS_PHASE_1,
    validation_data=val_dataset
)

print("\n--- Starting Training: Phase 2 (Fine-Tuning) ---")
base_model.trainable = True
model.compile(optimizer=Adam(learning_rate=LEARNING_RATE_PHASE_2),
              loss='binary_crossentropy',
              metrics=['accuracy'])
history_phase2 = model.fit(
    train_dataset,
    epochs=EPOCHS_PHASE_2,
    validation_data=val_dataset,
    initial_epoch=history_phase1.epoch[-1]
)



--- Starting Training: Phase 1 (Feature Extraction) ---
Epoch 1/10
     75/Unknown [1m313s[0m 2s/step - accuracy: 0.6924 - loss: 0.6051



[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m381s[0m 3s/step - accuracy: 0.6931 - loss: 0.6047 - val_accuracy: 0.8393 - val_loss: 0.4669
Epoch 2/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m238s[0m 3s/step - accuracy: 0.7469 - loss: 0.5930 - val_accuracy: 0.8393 - val_loss: 0.4447
Epoch 3/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m268s[0m 3s/step - accuracy: 0.7657 - loss: 0.5770 - val_accuracy: 0.8393 - val_loss: 0.4496
Epoch 4/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m262s[0m 3s/step - accuracy: 0.7453 - loss: 0.5770 - val_accuracy: 0.8393 - val_loss: 0.4576
Epoch 5/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m241s[0m 3s/step - accuracy: 0.7660 - loss: 0.5498 - val_accuracy: 0.8393 - val_loss: 0.4805
Epoch 6/10
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m320s[0m 3s/step - accuracy: 0.7419 - loss: 0.5897 - val_accuracy: 0.8393 -

In [None]:
# ==============================================================================
# Cell 7: Evaluate and Save the Final Model
# ==============================================================================
print("\n--- Evaluating Final Model on Test Data ---")
test_loss, test_accuracy = model.evaluate(test_dataset, verbose=1)
print(f"Test Accuracy: {test_accuracy*100:.2f}%")
print(f"Test Loss:     {test_loss:.4f}")

# (Plotting and saving code would go here, same as before)

# ==============================================================================
# Cell 10: Plot Training History
# ==============================================================================
def plot_history(history1, history2):
    acc = history1.history['accuracy'] + history2.history['accuracy']
    val_acc = history1.history['val_accuracy'] + history2.history['val_accuracy']
    loss = history1.history['loss'] + history2.history['loss']
    val_loss = history1.history['val_loss'] + history2.history['val_loss']

    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    plt.plot(acc, label='Training Accuracy')
    plt.plot(val_acc, label='Validation Accuracy')
    plt.axvline(x=len(history1.history['accuracy'])-1, color='r', linestyle='--', label='Start of Fine-Tuning')
    plt.title('Model Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend(loc='lower right')
    plt.subplot(1, 2, 2)
    plt.plot(loss, label='Training Loss')
    plt.plot(val_loss, label='Validation Loss')
    plt.axvline(x=len(history1.history['loss'])-1, color='r', linestyle='--', label='Start of Fine-Tuning')
    plt.title('Model Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend(loc='upper right')
    plt.tight_layout()
    plt.show()

plot_history(history_phase1, history_phase2)

In [None]:
# ==============================================================================
# Cell 11: Save the Final Model
# ==============================================================================
print("\n--- Saving Final Model ---")
model_save_path = '/content/drive/MyDrive/MODEL_RM_BARU_2/figshare_hybrid_model.h5'
model.save(model_save_path)
print(f"Model saved successfully to: {model_save_path}")