In [4]:
#Mendownload dataset
!gdown 1PwzKTSqLUEKMqTPnII6D3GkHHd1Xtfba

#Melakukan unzip file dataset
!unzip -q lung_image_sets.zip

#Menghapus file zip
!rm -rf lung_image_sets.zip

Downloading...
From (original): https://drive.google.com/uc?id=1PwzKTSqLUEKMqTPnII6D3GkHHd1Xtfba
From (redirected): https://drive.google.com/uc?id=1PwzKTSqLUEKMqTPnII6D3GkHHd1Xtfba&confirm=t&uuid=c28cad7a-3a36-40e1-a452-b039e45ed026
To: /workspace/TelU-TugasAkhir/lung_image_sets.zip
100%|████████████████████████████████████████| 929M/929M [00:44<00:00, 21.1MB/s]


In [2]:
import os
from sklearn.model_selection import train_test_split
import tensorflow as tf
import shutil   
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D, Input
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.applications.efficientnet import preprocess_input as efficientnet_preprocess
import matplotlib.pyplot as plt
from tensorflow.keras.applications import EfficientNetB0, EfficientNetB1, EfficientNetB2, EfficientNetB3, EfficientNetB4, EfficientNetB5, EfficientNetB6, EfficientNetB7

In [3]:
CWD = os.getcwd()
CWD

'/workspace/TelU-TugasAkhir'

In [5]:
extracted_folder_path = os.path.join(CWD, 'lung_image_sets')
source_files = os.listdir(extracted_folder_path)
print(source_files)

['lung_scc', 'lung_n', 'lung_aca']


In [8]:
train_dir = os.path.join(CWD, 'lung_image_sets', 'train')
val_dir = os.path.join(CWD, 'lung_image_sets', 'validation')
test_dir = os.path.join(CWD, 'lung_image_sets', 'test')

# Create directories
os.makedirs(train_dir, exist_ok=True)
os.makedirs(val_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

def split_data(source_dir, train_dir, val_dir, test_dir, train_ratio=0.72, val_ratio=0.2, test_ratio=0.08):
    # Get all the files in the source directory
    file_names = os.listdir(source_dir)
    
    # Split data into train and remaining (val + test)
    train_files, temp_files = train_test_split(file_names, test_size=(1 - train_ratio))
    
    # Now split the remaining data into validation and test sets
    val_files, test_files = train_test_split(temp_files, test_size=(test_ratio / (val_ratio + test_ratio)))
    
    # Move files to respective directories
    for file_name in train_files:
        shutil.move(os.path.join(source_dir, file_name), os.path.join(train_dir, file_name))
        
    for file_name in val_files:
        shutil.move(os.path.join(source_dir, file_name), os.path.join(val_dir, file_name))
        
    for file_name in test_files:
        shutil.move(os.path.join(source_dir, file_name), os.path.join(test_dir, file_name))

    print("Data split into train, validation, and test sets.")

split_data(source_files, train_dir, val_dir, test_dir)

ValueError: With n_samples=1, test_size=0.2857142857142857 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.

In [10]:
def create_image_generators(train_dir, val_dir, test_dir, target_size=(224, 224), batch_size=32, preprocessing_func=None):
    # ImageDataGenerator without augmentation (for comparison)
    train_datagen_no_aug = ImageDataGenerator(
        rescale=1./255
    )

    # ImageDataGenerator with augmentation
    train_datagen_with_aug = ImageDataGenerator(
        rescale=1./255,
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        preprocessing_function=preprocessing_func
    )
    
    # For validation and test sets, we do not apply augmentation
    val_test_datagen = ImageDataGenerator(
        rescale=1./255,
        preprocessing_function=preprocessing_func
    )
    
    # Flow images from directories
    train_generator_no_aug = train_datagen_no_aug.flow_from_directory(
        train_dir,
        target_size=target_size,
        batch_size=batch_size,
        class_mode='categorical'
    )
    
    train_generator_with_aug = train_datagen_with_aug.flow_from_directory(
        train_dir,
        target_size=target_size,
        batch_size=batch_size,
        class_mode='categorical'
    )
    
    val_generator = val_test_datagen.flow_from_directory(
        val_dir,
        target_size=target_size,
        batch_size=batch_size,
        class_mode='categorical'
    )
    
    test_generator = val_test_datagen.flow_from_directory(
        test_dir,
        target_size=target_size,
        batch_size=batch_size,
        class_mode='categorical',
        shuffle=False
    )
    
    return train_generator_no_aug, train_generator_with_aug, val_generator, test_generator

# Pilih fungsi preprocessing yang sesuai dengan model
preprocess_input_func = efficientnet_preprocess

train_generator_no_aug, train_generator_with_aug, val_generator, test_generator = create_image_generators(
    train_dir=train_dir,
    val_dir=val_dir,
    test_dir=test_dir,
    target_size=(224, 224),  # Image size
    batch_size=32,           # Batch size
    preprocessing_func=preprocess_input_func  # Optional preprocessing function (e.g., for VGG16 or other models)
)

In [None]:
def create_model(base_model_class, input_shape):
    base_model = base_model_class(input_shape=input_shape, include_top=False, weights='imagenet')
    base_model.trainable = False  # Freeze the base model

    inputs = Input(shape=input_shape)
    # Tidak perlu preprocessing lagi di sini
    x = base_model(inputs, training=False)
    x = GlobalAveragePooling2D()(x)
    x = Dropout(0.5)(x)
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.5)(x)

    outputs = Dense(3, activation='softmax')(x)

    model = Model(inputs, outputs)

    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

efficientnet_b0 = create_model(EfficientNetB0, input_shape=(224, 224, 3))

In [None]:
def create_callbacks(model_name, checkpoint_path, patience=5, reduce_lr_factor=0.5, reduce_lr_patience=5):
    if not os.path.exists(checkpoint_path):
        os.makedirs(checkpoint_path)

    saving_path = os.path.join(checkpoint_path, f'model_{model_name}.h5')

    # ModelCheckpoint to save the best model based on validation accuracy
    checkpoint = ModelCheckpoint(
        filepath=saving_path,  # Path to save the model
        monitor='val_accuracy',    # Monitor validation accuracy
        save_best_only=True,       # Save the model only when val_accuracy improves
        mode='max',                # Maximize validation accuracy
        verbose=1                  # Show messages when saving
    )
    
    # EarlyStopping to stop training when the model stops improving
    early_stopping = EarlyStopping(
        monitor='val_loss',        # Monitor validation loss
        patience=patience,         # Stop after 'patience' epochs without improvement
        mode='min',                # Minimize validation loss
        restore_best_weights=True, # Restore the best weights after stopping
        verbose=1
    )
    
    # ReduceLROnPlateau to reduce the learning rate when a plateau is detected
    reduce_lr = ReduceLROnPlateau(
        monitor='val_loss',        # Monitor validation loss
        factor=reduce_lr_factor,   # Factor by which to reduce learning rate
        patience=reduce_lr_patience, # How many epochs to wait before reducing
        mode='min',                # Minimize validation loss
        min_lr=1e-6,               # Lower bound on the learning rate
        verbose=1
    )
    
    # Return the list of callbacks
    return [checkpoint, early_stopping, reduce_lr]

# Define the path to save the best model
checkpoint_path = CWD + '/model'

# Create the callbacks
callbacks_eb0_aug = create_callbacks(
    model_name='EfficientNetB0_aug',
    checkpoint_path=checkpoint_path,
    patience=5,       
)

callbacks_eb0_no_aug = create_callbacks(
    model_name='EfficientNetB0_no_aug',
    checkpoint_path=checkpoint_path,
    patience=5,       
)

In [None]:
# Fungsi untuk melatih model dan menampilkan grafik
def train_and_plot(model, model_name, train_generator, validation_generator, callbacks, epochs=20):
    history = model.fit(
        train_generator,
        validation_data=validation_generator,
        epochs=epochs,
        callbacks= callbacks
    )

    # Plotting hasil
    plt.figure(figsize=(12, 4))

    # Plot training & validation accuracy
    plt.subplot(1, 2, 1)
    plt.plot(history.history['accuracy'])
    plt.plot(history.history['val_accuracy'])
    plt.title(f'{model_name} Accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')

    # Plot training & validation loss
    plt.subplot(1, 2, 2)
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title(f'{model_name} Loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')

    plt.show()

In [None]:
train_and_plot(efficientnet_b0, 'EfficientNetB0_aug', train_generator_no_aug, val_generator, callbacks_eb0_aug)

In [None]:

train_and_plot(efficientnet_b0, 'EfficientNetB0_no_aug', train_generator_with_aug, val_generator, callbacks_eb0_no_aug)