##**Mount Drive**

In [2]:
import tensorflow as tf
from google.colab import drive
drive.mount('/content/drive')
print("GPU Available: ", tf.test.is_gpu_available())

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
GPU Available:  True


##**Split 90% Train, 5% Val, 5% Test**

In [None]:
import os
import shutil
import random

# Tentukan path dataset
dataset_path = "/content/drive/MyDrive/Dataset 224x224"

# Tentukan path output untuk menyimpan dataset terbagi
output_path = "/content/drive/MyDrive/DatasetSplit"

# Buat direktori output jika belum ada
os.makedirs(output_path, exist_ok=True)

# Membaca nama file dari setiap kelas
normal_files = os.listdir(os.path.join(dataset_path, "Normal"))
pneumonia_files = os.listdir(os.path.join(dataset_path, "Pneumonia"))
mass_files = os.listdir(os.path.join(dataset_path, "Mass"))
nodule_files = os.listdir(os.path.join(dataset_path, "Nodule"))
tuberculosis_files = os.listdir(os.path.join(dataset_path, "Tuberculosis"))

# Menggabungkan semua kategori
all_files = {
    "Normal": normal_files,
    "Pneumonia": pneumonia_files,
    "Mass": mass_files,
    "Nodule": nodule_files,
    "Tuberculosis": tuberculosis_files
}

# Mengacak urutan file di setiap kategori
for category in all_files:
    random.shuffle(all_files[category])

# Tentukan rasio pembagian dataset
train_ratio = 0.9
validation_ratio = 0.05
test_ratio = 0.05

# Memisahkan dataset menjadi bagian pelatihan, validasi, dan pengujian
train_data = {}
validation_data = {}
test_data = {}

for category, files in all_files.items():
    total_files = len(files)
    train_split = int(train_ratio * total_files)
    validation_split = int(validation_ratio * total_files)

    train_data[category] = files[:train_split]
    validation_data[category] = files[train_split:train_split+validation_split]
    test_data[category] = files[train_split+validation_split:]

# Fungsi untuk memindahkan file ke direktori output
def move_files(files, source_path, destination_path):
    for file in files:
        source_file = os.path.join(source_path, file)
        destination_file = os.path.join(destination_path, file)
        shutil.copyfile(source_file, destination_file)

# Memindahkan file ke direktori output
for category, files in train_data.items():
    source_path = os.path.join(dataset_path, category)
    destination_path = os.path.join(output_path, "train", category)
    os.makedirs(destination_path, exist_ok=True)
    move_files(files, source_path, destination_path)

for category, files in validation_data.items():
    source_path = os.path.join(dataset_path, category)
    destination_path = os.path.join(output_path, "validation", category)
    os.makedirs(destination_path, exist_ok=True)
    move_files(files, source_path, destination_path)

for category, files in test_data.items():
    source_path = os.path.join(dataset_path, category)
    destination_path = os.path.join(output_path, "test", category)
    os.makedirs(destination_path, exist_ok=True)
    move_files(files, source_path, destination_path)
print("Split Data Selesai")

In [None]:
import os

train_folder = "/content/drive/MyDrive/DatasetSplit/train"
validation_folder = "/content/drive/MyDrive/DatasetSplit/validation"
test_folder = "/content/drive/MyDrive/DatasetSplit/test"

categories = ["Mass", "Nodule", "Normal", "Pneumonia", "Tuberculosis"]

print("TRAIN")
for category in categories:
    category_folder = os.path.join(train_folder, category)
    file_count = len(os.listdir(category_folder))
    print(f"Jumlah file pada folder {category}: {file_count}")
print("VALIDATION")
for category in categories:
    category_folder = os.path.join(validation_folder, category)
    file_count = len(os.listdir(category_folder))
    print(f"Jumlah file pada folder {category}: {file_count}")
print("TEST")
for category in categories:
    category_folder = os.path.join(test_folder, category)
    file_count = len(os.listdir(category_folder))
    print(f"Jumlah file pada folder {category}: {file_count}")

TRAIN
Jumlah file pada folder Mass: 1788
Jumlah file pada folder Nodule: 1800
Jumlah file pada folder Normal: 1800
Jumlah file pada folder Pneumonia: 1786
Jumlah file pada folder Tuberculosis: 1800
VALIDATION
Jumlah file pada folder Mass: 100
Jumlah file pada folder Nodule: 100
Jumlah file pada folder Normal: 100
Jumlah file pada folder Pneumonia: 100
Jumlah file pada folder Tuberculosis: 100
TEST
Jumlah file pada folder Mass: 100
Jumlah file pada folder Nodule: 100
Jumlah file pada folder Normal: 100
Jumlah file pada folder Pneumonia: 100
Jumlah file pada folder Tuberculosis: 100


##**MobileNet**

In [None]:
import tensorflow as tf
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNet
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import classification_report, confusion_matrix

# Tentukan path dataset split
train_dir = "/content/drive/MyDrive/DatasetSplit/train"
validation_dir = "/content/drive/MyDrive/DatasetSplit/validation"
test_dir = "/content/drive/MyDrive/DatasetSplit/test"

# Tentukan parameter model
input_shape = (224, 224)
num_classes = 5
batch_size = 64
epochs = 50

# Preprocess data train
train_datagen = ImageDataGenerator(
    # rescale=1.0/255.0,
    # shear_range=0.2,
    # zoom_range=0.2,
    # horizontal_flip=True
    )

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=input_shape,
    batch_size=batch_size,
    class_mode='categorical'
    )

# Preprocess data validasi
validation_datagen = ImageDataGenerator(
    # rescale=1.0/255.0
    )

validation_generator = validation_datagen.flow_from_directory(
    validation_dir,
    target_size=input_shape,
    batch_size=batch_size,
    class_mode='categorical',
    )


# Load model MobileNet tanpa bagian fully connected terakhir (top)
base_model = MobileNet(weights='imagenet', include_top=False, input_shape=(input_shape[0], input_shape[1], 3))

# Tetapkan lapisan base_model agar tidak dilatih
for layer in base_model.layers:
    layer.trainable = False

# Tambahkan fully connected layer baru
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
predictions = Dense(num_classes, activation='softmax')(x)

# Buat model baru dengan arsitektur yang telah diubah
model = Model(inputs=base_model.input, outputs=predictions)

# Compile model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Latih model
history = model.fit(
    train_generator,
    steps_per_epoch=train_generator.samples // batch_size,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=validation_generator.samples // batch_size
    )

# Test Generator
test_datagen = ImageDataGenerator()

test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=input_shape,
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False)

# Prediksi data test
y_pred = model.predict(test_generator)

# Prediksi berdasaarkan class/label
y_pred_labels = np.argmax(y_pred, axis=1)
class_names = ["Mass", "Nodule", "Normal", "Pneumonia", "Tuberculosis"]

# get true labels from the generator
y_true = test_generator.classes

# Confusion Matrix
report = classification_report(y_true, y_pred_labels, target_names=class_names, digits=5)
conf_mat = confusion_matrix(y_true, y_pred_labels)

print(report)
print(conf_mat)

# Load batch data
batch_images, batch_labels = next(test_generator)

# Predict labels/class dengan model
predicted_labels = model.predict(batch_images)

# Daftar class
class_names = list(train_generator.class_indices.keys())

# Membuat plot
grid_size = (2, 4)
fig, ax = plt.subplots(nrows=grid_size[0], ncols=grid_size[1], figsize=(10, 10))
for i , axi in enumerate(ax.flat):
    # Plot image
    axi.imshow(batch_images[i])
    axi.set_title(f"True: {class_names[np.argmax(batch_labels[i])]}\nPredicted: {class_names[np.argmax(predicted_labels[i])]}")
    axi.axis("off")

plt.tight_layout()
plt.show()

plt.figure(figsize=(10,8))
sns.heatmap(conf_mat, annot=True, cmap='Blues', fmt='g', 
            xticklabels=["Mass", "Nodule", "Normal", "Pneumonia", "Tuberculosis"], 
            yticklabels=["Mass", "Nodule", "Normal", "Pneumonia", "Tuberculosis"])
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.show()

# Mengambil nilai loss dan akurasi dari history
train_loss = history.history['loss']
val_loss = history.history['val_loss']
train_acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

# Membuat grafik loss
plt.plot(train_loss, label='Training Loss')
plt.plot(val_loss, label='Validation Loss')
plt.title('Model Loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend()
plt.show()

# Membuat grafik akurasi
plt.plot(train_acc, label='Training Accuracy')
plt.plot(val_acc, label='Validation Accuracy')
plt.title('Model Accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend()
plt.show()

# Simpan model
model.save("/content/drive/MyDrive/MobileNet_model.h5")


Found 8918 images belonging to 5 classes.
Found 500 images belonging to 5 classes.
Epoch 1/50
 18/139 [==>...........................] - ETA: 8:00 - loss: 1.8513 - accuracy: 0.4459