In [82]:
import os
import numpy as np
import pandas as pd
import zipfile
import shutil
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.optimizers import Adam

# Make Google Drive available inside the Colab runtime
from google.colab import drive
drive.mount('/content/drive')

# Drive root, with the raw image folder and one folder per data split beneath it
root_path = '/content/drive/MyDrive/'
folder_path = os.path.join(root_path, 'Images')
train_dir = os.path.join(root_path, 'Train')
val_dir = os.path.join(root_path, 'Validation')
test_dir = os.path.join(root_path, 'Test')
#ujicoba_dir = '/content/drive/MyDrive/Ujicoba'

# Make sure each per-class destination folder exists (no-op when already present)
for split_dir, class_folders in (
    (train_dir, ('Male', 'Female')),
    (val_dir, ('Val_Male', 'Val_Female')),
    (test_dir, ('Test_Male', 'Test_Female')),
):
    for class_folder in class_folders:
        os.makedirs(os.path.join(split_dir, class_folder), exist_ok=True)

# Names of the entries currently present in the raw image folder
existing_filenames = os.listdir(folder_path)

# Locations of the annotation files stored next to the image folder
list_attributes_path = os.path.join(root_path, 'list_attribute.txt')
class_identity_path = os.path.join(root_path, 'class_identity.txt')
gender_classification_path = os.path.join(root_path, 'gender_classification.csv')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Process List Attributes Data Frame

In [83]:
# Read the raw lines of the list_attributes file
with open(list_attributes_path, 'r') as file:
    list_attributes_content = file.readlines()

# The attribute names are the whitespace-separated tokens on the second line
attribute_names = list_attributes_content[1].strip().split()

# Load the data rows (everything after the first two lines) as whitespace-separated
# columns. The separator is a raw string: "\s" in a normal string literal is an
# invalid escape sequence and triggers a warning on recent Python versions.
list_attributes_df = pd.read_csv(list_attributes_path, sep=r"\s+", header=None, skiprows=2)

# First column is the image file name, the rest follow the header order.
# Assigning the names here raises if the column count does not match the header.
list_attributes_df.columns = ['Filename'] + attribute_names


# Process Class Identity Data Frame

In [84]:
# Read the class_identity file: two whitespace-separated columns, no header row.
# The separator is a raw string so that "\s" is not treated as an (invalid)
# string escape sequence.
class_identity_df = pd.read_csv(class_identity_path, sep=r"\s+", header=None)
class_identity_df.columns = ['Filename', 'Class_id']


# Menggabungkan DataFrame List Attributes dan Class Identity Berdasarkan Nama File


In [85]:
# Join attributes and identities on the shared 'Filename' key (inner join),
# then order the result by file name
merged_df = (
    list_attributes_df
    .merge(class_identity_df, on='Filename')
    .sort_values(by='Filename')
)

# Keep only the rows whose image is actually present in the image folder listing
is_on_disk = merged_df['Filename'].isin(existing_filenames)
clean_merged_df = merged_df[is_on_disk]


In [None]:
# Show the first rows of the columns that matter for gender classification
preview_columns = ['Filename', 'Class_id', 'Male']
preview_df = clean_merged_df.loc[:, preview_columns].head()
print(preview_df)

# Persist the full cleaned table to Drive as an Excel workbook (without the index)
excel_export_path = '/content/drive/MyDrive/preview_merged_df.xlsx'
clean_merged_df.to_excel(excel_export_path, index=False)
print("DataFrame has been exported to Excel file successfully.")

# Proses Memisahkan Data Train, Validasi, dan Testing

In [None]:
# Two-stage split with a fixed seed: first hold out 20% for testing, then take
# 25% of the remaining 80% for validation (0.25 x 0.8 = 0.2 of the full data),
# which leaves 60% for training.
train_val_df, test_df = train_test_split(
    clean_merged_df, test_size=0.2, random_state=42
)
train_df, val_df = train_test_split(
    train_val_df, test_size=0.25, random_state=42
)

# Report the size of each split
for split_name, split_df in (
    ('training', train_df),
    ('validation', val_df),
    ('testing', test_df),
):
    print(f"Number of rows in the {split_name} set: {len(split_df)}")

# Memisahkan Folder gambar untuk Training, Validation dan Testing

In [None]:
def _copy_split_by_gender(split_df, source_dir, dest_dir, male_subdir, female_subdir):
    """
    Copy every image listed in ``split_df`` into a per-gender subfolder of ``dest_dir``.

    Parameters:
    - split_df: DataFrame with a 'Filename' column and a 'Male' column.
    - source_dir: Folder that currently holds the image files.
    - dest_dir: Root folder of the split (training, validation or testing).
    - male_subdir: Subfolder of ``dest_dir`` for rows where 'Male' equals 1.
    - female_subdir: Subfolder of ``dest_dir`` for every other row.

    Returns:
    - None (files are copied on disk; an existing destination file is overwritten).
    """
    # Iterating over the two needed columns avoids building a Series per row
    for filename, male_flag in zip(split_df['Filename'], split_df['Male']):
        subdir = male_subdir if male_flag == 1 else female_subdir
        shutil.copy(
            os.path.join(source_dir, filename),
            os.path.join(dest_dir, subdir, filename),
        )


# Sort the training, validation and testing images into their class folders
_copy_split_by_gender(train_df, folder_path, train_dir, 'Male', 'Female')
_copy_split_by_gender(val_df, folder_path, val_dir, 'Val_Male', 'Val_Female')
_copy_split_by_gender(test_df, folder_path, test_dir, 'Test_Male', 'Test_Female')


# Setup Generator untuk Train, Validation, dan Testing




In [None]:
# Random augmentations applied to training images only
augmentation_options = dict(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

# Training images are rescaled to [0, 1] and augmented;
# validation/test images are only rescaled
train_datagen = ImageDataGenerator(rescale=1./255, **augmentation_options)
val_test_datagen = ImageDataGenerator(rescale=1./255)

# Settings shared by all three directory iterators
flow_options = dict(
    target_size=(224, 224),
    batch_size=64,
    class_mode='binary',
)

# The order of `classes` fixes the label indices: the male folder is class 0
# and the female folder is class 1 in every split.

# Training iterator (root folder holds 'Male' and 'Female')
train_generator = train_datagen.flow_from_directory(
    directory=train_dir,
    classes=['Male', 'Female'],
    **flow_options
)

# Validation iterator (root folder holds 'Val_Male' and 'Val_Female')
validation_generator = val_test_datagen.flow_from_directory(
    directory=val_dir,
    classes=['Val_Male', 'Val_Female'],
    **flow_options
)

# Test iterator (root folder holds 'Test_Male' and 'Test_Female')
test_generator = val_test_datagen.flow_from_directory(
    directory=test_dir,
    classes=['Test_Male', 'Test_Female'],
    **flow_options
)

# Mengatur generator data untuk pelatihan (Tanpa pakai Label)
#ujicoba_generator = val_test_datagen.flow_from_directory(
#    directory=ujicoba_dir,  # Direktori root yang berisi subfolder 'Male' dan 'Female'
#    target_size=(224, 224),
#    batch_size=64,
#    class_mode='binary',
#    classes=['Male', 'Female']
#)



# Training Model VGG16 Batch size 64, Adam (Optimize dengan BayesianOptimization)


In [None]:
!pip install keras-tuner

from kerastuner.tuners import BayesianOptimization
from kerastuner import HyperModel
from tensorflow.keras.callbacks import EarlyStopping

class VGG16HyperModel(HyperModel):
    """Tunable binary classifier: a frozen ImageNet VGG16 base plus a small dense head.

    Tuned hyperparameters: 'units' (width of the hidden dense layer),
    'dropout' (dropout rate after it) and 'learning_rate' (Adam step size).
    """

    def build(self, hp):
        """Build and compile one candidate model from the hyperparameters in ``hp``."""
        backbone = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

        # Freeze the convolutional base so that only the new head is trained
        for layer in backbone.layers:
            layer.trainable = False

        # Classification head: flatten -> dense(relu) -> dropout -> single sigmoid unit
        hidden_units = hp.Int('units', min_value=256, max_value=1024, step=128)
        dropout_rate = hp.Float('dropout', min_value=0.3, max_value=0.7, step=0.1)
        features = Flatten()(backbone.output)
        features = Dense(units=hidden_units, activation='relu')(features)
        features = Dropout(rate=dropout_rate)(features)
        predictions = Dense(1, activation='sigmoid')(features)

        model = Model(inputs=backbone.input, outputs=predictions)

        # Learning rate is sampled on a log scale between 1e-4 and 1e-2
        learning_rate = hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='LOG')
        model.compile(
            optimizer=Adam(learning_rate=learning_rate),
            loss='binary_crossentropy',
            metrics=['accuracy']
        )
        return model

# Bayesian search over the hypermodel: 3 trials, one training run per trial,
# ranked by validation accuracy; trial state is stored on Drive
tuner_options = dict(
    objective='val_accuracy',
    max_trials=3,
    executions_per_trial=1,
    directory='/content/drive/MyDrive/Hyperparameter_Tuning_Trial3',
    project_name='VGG16_gender_classification',
)
tuner = BayesianOptimization(VGG16HyperModel(), **tuner_options)

tuner.search_space_summary()

# Stop a trial once validation loss has not improved for 10 epochs and
# roll back to the best weights seen in that trial
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Run the hyperparameter search (up to 50 epochs per trial)
search_options = dict(
    epochs=50,
    validation_data=validation_generator,
    callbacks=[early_stopping],
)
tuner.search(train_generator, **search_options)

# Retrieve the top-ranked model and save it to Drive
best_model = tuner.get_best_models(num_models=1)[0]
best_model.save('/content/drive/MyDrive/vgg16_gender_classification_best_model3.h5')

# Score the best model on the held-out test generator
test_loss, test_accuracy = best_model.evaluate(test_generator)
print(f'Test Loss: {test_loss}')
print(f'Test Accuracy: {test_accuracy}')

# Visualisasi Data Training

In [None]:
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator


# 5 x 10 grid that holds up to 50 images from one augmented training batch
fig, ax = plt.subplots(5, 10, figsize=(20, 10))
ax = ax.flatten()

# Draw one batch from the training generator: (images, labels)
batch = next(train_generator)
batch_images, batch_labels = batch[0], batch[1]

# Invert the generator's name -> index mapping to look up names by index
labels_map = {index: name for name, index in train_generator.class_indices.items()}

# zip stops at the shorter of the 50 axes and the batch, so at most 50 images are drawn
for axis, image, raw_label in zip(ax, batch_images, batch_labels):
    axis.imshow(image)  # the generator has already applied the augmentation
    axis.set_title(labels_map[int(raw_label)])
    axis.axis('off')

plt.tight_layout()
plt.show()


# Evaluasi Model

In [None]:
# Load the saved model from Drive.
# NOTE(review): the tuning cell saves '..._best_model3.h5' while this cell loads
# '..._best_model2.h5' - confirm this is the intended model file.
model_path = '/content/drive/MyDrive/vgg16_gender_classification_best_model2.h5'
model = load_model(model_path)


def _evaluate_and_report(split_label, generator):
    """Evaluate ``model`` on ``generator``, print its loss and accuracy, and return both."""
    loss, accuracy = model.evaluate(generator)
    print(f'{split_label} Loss: {loss}')
    print(f'{split_label} Accuracy: {accuracy}')
    return loss, accuracy


# Training data (note: this generator applies random augmentation)
train_loss, train_accuracy = _evaluate_and_report('Train', train_generator)

# Validation data
val_loss, val_accuracy = _evaluate_and_report('Validation', validation_generator)

# Test data
test_loss, test_accuracy = _evaluate_and_report('Test', test_generator)


# Pengujian Gambar

In [None]:
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# 5 x 10 grid that holds up to 50 images from one test batch
fig, ax = plt.subplots(5, 10, figsize=(20, 10))
ax = ax.flatten()

# Draw one batch from the test generator: (images, labels).
# These images are only rescaled, not augmented.
batch = next(test_generator)
batch_images, batch_labels = batch[0], batch[1]

# Print the generator's folder-name -> index mapping so the order can be verified
print(test_generator.class_indices)

# Display names per label index; the test generator is created with
# classes=['Test_Male', 'Test_Female'], i.e. male = 0 and female = 1
labels_map = {0: 'Male', 1: 'Female'}

# zip stops at the shorter of the 50 axes and the batch, so at most 50 images are drawn
for axis, image, raw_label in zip(ax, batch_images, batch_labels):
    axis.imshow(image)
    axis.set_title(labels_map[int(raw_label)])
    axis.axis('off')

plt.tight_layout()
plt.show()


In [None]:
from google.colab import drive
drive.mount('/content/drive')

from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras.models import load_model
import numpy as np
import matplotlib.pyplot as plt
import os

def predict_images_in_folder(model_path, folder_path, target_size=(224, 224)):
    """
    Predict a gender label for every image directly inside a folder and display
    the images in a single row, each titled with its predicted label.

    Only files whose names end in .png, .jpg or .jpeg (case-insensitive) are used;
    subfolders are not searched. Files are processed in sorted name order.

    Parameters:
    - model_path: Path to the trained model file (.h5).
    - folder_path: Path to the folder containing the images.
    - target_size: Image size (height, width) the model expects.

    Returns:
    - None (shows a matplotlib figure).

    Raises:
    - ValueError: If the folder contains no matching image file.
    """
    # List the images first so an empty folder fails before the model is loaded
    image_files = sorted(
        f for f in os.listdir(folder_path)
        if f.lower().endswith(('.png', '.jpg', '.jpeg'))
    )
    if len(image_files) == 0:
        raise ValueError("No images found in the provided folder.")

    # Load the trained model
    model = load_model(model_path)

    # squeeze=False always returns a 2-D array of axes, so flatten() also works
    # when the folder holds exactly one image
    fig, ax = plt.subplots(1, len(image_files), figsize=(20, 5), squeeze=False)
    ax = ax.flatten()

    # Sigmoid output above 0.5 means class 1; class order follows training (Male=0, Female=1)
    labels_map = {0: 'Male', 1: 'Female'}

    for i, file in enumerate(image_files):
        img_path = os.path.join(folder_path, file)
        img = load_img(img_path, target_size=target_size)

        # Add the batch dimension and rescale pixels to [0, 1], as during training
        img_array = np.expand_dims(img_to_array(img), axis=0) * (1.0 / 255)

        # Take the single scalar probability out of the (1, 1) prediction array
        prediction = model.predict(img_array, verbose=0)
        label = labels_map[int(prediction.ravel()[0] > 0.5)]

        # Show the original (unscaled) image with its predicted label
        ax[i].imshow(img)
        ax[i].set_title(label)
        ax[i].axis('off')

    plt.tight_layout()
    plt.show()

# Example usage.
# A dedicated variable is used for the folder so the notebook-level `folder_path`
# (the raw image folder read by the image-copy cell) is not overwritten.
# NOTE(review): the function only reads image files directly inside the folder;
# the notebook places test images in the 'Test_Male' / 'Test_Female' subfolders,
# so confirm that this folder itself contains images.
model_path = '/content/drive/MyDrive/vgg16_gender_classification_best_model2.h5'
prediction_folder = '/content/drive/MyDrive/Test'
predict_images_in_folder(model_path, prediction_folder)



# Prediksi dan Hitung Precision, Recall, F1 Score dan Inference time (Trial 7x)

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras.models import load_model
import numpy as np
import matplotlib.pyplot as plt
import os
from sklearn.metrics import precision_score, recall_score, f1_score
import time

def predict_images_in_test_folder(model_path, base_folder, target_size=(224, 224), max_images_per_class=50):
    """
    Predict labels for images in ``base_folder/Test_Male`` and ``base_folder/Test_Female``
    with a trained model, then report inference time, precision, recall and F1-score.

    The true label of an image comes from the subfolder it was listed from
    (Test_Male -> 0, Test_Female -> 1). Images are displayed in rows of eight,
    each titled with its predicted and true label. Precision, recall and F1 are
    computed with scikit-learn's defaults, i.e. for label 1 (Female).

    Parameters:
    - model_path: Path to the trained model file (.h5).
    - base_folder: Folder that contains the 'Test_Male' and 'Test_Female' subfolders.
    - target_size: Image size (height, width) the model expects.
    - max_images_per_class: Maximum number of images taken from each class
      (the first ones in sorted file-name order); None uses all of them.

    Returns:
    - None (shows the images and prints the labels, inference time and metrics).

    Raises:
    - ValueError: If neither subfolder contains an image file.
    """
    image_extensions = ('.png', '.jpg', '.jpeg')

    def list_images(folder):
        # Sorted so the chosen subset is the same on every run
        names = sorted(f for f in os.listdir(folder) if f.lower().endswith(image_extensions))
        return [os.path.join(folder, f) for f in names[:max_images_per_class]]

    male_folder = os.path.join(base_folder, 'Test_Male')
    female_folder = os.path.join(base_folder, 'Test_Female')

    # Pair each path with its true label here instead of parsing the path later
    samples = [(path, 0) for path in list_images(male_folder)]
    samples += [(path, 1) for path in list_images(female_folder)]
    if not samples:
        raise ValueError(f"No images found in {male_folder} or {female_folder}.")

    # Load the trained model only once there is something to predict
    model = load_model(model_path)

    labels_map = {0: 'Male', 1: 'Female'}
    y_true = []
    y_pred_binary = []

    # Accumulates time spent inside model.predict only (image loading and
    # plotting are excluded)
    inference_time = 0.0

    # Number of images shown per figure row
    batch_size = 8

    for start in range(0, len(samples), batch_size):
        row = samples[start:start + batch_size]

        # squeeze=False keeps a 2-D axes array even when the last row holds one image
        fig, ax = plt.subplots(1, len(row), figsize=(20, 5), squeeze=False)
        ax = ax.flatten()

        for i, (img_path, true_label) in enumerate(row):
            img = load_img(img_path, target_size=target_size)

            # Add the batch dimension and rescale pixels to [0, 1], as during training
            img_array = np.expand_dims(img_to_array(img), axis=0) * (1.0 / 255)

            predict_start = time.perf_counter()
            prediction = model.predict(img_array, verbose=0)
            inference_time += time.perf_counter() - predict_start

            # Take the scalar probability out of the (1, 1) output before thresholding
            label = int(prediction.ravel()[0] > 0.5)
            y_pred_binary.append(label)
            y_true.append(true_label)

            # Show the original image with predicted and true labels
            ax[i].imshow(img)
            ax[i].set_title(f'Pred: {labels_map[label]}\nTrue: {labels_map[true_label]}')
            ax[i].axis('off')

        plt.tight_layout()
        plt.show()
        plt.close(fig)  # release the figure; one is created per row

    # Debug: print y_true and y_pred_binary
    print("True labels:", y_true)
    print("Predicted labels:", y_pred_binary)

    # Metrics; zero_division=1 reports 1 when a metric's denominator is zero
    precision = precision_score(y_true, y_pred_binary, zero_division=1)
    recall = recall_score(y_true, y_pred_binary, zero_division=1)
    f1 = f1_score(y_true, y_pred_binary, zero_division=1)

    print(f'Inference Time: {inference_time} seconds')
    print(f'Precision: {precision}')
    print(f'Recall: {recall}')
    print(f'F1-Score: {f1}')

# Example usage: evaluate the saved 'best_model2' on the Test folder
base_folder = '/content/drive/MyDrive/Test'
model_path = '/content/drive/MyDrive/vgg16_gender_classification_best_model2.h5'
predict_images_in_test_folder(model_path=model_path, base_folder=base_folder)


In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras.models import load_model
import numpy as np
import matplotlib.pyplot as plt
import os
from sklearn.metrics import precision_score, recall_score, f1_score
import time

def predict_images_in_test_folder(model_path, base_folder, target_size=(224, 224), max_images_per_class=50):
    """
    Predict labels for images in ``base_folder/Test_Male`` and ``base_folder/Test_Female``
    with a trained model, then report inference time, precision, recall and F1-score.

    The true label of an image comes from the subfolder it was listed from
    (Test_Male -> 0, Test_Female -> 1). Images are displayed in rows of eight,
    each titled with its predicted and true label. Precision, recall and F1 are
    computed with scikit-learn's defaults, i.e. for label 1 (Female).

    NOTE(review): this notebook defines a function with this same name in an
    earlier cell; whichever definition runs last is the one in effect.

    Parameters:
    - model_path: Path to the trained model file (.h5).
    - base_folder: Folder that contains the 'Test_Male' and 'Test_Female' subfolders.
    - target_size: Image size (height, width) the model expects.
    - max_images_per_class: Maximum number of images taken from each class
      (the first ones in sorted file-name order); None uses all of them.

    Returns:
    - None (shows the images and prints the labels, inference time and metrics).

    Raises:
    - ValueError: If neither subfolder contains an image file.
    """
    image_extensions = ('.png', '.jpg', '.jpeg')

    def list_images(folder):
        # Sorted so the chosen subset is the same on every run
        names = sorted(f for f in os.listdir(folder) if f.lower().endswith(image_extensions))
        return [os.path.join(folder, f) for f in names[:max_images_per_class]]

    male_folder = os.path.join(base_folder, 'Test_Male')
    female_folder = os.path.join(base_folder, 'Test_Female')

    # Pair each path with its true label here instead of parsing the path later
    samples = [(path, 0) for path in list_images(male_folder)]
    samples += [(path, 1) for path in list_images(female_folder)]
    if not samples:
        raise ValueError(f"No images found in {male_folder} or {female_folder}.")

    # Load the trained model only once there is something to predict
    model = load_model(model_path)

    labels_map = {0: 'Male', 1: 'Female'}
    y_true = []
    y_pred_binary = []

    # Accumulates time spent inside model.predict only (image loading and
    # plotting are excluded)
    inference_time = 0.0

    # Number of images shown per figure row
    batch_size = 8

    for start in range(0, len(samples), batch_size):
        row = samples[start:start + batch_size]

        # squeeze=False keeps a 2-D axes array even when the last row holds one image
        fig, ax = plt.subplots(1, len(row), figsize=(20, 5), squeeze=False)
        ax = ax.flatten()

        for i, (img_path, true_label) in enumerate(row):
            img = load_img(img_path, target_size=target_size)

            # Add the batch dimension and rescale pixels to [0, 1], as during training
            img_array = np.expand_dims(img_to_array(img), axis=0) * (1.0 / 255)

            predict_start = time.perf_counter()
            prediction = model.predict(img_array, verbose=0)
            inference_time += time.perf_counter() - predict_start

            # Take the scalar probability out of the (1, 1) output before thresholding
            label = int(prediction.ravel()[0] > 0.5)
            y_pred_binary.append(label)
            y_true.append(true_label)

            # Show the original image with predicted and true labels
            ax[i].imshow(img)
            ax[i].set_title(f'Pred: {labels_map[label]}\nTrue: {labels_map[true_label]}')
            ax[i].axis('off')

        plt.tight_layout()
        plt.show()
        plt.close(fig)  # release the figure; one is created per row

    # Debug: print y_true and y_pred_binary
    print("True labels:", y_true)
    print("Predicted labels:", y_pred_binary)

    # Metrics; zero_division=1 reports 1 when a metric's denominator is zero
    precision = precision_score(y_true, y_pred_binary, zero_division=1)
    recall = recall_score(y_true, y_pred_binary, zero_division=1)
    f1 = f1_score(y_true, y_pred_binary, zero_division=1)

    print(f'Inference Time: {inference_time} seconds')
    print(f'Precision: {precision}')
    print(f'Recall: {recall}')
    print(f'F1-Score: {f1}')

# Example usage: evaluate the saved 'best_model' on the Test folder
base_folder = '/content/drive/MyDrive/Test'
model_path = '/content/drive/MyDrive/vgg16_gender_classification_best_model.h5'
predict_images_in_test_folder(model_path=model_path, base_folder=base_folder)
