<a href="https://colab.research.google.com/github/fjadidi2001/DataScienceJourney/blob/master/Gender_classification_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.preprocessing import image
import cv2
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import zipfile
from google.colab import drive

In [2]:
# Step 1 (imports) happens in the first cell; this cell only confirms it ran.
print("Libraries imported successfully!")

Libraries imported successfully!


In [3]:
# Step 2: attach Google Drive under /content/drive so the dataset archive
# stored there can be read by the following cells.
drive.mount("/content/drive")

Mounted at /content/drive


In [4]:
# Source archive on the mounted Drive, and the local directory it is unpacked into.
zip_path = "/content/drive/MyDrive/gender_dataset_face.zip"
extract_path = "/content/gender_dataset"

In [5]:
# Unpack the archive unless a previous run already populated the target.
# Checking for a non-empty directory (rather than mere existence) means a run
# that failed before extracting anything does not block later attempts.
if os.path.isdir(extract_path) and os.listdir(extract_path):
    print("Dataset directory already exists!")
else:
    # Open the archive first so a missing or corrupt zip raises before any
    # directory is created.
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        os.makedirs(extract_path, exist_ok=True)
        zip_ref.extractall(extract_path)
    print("Dataset extracted successfully!")


Dataset extracted successfully!


In [9]:
# Step 3: location of the extracted dataset (the folder that holds the class
# sub-directories read by the loader).
dataset_path = "/content/gender_dataset/gender_dataset_face"

In [12]:
# 4. Convert images to arrays and create labels
def _load_class_images(class_dir, label, target_size,
                       extensions=('.jpg', '.jpeg', '.png')):
    """Read every decodable image in ``class_dir`` and pair it with ``label``.

    Returns a ``(images, labels)`` tuple of equally long lists.
    """
    images, labels = [], []
    # Sort the listing so sample order does not depend on the order in which
    # the filesystem happens to return directory entries.
    for img_name in tqdm(sorted(os.listdir(class_dir))):
        # Compare the lower-cased name so files such as "IMG_01.JPG" are kept.
        if not img_name.lower().endswith(extensions):
            continue
        img = cv2.imread(os.path.join(class_dir, img_name))
        if img is None:
            # Skip files that could not be decoded into an image.
            continue
        img = cv2.resize(img, target_size)
        images.append(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        labels.append(label)
    return images, labels


def load_dataset(dataset_path, target_size=(224, 224)):
    """Load the face images under ``dataset_path`` into arrays.

    Expected layout: ``dataset_path/man`` and ``dataset_path/woman``.

    Args:
        dataset_path: Directory containing the ``man`` and ``woman`` folders.
        target_size: Size passed unchanged to ``cv2.resize`` (which OpenCV
            documents as ``(width, height)``).

    Returns:
        ``(images, labels)`` as NumPy arrays. Images are RGB; labels are
        ``1`` for files from ``man`` and ``0`` for files from ``woman``.
        All ``man`` samples come first, each class in sorted filename order.

    Raises:
        OSError: If either class directory cannot be listed.
    """
    images = []
    labels = []

    # (folder name, label, word used in the progress message)
    classes = (('man', 1, 'male'), ('woman', 0, 'female'))
    for folder, label, description in classes:
        print(f"Loading {description} images...")
        class_images, class_labels = _load_class_images(
            os.path.join(dataset_path, folder), label, target_size
        )
        images.extend(class_images)
        labels.extend(class_labels)

    return np.array(images), np.array(labels)

In [13]:
# Read both class folders into memory and report the resulting array shapes.
X, y = load_dataset(dataset_path=dataset_path)
print(f"Dataset loaded! X shape: {X.shape}, y shape: {y.shape}")

Loading male images...


100%|██████████| 1173/1173 [00:01<00:00, 835.40it/s]


Loading female images...


100%|██████████| 1134/1134 [00:00<00:00, 1183.69it/s]


Dataset loaded! X shape: (2307, 224, 224, 3), y shape: (2307,)


In [15]:
# Normalize pixel values to the [0, 1] range.
# Cast to float32 before dividing: dividing the integer image array directly
# by 255.0 yields float64, doubling the memory held by the image array.
# The dtype guard keeps this cell idempotent - re-running it will not divide
# already-normalized data by 255 a second time.
if np.issubdtype(X.dtype, np.integer):
    X = X.astype(np.float32) / 255.0

In [16]:
# Step 5: hold out 20% of the samples as a test set.
# The split is stratified on the labels and seeded via random_state.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.2,
    random_state=42,
)
print(f"Train set: {X_train.shape}, Test set: {X_test.shape}")

Train set: (1845, 224, 224, 3), Test set: (462, 224, 224, 3)


In [17]:
# Step 6: augmentation applied to training batches as they are generated.
train_datagen = ImageDataGenerator(
    horizontal_flip=True,
    rotation_range=20,
    zoom_range=0.2,
    shear_range=0.2,
    width_shift_range=0.2,
    height_shift_range=0.2,
    fill_mode='nearest',
)

# Yields augmented (images, labels) batches of 32 drawn from the training split.
train_generator = train_datagen.flow(x=X_train, y=y_train, batch_size=32)