<a href="https://colab.research.google.com/github/bhuvan-thekkade-hub/gender--classsification--using-deeplearning/blob/main/gender_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import files

uploaded = files.upload()


Saving train_nLPp5K8.zip to train_nLPp5K8.zip


In [3]:
import zipfile
import os

# Define the uploaded zip file name
zip_file_name = 'train_nLPp5K8.zip'

# Define the directory where you want to extract the contents
extract_dir = 'extracted_data'

# Create the directory if it doesn't exist
os.makedirs(extract_dir, exist_ok=True)

# Unzip the file
with zipfile.ZipFile(zip_file_name, 'r') as zip_ref:
    zip_ref.extractall(extract_dir)

# List the contents of the extracted directory to verify
print("Files extracted successfully to:", extract_dir)
print("Contents of the extracted directory:")
print(os.listdir(extract_dir))

Files extracted successfully to: extracted_data
Contents of the extracted directory:
['train.csv', 'images']


In [5]:
# CHANGE THIS PATH TO YOUR CORRECT PATH
# For example, if your folders are in 'extracted_data/gender_dataset', use that.
# Or if they are in 'extracted_data/train_nLPp5K8/dataset', use that path.
DATA_DIR = 'extracted_data/train_nLPp5K8/dataset'

# Now, you can proceed with the data loading code from the previous response.
# (The image_dataset_from_directory part)

In [7]:
# List the contents of the 'images' directory
images_dir = os.path.join(extract_dir, 'images')
if os.path.exists(images_dir):
    print("\nContents of the 'images' directory:")
    print(os.listdir(images_dir)[:10]) # Print only the first 10 entries for brevity
else:
    print("\n'images' directory not found.")


Contents of the 'images' directory:
['15468.jpg', '796.jpg', '17408.jpg', '6893.jpg', '11677.jpg', '5841.jpg', '28759.jpg', '27025.jpg', '248.jpg', '14424.jpg']


In [10]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import os
import pandas as pd

# Define the root directory where the zip file was extracted
extracted_root = 'extracted_data'

# Define the path to the train.csv file
train_csv_path = os.path.join(extracted_root, 'train.csv')

# Load the train.csv file into a pandas DataFrame
try:
    train_df = pd.read_csv(train_csv_path)
    print("train.csv loaded successfully.")
    print(train_df.head())
except FileNotFoundError:
    print(f"Error: train.csv not found at {train_csv_path}. Please check the extracted files.")
    train_df = None # Set train_df to None to indicate loading failed

# Now that we have the dataframe, we can proceed with constructing image paths and loading images in the next steps.
# The image_dataset_from_directory approach is not suitable for this data structure.
# We will implement the data loading manually in subsequent steps.

train.csv loaded successfully.
  image_names  class
0   11413.jpg      1
1    9863.jpg      1
2     997.jpg      0
3    5950.jpg      0
4   10454.jpg      1


In [11]:
# Define the path to the images directory
images_dir = os.path.join(extracted_root, 'images')

# Construct the full image paths
if train_df is not None:
    train_df['image_path'] = train_df['image_names'].apply(lambda x: os.path.join(images_dir, x))
    print("\nImage paths constructed successfully.")
    print(train_df.head())
else:
    print("\nCould not construct image paths because train_df was not loaded.")


Image paths constructed successfully.
  image_names  class                       image_path
0   11413.jpg      1  extracted_data/images/11413.jpg
1    9863.jpg      1   extracted_data/images/9863.jpg
2     997.jpg      0    extracted_data/images/997.jpg
3    5950.jpg      0   extracted_data/images/5950.jpg
4   10454.jpg      1  extracted_data/images/10454.jpg


In [12]:
# Define image dimensions
IMG_HEIGHT = 128
IMG_WIDTH = 128

# Function to load and preprocess images
def load_and_preprocess_image(image_path):
    # Read the image file
    img = tf.io.read_file(image_path)
    # Decode the image
    img = tf.image.decode_jpeg(img, channels=3)
    # Resize the image
    img = tf.image.resize(img, [IMG_HEIGHT, IMG_WIDTH])
    # Normalize the image to the range [0, 1]
    img = img / 255.0
    return img

# Load and preprocess all images
if train_df is not None:
    # Create a TensorFlow Dataset from the image paths and labels
    image_paths = train_df['image_path'].values
    labels = train_df['class'].values

    # Create a dataset of image paths and labels
    path_ds = tf.data.Dataset.from_tensor_slices(image_paths)
    label_ds = tf.data.Dataset.from_tensor_slices(labels)

    # Map the preprocessing function to the image paths dataset
    image_ds = path_ds.map(load_and_preprocess_image)

    # Combine the image and label datasets
    image_label_ds = tf.data.Dataset.zip((image_ds, label_ds))

    print("\nImages loaded and preprocessed successfully.")
    print("Example image and label:")
    for image, label in image_label_ds.take(1):
        print("Image shape:", image.shape)
        print("Label:", label.numpy())
else:
    print("\nCould not load and preprocess images because train_df was not loaded.")


Images loaded and preprocessed successfully.
Example image and label:
Image shape: (128, 128, 3)
Label: 1


In [14]:
# Define batch size
BATCH_SIZE = 32

# Shuffle and batch the dataset
# It's good practice to shuffle the training data
# You might want to split into train and validation sets first
# For simplicity, let's shuffle the entire dataset for now
# In a real scenario, you would split before shuffling and batching

# Get the total number of samples
DATASET_SIZE = tf.data.experimental.cardinality(image_label_ds).numpy()
print(f"\nTotal number of samples in the dataset: {DATASET_SIZE}")

# Define split ratios (e.g., 80% train, 20% validation)
train_size = int(0.8 * DATASET_SIZE)
val_size = DATASET_SIZE - train_size

# Shuffle the dataset
image_label_ds = image_label_ds.shuffle(buffer_size=DATASET_SIZE)

# Split the dataset
train_ds = image_label_ds.take(train_size)
val_ds = image_label_ds.skip(train_size)

# Batch the datasets
train_ds = train_ds.batch(BATCH_SIZE)
val_ds = val_ds.batch(BATCH_SIZE)

print(f"\nTraining dataset size: {tf.data.experimental.cardinality(train_ds).numpy() * BATCH_SIZE}")
print(f"Validation dataset size: {tf.data.experimental.cardinality(val_ds).numpy() * BATCH_SIZE}")

# You can optionally prefetch the data for better performance
AUTOTUNE = tf.data.AUTOTUNE
train_ds = train_ds.cache().prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)

print("\nData prepared for model training (split into train/validation and batched).")


Total number of samples in the dataset: 12196

Training dataset size: 9760
Validation dataset size: 2464

Data prepared for model training (split into train/validation and batched).


In [15]:
# Define the model architecture
model = keras.Sequential([
    keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)),
    keras.layers.MaxPooling2D((2, 2)),
    keras.layers.Conv2D(64, (3, 3), activation='relu'),
    keras.layers.MaxPooling2D((2, 2)),
    keras.layers.Conv2D(128, (3, 3), activation='relu'),
    keras.layers.MaxPooling2D((2, 2)),
    keras.layers.Flatten(),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(1, activation='sigmoid') # Sigmoid for binary classification
])

# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Print the model summary
model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [16]:
# Define the number of training epochs
EPOCHS = 10  # You can adjust this number

# Train the model
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=EPOCHS
)

print("\nModel training complete.")

Epoch 1/10
[1m305/305[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 48ms/step - accuracy: 0.6341 - loss: 0.6104 - val_accuracy: 0.8012 - val_loss: 0.4158
Epoch 2/10
[1m305/305[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 19ms/step - accuracy: 0.8066 - loss: 0.4207 - val_accuracy: 0.8439 - val_loss: 0.3533
Epoch 3/10
[1m305/305[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 18ms/step - accuracy: 0.8305 - loss: 0.3663 - val_accuracy: 0.8660 - val_loss: 0.2892
Epoch 4/10
[1m305/305[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 18ms/step - accuracy: 0.8647 - loss: 0.3049 - val_accuracy: 0.8861 - val_loss: 0.2585
Epoch 5/10
[1m305/305[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 19ms/step - accuracy: 0.8882 - loss: 0.2561 - val_accuracy: 0.9008 - val_loss: 0.2244
Epoch 6/10
[1m305/305[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 18ms/step - accuracy: 0.9113 - loss: 0.2105 - val_accuracy: 0.9086 - val_loss: 0.2202
Epoch 7/10
[1m305/3