In [1]:
# Dependencies
import tensorflow as tf
from keras import layers
import numpy as np
import cv2
import os
import csv

In [2]:
# Sanity check: load the first image of the dataset as grayscale and inspect its pixel matrix.
first_image_path = r"C:\Users\johns\OneDrive\Desktop\ML\MLData\CELEBA_dataset\img_align_celeba\000001.jpg"
image = cv2.imread(first_image_path, 0)  # flag 0 = read as grayscale
if image is None:
    # cv2.imread does not raise on a missing or unreadable file; it returns None.
    raise FileNotFoundError(f"Could not read image: {first_image_path}")

# The 2D matrix of the image values.
npimage = np.array(image)
print("First Image, Grey Scale")
print(np.shape(npimage))
print(npimage)

First Image, Grey Scale
(218, 178)
[[233 233 233 ... 232 241 241]
 [233 233 233 ... 234 241 241]
 [233 233 233 ... 236 242 242]
 ...
 [ 88  63  93 ...  72  73  73]
 [ 77  85 113 ...  66  68  68]
 [115 151 192 ...  66  68  68]]


# Dividing up the dataset (Preliminary)
Total image count: 202,599

Preliminary Training:   000001.jpg - 010000.jpg Count: 10,000 <br>

Image dimensions 178 x 218

Goal: Hair Detection

Based on the "Bald" Annotation 

In [3]:
labels_csv_path = r"C:\Users\johns\OneDrive\Desktop\ML\MLData\CELEBA_dataset\CELEBA_annotations\list_attr_celeba_CSV.csv"   # CSV File created with celeb attributes .txt file
image_dataset_directory = r"C:\Users\johns\OneDrive\Desktop\ML\MLData\CELEBA_dataset\img_align_celeba"                      # File path to image dataset

max_samples = 20000      # Number of rows (after the header) to load from the CSV
file_name_column = 0     # CSV column holding the image file name
label_column = 5         # CSV column used as the label -- presumably the "Bald" attribute; verify against the CSV header

# Read the file names and raw label strings for the first `max_samples` rows.
file_names = []
labels_pre = []
with open(labels_csv_path, 'r', newline='') as file:   # newline='' is what the csv module expects
    reader = csv.reader(file)
    next(reader, None)  # skip the header row (tolerates an empty file)

    for i, line in enumerate(reader):
        if i >= max_samples:
            break

        file_names.append(line[file_name_column])
        labels_pre.append(line[label_column])

# Load every listed image as a 2D grayscale matrix.
images = []
for file_name in file_names:
    # os.path.join avoids the invalid "\{" escape sequence of a hand-built path.
    image_path = os.path.join(image_dataset_directory, file_name)
    img = cv2.imread(image_path, 0)  # flag 0 = grayscale read
    if img is None:
        # cv2.imread returns None instead of raising when a file cannot be read;
        # fail here rather than letting a None end up inside the image array.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    images.append(img)

images = np.array(images)
images = images / 255.0  # Normalize pixel values to the range 0 to 1 (result is a float array)

# Convert the raw label strings to integers in a single pass:
# '1' -> 1, anything else (e.g. '-1') -> 0.
labels_pre = np.array(labels_pre)
labels = np.array([1 if label == '1' else 0 for label in labels_pre])

# images --> all images as grayscale matrices with values in [0, 1]
# labels --> 1 where the label column is '1' (attribute present, i.e. bald if the
#            column is the Bald attribute), 0 otherwise

In [4]:
# images --> ndarray of grayscale images with values normalized 0 to 1, one entry per loaded CSV row
# labels --> ndarray of integer labels (0 or 1), index-aligned with images
image_shape = images.shape   # (num_images, height, width)
label_shape = labels.shape   # (num_images,)

# Per-image shape plus one trailing channel for the single grayscale plane.
height, width = image_shape[1], image_shape[2]
input_shape = (height, width, 1)

print("Example Data: Images[0]: ", images[0], sep="\n")
print("Example Data: Labels", labels, sep="\n")

Example Data: Images[0]: 
[[0.91372549 0.91372549 0.91372549 ... 0.90980392 0.94509804 0.94509804]
 [0.91372549 0.91372549 0.91372549 ... 0.91764706 0.94509804 0.94509804]
 [0.91372549 0.91372549 0.91372549 ... 0.9254902  0.94901961 0.94901961]
 ...
 [0.34509804 0.24705882 0.36470588 ... 0.28235294 0.28627451 0.28627451]
 [0.30196078 0.33333333 0.44313725 ... 0.25882353 0.26666667 0.26666667]
 [0.45098039 0.59215686 0.75294118 ... 0.25882353 0.26666667 0.26666667]]
Example Data: Labels
[0 0 0 ... 0 0 0]


In [5]:
# First Testing Model
# Baseline CNN for binary classification of single-channel images:
# three Conv2D + MaxPooling2D stages, then two Dense layers and one sigmoid
# output unit (matching the 0/1 labels and a binary cross-entropy loss).

model = tf.keras.Sequential([
    # Take the input shape from the loaded data (`input_shape`, computed from
    # `images`) instead of repeating the literal (218, 178, 1).
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 216, 176, 32)      320       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 108, 88, 32)      0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 106, 86, 64)       18496     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 53, 43, 64)       0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 51, 41, 128)       73856     
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 25, 20, 128)      0

In [7]:
# Compile and train the model.
# The model's first Conv2D layer is declared with a trailing channel axis,
# while `images` is a stack of 2D grayscale matrices. Add the channel axis
# explicitly instead of relying on Keras to reshape the input implicitly.
# Indexing with np.newaxis returns a view, so no pixel data is copied.
train_images = images[..., np.newaxis]

with tf.device('/GPU:0'):
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # Train Model -- keep the returned History so the per-epoch metrics can be inspected later.
    history_baseline = model.fit(train_images, labels, batch_size=20, epochs=10, validation_split=0.2)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [8]:
# Second testing model: same architecture as the first one, with dropout added
# after the second convolution stage and after the first dense layer.
# NOTE: this rebinds `model`, replacing the previously built network.

dropout_rate = 0.1  # fraction of activations dropped during training at each Dropout layer

model = tf.keras.Sequential([
    # Take the input shape from the loaded data (`input_shape`, computed from
    # `images`) instead of repeating the literal (218, 178, 1).
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Dropout(dropout_rate),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dropout(dropout_rate),
    layers.Dense(64, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_3 (Conv2D)           (None, 216, 176, 32)      320       
                                                                 
 max_pooling2d_3 (MaxPooling  (None, 108, 88, 32)      0         
 2D)                                                             
                                                                 
 conv2d_4 (Conv2D)           (None, 106, 86, 64)       18496     
                                                                 
 max_pooling2d_4 (MaxPooling  (None, 53, 43, 64)       0         
 2D)                                                             
                                                                 
 dropout (Dropout)           (None, 53, 43, 64)        0         
                                                                 
 conv2d_5 (Conv2D)           (None, 51, 41, 128)      

In [9]:
# Compile and train the model that includes dropout.
# The model's first Conv2D layer is declared with a trailing channel axis,
# while `images` is a stack of 2D grayscale matrices. Add the channel axis
# explicitly instead of relying on Keras to reshape the input implicitly.
# Indexing with np.newaxis returns a view, so no pixel data is copied.
train_images = images[..., np.newaxis]

with tf.device('/GPU:0'):
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # Train Model -- keep the returned History so the per-epoch metrics can be inspected later.
    history_dropout = model.fit(train_images, labels, batch_size=20, epochs=10, validation_split=0.2)