In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import opendatasets as od
import tensorflow as tf
import cv2
from tqdm import *



In [2]:
dataset_directory = "C:\\Users\\amith\\Documents\\Datasets\\kaggle\\kaggle".replace("\\","/")
csv_directory = "C:\\Users\\amith\\Documents\\Datasets".replace("\\","/")

In [3]:
train_val_age = pd.read_csv(csv_directory + "/train_age.csv").sort_values(by="imageId")
train_val_gender = pd.read_csv(csv_directory + "/train_gender.csv").sort_values(by="imageId")

id = train_val_age["imageId"].values.tolist()
train_val_files = pd.DataFrame(columns=["imageId","Files"])
ctr = 0

for i in os.listdir(dataset_directory+"/train"):
    train_val_files.loc[ctr,"imageId"] = id[ctr]
    train_val_files.loc[ctr,"Files"] = i
    ctr += 1

train_val_files = train_val_files.sort_values(by="imageId")

train_val_output = pd.merge(left=train_val_age,right=train_val_gender,on="imageId",how="inner")
train_val_output = pd.merge(left=train_val_output,right=train_val_files,on="imageId",how="inner")

train_val_output = train_val_output.sort_values(by="imageId")

train_val_id = train_val_output["imageId"].values.tolist()
np.random.shuffle(train_val_id)


threshold = int(np.round(0.8 * len(train_val_id)))

train_id = train_val_id[0:threshold]
val_id = train_val_id[threshold:]

train_val_output.set_index(keys="imageId",drop=False,inplace=True)

train_output = train_val_output.loc[train_id,:]
val_output = train_val_output.loc[val_id,:]

train_output = train_output.reset_index(drop=True).drop(columns=["imageId"])
val_output = val_output.reset_index(drop=True).drop(columns=["imageId"])

In [4]:
train_output.head()

Unnamed: 0,age,gender,Files
0,64.0,1,004945.png
1,72.0,1,010345.png
2,29.0,1,008066.png
3,24.0,1,006394.png
4,41.0,0,009885.png


In [5]:
val_output.head()

Unnamed: 0,age,gender,Files
0,69.0,1,005835.png
1,28.0,1,008981.png
2,67.0,0,003344.png
3,73.0,1,005203.png
4,28.0,1,009086.png


In [6]:
train_images = []
train_data = {}
ctr = 0
for i in tqdm(iterable=train_output["Files"],desc="Processing train data"):
    ages = train_output.loc[ctr,"age"]
    genders = train_output.loc[ctr,"gender"]
    img_org = cv2.imread(dataset_directory + "/train/" + i)
    img_gray = cv2.cvtColor(src=img_org, code=cv2.COLOR_BGR2GRAY)
    img_short = cv2.resize(src=img_gray, dsize=(64,64))
    img = img_short / 255.0
    train_images.append(img)
    ctr += 1

train_images = np.array(train_images)

Processing train data: 100%|██████████| 8562/8562 [03:03<00:00, 46.70it/s]


In [7]:
dataset_directory + "/" + i

'C:/Users/amith/Documents/Datasets/kaggle/kaggle/003431.png'

In [8]:
dataset_directory + "/val/" + i

'C:/Users/amith/Documents/Datasets/kaggle/kaggle/val/003431.png'

In [9]:
val_images = []
val_data = {}
ctr = 0
for i in tqdm(iterable=val_output["Files"],desc="Processing val data"):
    ages = val_output.loc[ctr,"age"]
    genders = val_output.loc[ctr,"gender"]
    img_org = cv2.imread(dataset_directory + "/train/" + i)
    img_gray = cv2.cvtColor(src=img_org, code=cv2.COLOR_BGR2GRAY)
    img_short = cv2.resize(src=img_gray, dsize=(64,64))
    img = img_short / 255.0
    val_images.append(img)
    ctr += 1

val_images = np.array(val_images)

Processing val data: 100%|██████████| 2140/2140 [00:41<00:00, 52.14it/s]


In [10]:
train_images = train_images.reshape(tuple(list(train_images.shape) + [1]))
val_images = val_images.reshape(tuple(list(val_images.shape) + [1]))

In [11]:
val_images.shape

(2140, 64, 64, 1)

In [12]:
model_genders = tf.keras.models.Sequential(layers=[
    tf.keras.layers.Conv2D(filters=16,kernel_size=(3,3),activation="relu",input_shape=(64,64,1)),
    tf.keras.layers.Conv2D(filters=32,kernel_size=(3,3),activation="relu",input_shape=(64,64,1)),
    tf.keras.layers.Conv2D(filters=64,kernel_size=(3,3),activation="relu",input_shape=(64,64,1)),
    tf.keras.layers.Conv2D(filters=64,kernel_size=(3,3),activation="relu",input_shape=(64,64,1)),
    
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=64,activation="relu"),
    tf.keras.layers.Dense(units=1,activation="softmax")
])



In [13]:
model_ages = tf.keras.models.Sequential(layers=[
    tf.keras.layers.Conv2D(filters=16,kernel_size=(3,3),activation="relu",input_shape=(64,64,1)),
    tf.keras.layers.Conv2D(filters=32,kernel_size=(3,3),activation="relu",input_shape=(64,64,1)),
    tf.keras.layers.Conv2D(filters=64,kernel_size=(3,3),activation="relu",input_shape=(64,64,1)),
    tf.keras.layers.Conv2D(filters=64,kernel_size=(3,3),activation="relu",input_shape=(64,64,1)),

    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=64,activation="relu"),
    tf.keras.layers.Dense(units=1,activation="linear")
])

In [14]:
train_ages = train_output["age"].values
train_genders = train_output["gender"].values

val_ages = val_output["age"].values
val_genders = val_output["gender"].values

In [15]:
model_genders.compile(optimizer=tf.keras.optimizers.Adam(),loss=tf.keras.losses.binary_crossentropy,metrics=['accuracy'])
model_ages.compile(optimizer=tf.keras.optimizers.SGD(),loss=tf.keras.losses.mean_absolute_error, metrics=["mae"])

In [None]:
model_genders.fit(x=train_images,y=train_genders,batch_size=32,epochs=10,validation_data=(val_images,val_genders),verbose=1,shuffle=True,callbacks=tf.keras.callbacks.EarlyStopping(patience=2))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10

In [None]:
model_ages.fit(x=train_images,y=train_ages,batch_size=32,epochs=10,validation_data=(val_images,val_ages),verbose=1,shuffle=True,callbacks=tf.keras.callbacks.EarlyStopping(patience=2))

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100

In [21]:
tf.keras.models.save_model(model=model_genders,filepath="C:/Users/amith/Documents/Datasets/model_genders.h5",overwrite=True)
tf.keras.models.save_model(model=model_ages,filepath="C:/Users/amith/Documents/Datasets/model_ages.h5",overwrite=True)

  tf.keras.models.save_model(model=model_genders,filepath="C:/Users/amith/Documents/Datasets/model_genders.h5",overwrite=True)
  tf.keras.models.save_model(model=model_ages,filepath="C:/Users/amith/Documents/Datasets/model_ages.h5",overwrite=True)


In [27]:
print(dataset_directory + "/test/")
os.listdir(test_location)

C:/Users/amith/Documents/Datasets/kaggle/kaggle/test/


['000000.png',
 '000001.png',
 '000002.png',
 '000003.png',
 '000004.png',
 '000005.png',
 '000006.png',
 '000007.png',
 '000008.png',
 '000009.png',
 '000010.png',
 '000011.png',
 '000012.png',
 '000013.png',
 '000014.png',
 '000015.png',
 '000016.png',
 '000017.png',
 '000018.png',
 '000019.png',
 '000020.png',
 '000021.png',
 '000022.png',
 '000023.png',
 '000024.png',
 '000025.png',
 '000026.png',
 '000027.png',
 '000028.png',
 '000029.png',
 '000030.png',
 '000031.png',
 '000032.png',
 '000033.png',
 '000034.png',
 '000035.png',
 '000036.png',
 '000037.png',
 '000038.png',
 '000039.png',
 '000040.png',
 '000041.png',
 '000042.png',
 '000043.png',
 '000044.png',
 '000045.png',
 '000046.png',
 '000047.png',
 '000048.png',
 '000049.png',
 '000050.png',
 '000051.png',
 '000052.png',
 '000053.png',
 '000054.png',
 '000055.png',
 '000056.png',
 '000057.png',
 '000058.png',
 '000059.png',
 '000060.png',
 '000061.png',
 '000062.png',
 '000063.png',
 '000064.png',
 '000065.png',
 '000066.p

In [30]:
test_images = []

ctr = 0
for i in tqdm(iterable=sorted(os.listdir(dataset_directory + "/test/")),desc="Processing test data"):
    img_org = cv2.imread(dataset_directory + "/test/" + i)
    img_gray = cv2.cvtColor(src=img_org, code=cv2.COLOR_BGR2GRAY)
    img_short = cv2.resize(src=img_gray, dsize=(64,64))
    img = img_short / 255.0
    test_images.append(img)
    ctr += 1

test_images = np.array(test_images)

Processing test data: 100%|██████████| 11747/11747 [04:08<00:00, 47.31it/s]


In [32]:
test_ages = pd.read_csv("C:\\Users\\amith\\Documents\\Datasets\\sample_submission_age.csv")
test_genders = pd.read_csv("C:\\Users\\amith\\Documents\\Datasets\\sample_submission_gender.csv")

In [33]:
model_ages.predict(test_images)



array([[0.],
       [0.],
       [0.],
       ...,
       [0.],
       [0.],
       [0.]], dtype=float32)