## AGE DETECTION

### Data Preparation

In [1]:
import os
import pandas as pd
import numpy as np

In [2]:
faces_path = '../../age_detection/age_gender_race/'
faces_image_names = os.listdir(faces_path)

In [3]:
print(faces_image_names[0])#name of image => age , gender , race , date
len(faces_image_names) #no of image 

65_0_0_20170103183632050.jpg.chip.jpg


23708

In [4]:
def label_name_split(image_name): #functions to split the name into labels
    image_labels = image_name.split('_')
    age = image_labels[0]
    gender = image_labels[1]
    race = image_labels[2]
    return age, gender, race

print(label_name_split(faces_image_names[0]))

('65', '0', '0')


In [5]:
#dictionary to map image name to their labels
faces_age_labels = np.array([])
faces_ages_df = pd.DataFrame(columns = ['image','age','target'],dtype='int')
for i , image in enumerate(faces_image_names):
    age, gender, race = label_name_split(image)
    faces_age_labels = np.append(faces_age_labels,age)
    faces_ages_df.loc[i,'image'] = faces_path + image
    faces_ages_df.loc[i,'age'] = age
    faces_ages_df.loc[i,'target'] = 0

In [6]:
unique_ages , unique_ages_count = np.unique(faces_age_labels,return_counts=True) #unqiue age and how many images are there

In [7]:
#dictionary ages and how many image of that image
ages_count = {}
for age,count in zip(unique_ages,unique_ages_count):
    ages_count[age] = count
images_df = pd.DataFrame(ages_count.values(), index=ages_count.keys(), columns=['images'])
print(images_df.head)

<bound method NDFrame.head of      images
1      1123
10      156
100      11
101       2
103       1
..      ...
92       13
93        5
95        9
96       17
99        9

[104 rows x 1 columns]>


### Label for age and Class Imabalance 

Age-ranges (classes)<br>
0  1 - 4	
1   5 - 18	
2	19 - 24	
3	25 - 26	
4	27 - 30	
5	31 - 35	
6	36 - 42	
7	43 - 53	
8	54 - 65	
9	66 - 116

In [8]:
import pandas as pd

In [9]:
def split_classes(ser, n_classes):
     
    ages_label = {}
    
    n_images = int(sum(ser) / n_classes)

    classes_df = pd.DataFrame()

    age_index = 0

    for i in range(n_classes):
        if age_index<=103:
            age_start = ser.index[age_index]
            age_current = ser.index[age_index]
        else:
            break

        class_images = 0
        
        while class_images < n_images:
            class_images += ser[age_current]
            age_index += 1

            if age_index<=103:
                age_current = ser.index[age_index]
            else:
                break
                
        if age_index<=104:
            age_end = ser.index[age_index-1]
        else:
            break
            
        ages_label[i] = {
            'start':age_start,
            'end':age_end
        }
        classes_df.loc[i, 'age(start)'] = age_start 
        classes_df.loc[i, 'age(end)'] = age_end
        classes_df.loc[i, 'class balance'] = round((class_images / sum(ser)) * 100, 2)
    
    return classes_df , ages_label

In [10]:
a,label_age = split_classes(images_df['images'],11)
print(a)
label_age

  age(start) age(end)  class balance
0          1       16           9.14
1         17       23          10.34
2         24       26          15.99
3         27        3          10.09
4         30       34           9.66
5         35       39           9.48
6          4       47           9.35
7         48       56           9.90
8         57       70           9.22
9         71       99           6.84


{0: {'start': '1', 'end': '16'},
 1: {'start': '17', 'end': '23'},
 2: {'start': '24', 'end': '26'},
 3: {'start': '27', 'end': '3'},
 4: {'start': '30', 'end': '34'},
 5: {'start': '35', 'end': '39'},
 6: {'start': '4', 'end': '47'},
 7: {'start': '48', 'end': '56'},
 8: {'start': '57', 'end': '70'},
 9: {'start': '71', 'end': '99'}}

### save ages dataset

In [11]:
faces_ages_df.head(10)

Unnamed: 0,image,age,target
0,../../age_detection/age_gender_race/65_0_0_201...,65,0.0
1,../../age_detection/age_gender_race/71_0_0_201...,71,0.0
2,../../age_detection/age_gender_race/38_0_1_201...,38,0.0
3,../../age_detection/age_gender_race/16_0_0_201...,16,0.0
4,../../age_detection/age_gender_race/1_1_4_2017...,1,0.0
5,../../age_detection/age_gender_race/60_0_3_201...,60,0.0
6,../../age_detection/age_gender_race/26_1_2_201...,26,0.0
7,../../age_detection/age_gender_race/22_1_2_201...,22,0.0
8,../../age_detection/age_gender_race/1_0_2_2016...,1,0.0
9,../../age_detection/age_gender_race/32_0_0_201...,32,0.0


In [12]:
def class_labels(age):
    if 1 <= age <= 2:
        return 0
    elif 3 <= age <= 9:
        return 1
    elif 10 <= age <= 20:
        return 2
    elif 21 <= age <= 27:
        return 3
    elif 28 <= age <= 45:
        return 4
    elif 46 <= age <= 65:
        return 5
    else:
        return 6
faces_ages_df['age'] = faces_ages_df['age'].astype(int)
faces_ages_df['target'] = faces_ages_df['age'].map(class_labels)

In [13]:
faces_ages_df.head(10)

Unnamed: 0,image,age,target
0,../../age_detection/age_gender_race/65_0_0_201...,65,5
1,../../age_detection/age_gender_race/71_0_0_201...,71,6
2,../../age_detection/age_gender_race/38_0_1_201...,38,4
3,../../age_detection/age_gender_race/16_0_0_201...,16,2
4,../../age_detection/age_gender_race/1_1_4_2017...,1,0
5,../../age_detection/age_gender_race/60_0_3_201...,60,5
6,../../age_detection/age_gender_race/26_1_2_201...,26,3
7,../../age_detection/age_gender_race/22_1_2_201...,22,3
8,../../age_detection/age_gender_race/1_0_2_2016...,1,0
9,../../age_detection/age_gender_race/32_0_0_201...,32,4


### Data Augmentation

In [14]:
# Making a new folder to save all augmented images as a new dataset.
new_path = os.listdir("./age_dataset")
train_df = faces_ages_df
train_aug_df = pd.DataFrame(columns=train_df.columns)
train_aug_df

Unnamed: 0,image,age,target


In [15]:
# Running a for-loop through all the images in train_df and augmenting them.

for i in range(train_df.shape[0]):

    # Reading the image filepath, name, age and target values.
    img_path = train_df.loc[i, 'filename']
    img_name = img_path.split("/")[-1]
    img_age = train_df.loc[i, 'age']
    img_target = train_df.loc[i, 'target']

    # Reading the image from the filepath and converting it to proper colour format in cv2.
    img = cv2.imread(img_path)
    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

    # Creating the augmented versions of the image as shown above.
    img_rot_pos40 = ndimage.rotate(img, 40, reshape=False)
    img_rot_pos20 = ndimage.rotate(img, 20, reshape=False)
    img_rot_neg20 = ndimage.rotate(img, -20, reshape=False)
    img_rot_neg40 = ndimage.rotate(img, -40, reshape=False)
    img_fliplr = np.fliplr(img)
    img_fliplr_rot_pos40 = ndimage.rotate(img_fliplr, 40, reshape=False)
    img_fliplr_rot_pos20 = ndimage.rotate(img_fliplr, 20, reshape=False)
    img_fliplr_rot_neg20 = ndimage.rotate(img_fliplr, -20, reshape=False)
    img_fliplr_rot_neg40 = ndimage.rotate(img_fliplr, -40, reshape=False)

    # Creating new image names for the augmented images.
    img_name_wo_jpg = img_name.split(".")[0]
    img_name2 = img_name_wo_jpg+"_rot_pos40.jpg"
    img_name3 = img_name_wo_jpg+"_rot_pos20.jpg"
    img_name4 = img_name_wo_jpg+"_rot_neg20.jpg"
    img_name5 = img_name_wo_jpg+"_rot_neg40.jpg"
    img_name6 = img_name_wo_jpg+"_fliplr.jpg"
    img_name7 = img_name_wo_jpg+"_fliplr_rot_pos40.jpg"
    img_name8 = img_name_wo_jpg+"_fliplr_rot_pos20.jpg"
    img_name9 = img_name_wo_jpg+"_fliplr_rot_neg20.jpg"
    img_name10 = img_name_wo_jpg+"_fliplr_rot_neg40.jpg"

    # Saving the original image and all the augmented images into a new folder combined_faces_train_augmented.
    img_dest1 = os.path.join(new_path, img_name)
    cv2.imwrite(img_dest1, img, [int(cv2.IMWRITE_JPEG_QUALITY), 100])
    img_dest2 = os.path.join(new_path, img_name2)
    cv2.imwrite(img_dest2, img_rot_pos40, [int(cv2.IMWRITE_JPEG_QUALITY), 100])
    img_dest3 = os.path.join(new_path, img_name3)
    cv2.imwrite(img_dest3, img_rot_pos20, [int(cv2.IMWRITE_JPEG_QUALITY), 100])
    img_dest4 = os.path.join(new_path, img_name4)
    cv2.imwrite(img_dest4, img_rot_neg20, [int(cv2.IMWRITE_JPEG_QUALITY), 100])
    img_dest5 = os.path.join(new_path, img_name5)
    cv2.imwrite(img_dest5, img_rot_neg40, [int(cv2.IMWRITE_JPEG_QUALITY), 100])
    img_dest6 = os.path.join(new_path, img_name6)
    cv2.imwrite(img_dest6, img_fliplr, [int(cv2.IMWRITE_JPEG_QUALITY), 100])
    img_dest7 = os.path.join(new_path, img_name7)
    cv2.imwrite(img_dest7, img_fliplr_rot_pos40, [int(cv2.IMWRITE_JPEG_QUALITY), 100])
    img_dest8 = os.path.join(new_path, img_name8)
    cv2.imwrite(img_dest8, img_fliplr_rot_pos20, [int(cv2.IMWRITE_JPEG_QUALITY), 100])
    img_dest9 = os.path.join(new_path, img_name9)
    cv2.imwrite(img_dest9, img_fliplr_rot_neg20, [int(cv2.IMWRITE_JPEG_QUALITY), 100])
    img_dest10 = os.path.join(new_path, img_name10)
    cv2.imwrite(img_dest10, img_fliplr_rot_neg40, [int(cv2.IMWRITE_JPEG_QUALITY), 100])

    # Creating a new temp_df for the augmented images (similar to train_df).
    temp_df = pd.DataFrame(columns=train_df.columns)
    temp_df.loc[1] = ["/content"+img_dest1, img_age, img_target]
    temp_df.loc[2] = ["/content"+img_dest2, img_age, img_target]
    temp_df.loc[3] = ["/content"+img_dest3, img_age, img_target]
    temp_df.loc[4] = ["/content"+img_dest4, img_age, img_target]
    temp_df.loc[5] = ["/content"+img_dest5, img_age, img_target]
    temp_df.loc[6] = ["/content"+img_dest6, img_age, img_target]
    temp_df.loc[7] = ["/content"+img_dest7, img_age, img_target]
    temp_df.loc[8] = ["/content"+img_dest8, img_age, img_target]
    temp_df.loc[9] = ["/content"+img_dest9, img_age, img_target]
    temp_df.loc[10] = ["/content"+img_dest10, img_age, img_target]

    # Concatenating temp_df with the new train_aug_df.
    train_aug_df = pd.concat([train_aug_df, temp_df], axis=0, ignore_index=True)

    # Keeping track of progress and printing relevant statements for the user.
    if (i+1) % 500 == 0:
        print(f"Images augmented: {i+1} of {train_df.shape[0]}")

print("\nDone augmenting all training dataset images and saved them into combined_faces_train_augmented.")

KeyError: 'filename'