FG-NET dataset by Yanwei Fu<br>
I do not own this dataset.<br>
This dataset is used for research purposes only.

This is the FG-NET data. Apparently, the original FG-NET website does not provide this data anymore.<br>
I provide it on my homepage. Cory (Ke Chen) gave me this data, which is used in his paper [3].

[1] Y. Fu et al., "Robust Subjective Visual Property Prediction from Crowdsourced Pairwise Labels," IEEE TPAMI, 2016.<br>
[2] Y. Fu et al., "Interestingness Prediction by Robust Learning to Rank," ECCV, 2014.<br>
[3] K. Chen et al., "Cumulative Attribute Space for Age and Crowd Density Estimation," CVPR, 2013.

To download all data: http://yanweifu.github.io/FG_NET_data/FGNET.zip

Data description (the paths below are relative to http://www.eecs.qmul.ac.uk/~yf300/FG_NET_data/):<br>
./images folder: all human face images.<br>
The ground truth is encoded in each image's file name.<br>
For example, 078A11.JPG is the image of person No. 78 at the age of 11.<br>
'A' is short for Age.

./points folder: the 68 manually annotated points for each image in the ./images folder.<br>
The annotations are of much higher quality than those of other datasets, e.g. MORPH (saved in /export/beware/thumper/yf300/Age_estimation_org_data_backup/ageEstimation/MOPRH).<br>
However, MORPH is a much bigger dataset than FG-NET.

./feature_generation_tools: the tools used to generate the features.<br>
./feature_generation_tools/how-to-use-it: a tutorial on how to use the tools.<br>
./age50_10_round.mat contains the 10 rounds of data used in my work [1].<br>
Normally, you should first split the data into training and testing sets yourself,<br>
and then generate the low-level features for the training and testing data respectively.<br>
For each split, the training/testing features are not the same,<br>
because generating the training features also requires referring to the annotations of the testing features.

There is also a very good tutorial and MATLAB labelling tool for AAM/ASM.<br>
You can download it from: http://yanweifu.github.io/FG_NET_data/AAM_verygood.rar<br>
Note that some of it is written in Chinese.

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import cv2
import os


def read_pts(fname):
    """Read the landmark coordinates stored in a ``.pts`` annotation file.

    The parser relies on the following layout: the second line has the form
    ``<key>: <count>`` and gives the number of points, and the coordinate
    rows start on the fourth line, one whitespace-separated point per line.

    Args:
        fname: Path of the ``.pts`` file to read.

    Returns:
        A float ``np.ndarray`` with one row per point and one column per
        coordinate found on each row.

    Raises:
        IndexError, ValueError: If the header does not match the layout above
            or a coordinate cannot be parsed as a float.
    """
    # The context manager closes the file even when parsing fails below.
    with open(fname) as f:
        lines = f.readlines()

    n_points = int(lines[1].split(':')[1].strip())

    # Skip the three header lines and keep exactly ``n_points`` rows.
    rows = lines[3:n_points + 3]
    return np.array([[float(value) for value in row.split()] for row in rows])


def _index_by_stem(directory, extension):
    """Map lower-cased file stem -> file name for files in `directory` ending in `extension`."""
    index = {}
    for name in os.listdir(directory):
        stem, ext = os.path.splitext(name)
        if ext.lower() == extension:
            index[stem.lower()] = name
    return index


def _align_face(img, landmarks):
    """Rotate `img` by the inclination of the pupil line; return the image and the moved landmarks."""
    # 31: Left Pupil
    # 36: Right Pupil
    pupils = landmarks[[31, 36], :]
    delta_x, delta_y = pupils[1] - pupils[0]
    angle = np.arctan(delta_y / delta_x) * 180 / np.pi

    h, w = img.shape
    # OpenCV expects the rotation centre as (x, y), i.e. (column, row).
    center = (w // 2, h // 2)

    rotation_matrix = cv2.getRotationMatrix2D(center=center, angle=angle, scale=1)
    aligned = cv2.warpAffine(img, rotation_matrix, dsize=(w, h))

    # Apply the same affine transform to the landmarks so they keep matching the image.
    moved = np.dot(landmarks, rotation_matrix[:, 0:2].T) + rotation_matrix[:, 2]
    return aligned, moved


def _crop_to_square(img, landmarks):
    """Crop `img` to the landmark bounding box, widening the shorter side to make it square."""
    x_min, y_min = int(np.min(landmarks[:, 0])), int(np.min(landmarks[:, 1]))
    x_max, y_max = int(np.max(landmarks[:, 0])), int(np.max(landmarks[:, 1]))

    x_start, x_stop = x_min, x_max + 1
    y_start, y_stop = y_min, y_max + 1

    height = y_stop - y_start
    width = x_stop - x_start
    diff = abs(height - width)
    pad = diff // 2
    if height > width:
        x_start -= pad
        x_stop += diff - pad
    elif height < width:
        y_start -= pad
        y_stop += diff - pad

    # A negative bound would be read as "count from the end" by slicing and
    # select the wrong region, so clamp to the image origin. Bounds past the
    # far edge are already truncated by NumPy.
    x_start, x_stop = max(x_start, 0), max(x_stop, 0)
    y_start, y_stop = max(y_start, 0), max(y_stop, 0)
    return img[y_start:y_stop, x_start:x_stop]


def FGNET_DataLoader():
    """Load, align, crop and normalise every FG-NET face image.

    Images are read in grayscale from ``<cwd>/FGNET/images`` and their
    landmarks from ``<cwd>/FGNET/points``. Each image is paired with the
    ``.pts`` file that has the same (case-insensitive) file stem, rotated
    using the two pupil landmarks, cropped to a square around the landmarks,
    contrast-equalised with CLAHE and resized to 48x48.

    Images that have no matching landmark file, or that OpenCV cannot read,
    are skipped with a warning.

    Returns:
        A tuple ``(face_list, age_list, pid_list)`` of equally long lists:
        the 48x48 face images, the age parsed from characters 4-5 of the
        image file name, and the person ID parsed from characters 0-2
        (used for Leave One Person Out).
    """
    import warnings

    root = os.path.join(os.getcwd(), "FGNET")
    image_dir = os.path.join(root, "images")
    points_dir = os.path.join(root, "points")

    images = _index_by_stem(image_dir, ".jpg")
    annotations = _index_by_stem(points_dir, ".pts")

    # The equaliser does not depend on the image, so build it once.
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))

    face_list, age_list, pid_list = [], [], []
    # Sorted stems give a deterministic order regardless of how the OS lists files.
    for stem in sorted(images):
        file_name_jpg = images[stem]
        file_name_pts = annotations.get(stem)
        if file_name_pts is None:
            warnings.warn("No landmark file for image {}; skipping.".format(file_name_jpg))
            continue

        img = cv2.imread(os.path.join(image_dir, file_name_jpg), cv2.IMREAD_GRAYSCALE)  # cv2.IMREAD_COLOR
        if img is None:
            # cv2.imread signals an unreadable file by returning None.
            warnings.warn("Could not read image {}; skipping.".format(file_name_jpg))
            continue

        landmarks = read_pts(os.path.join(points_dir, file_name_pts))

        img, landmarks = _align_face(img, landmarks)
        img = _crop_to_square(img, landmarks)

        img = clahe.apply(img)
        img = cv2.resize(src=img, dsize=(48, 48), interpolation=cv2.INTER_LINEAR)

        face_list.append(img)                      # Face Image
        age_list.append(int(file_name_jpg[4:6]))   # Age
        pid_list.append(int(file_name_jpg[0:3]))   # Person ID for Leave One Person Out(LOPO)

    return face_list, age_list, pid_list


def DrawImages(img, ratio=1):
    """Show a sequence of images in a grid that is at most 10 columns wide.

    Args:
        img: Sequence of images; each element is passed to ``plt.imshow``
            with the ``"gray"`` colour map.
        ratio: Scale factor for the figure size; the figure is
            ``cols * ratio`` by ``rows * ratio``.

    Returns:
        None. Nothing is drawn when ``img`` is empty.
    """
    max_cols = 10
    n = len(img)
    if n == 0:
        # An empty input would otherwise request a figure of size (0, 0).
        return

    rows = -(-n // max_cols)  # ceiling division
    # A single, possibly partial, row only needs as many columns as images.
    cols = n if rows < 2 else max_cols

    plt.figure(figsize=(cols * ratio, rows * ratio))
    for position, image in enumerate(img, start=1):
        plt.subplot(rows, cols, position)
        plt.imshow(image, cmap="gray")
        plt.axis("off")

    plt.show()
    

def VisualizeCounts(arr, xaxis_name):
    """Draw a bar chart of how many times each distinct value occurs in ``arr``.

    Args:
        arr: Values to count; distinct values become the bar positions.
        xaxis_name: Label for the x axis.
    """
    distinct_values, occurrences = np.unique(arr, return_counts=True)

    plt.figure()
    plt.bar(distinct_values, occurrences, color="royalblue")
    plt.grid(True)
    plt.ylabel("The Number of Samples")  # Count
    plt.xlabel(xaxis_name)
    plt.show()
    

def VisualizeScatter(arr1, arr2):
    """Scatter-plot ``arr2`` against ``arr1`` on axes labelled "Person ID" and "Age".

    Args:
        arr1: x coordinates of the points.
        arr2: y coordinates of the points.
    """
    figure_size = (10, 3)
    plt.figure(figsize=figure_size)
    plt.scatter(arr1, arr2, s=0.8, c="blue")
    plt.grid(True)
    plt.ylabel("Age")
    plt.xlabel("Person ID")
    plt.show()

In [None]:
# Preprocess the whole dataset once and cache it as a compressed .npz archive.
npz_dir = os.path.join(os.getcwd(), "FGNET", "npz")
os.makedirs(npz_dir, exist_ok=True)  # no-op when the directory already exists

x, y, pid = FGNET_DataLoader()
np.savez_compressed(file=os.path.join(npz_dir, "fgnet.npz"), x=x, y=y, pid=pid)

In [None]:
# Reload the cached arrays; the context manager closes the archive once they are read.
with np.load(file=os.path.join(os.getcwd(), "FGNET", "npz", "fgnet.npz")) as data:
    x = data['x']
    y = data['y']
    pid = data["pid"]

# Number of samples per age.
VisualizeCounts(y, "Age")

In [None]:
# Bar chart of the number of samples per person ID.
VisualizeCounts(pid, "Person ID")

In [None]:
# Scatter plot with person ID on the x axis and age on the y axis.
VisualizeScatter(pid, y)

In [None]:
# Show every loaded face image in a grid.
DrawImages(x)