In [None]:
import scipy.io as sio
import numpy as np
from PIL import Image
import os
import h5py
import pandas as pd

from DataSource import DataSource

# Shared instance of the project-local DataSource helper. The code below calls its getters
# (getBodyMeasurementPath, getFrontPath, getSidePath, get*H5Path) and uses the results as
# filesystem paths.
dataSource = DataSource()

# Measurement columns to export, keyed by a single-letter label.
# Insertion order matters: list(selected.values()) is used as the label column order.
selected = dict(
    A="Across_Back",  # note: this is not measurement "A" in the paper
    B="NeckBase_Circ",
    C="Torso_Height",  # not available in the male data
    D="BUST_Circ",
    E="NaturalWAIST_Circ",
    F="HIP_Circ",
    G="Wrist_Circ",
    H="Bicep_Circ",
    I="Elbow_Circ",
    J="Shoulder_to_Wrist",
    K="Inseam",
    L="Thigh_Circ",
    M="Calf_Circ",
    N="Ankle_Circ",
    O="Head_Top_Height",
    P="Shoulder_to_Shoulder",
)

In [None]:
def load_one_mat_file(fn, name):
    """Read one .mat file and flatten its ``s`` struct into two parallel lists.

    Args:
        fn: Path of the .mat file, handed to ``scipy.io.loadmat``.
        name: Identifier stored as the first value, under the column ``"name"``.

    Returns:
        ``(values, columns)`` where ``values`` is ``[name, v0, v1, ...]`` with each ``v``
        being element ``[0, 0]`` of the corresponding struct field, and ``columns`` is
        ``["name", field0, field1, ...]`` in the struct's own field order.
    """
    # loadmat exposes the struct as a 2-D array; element [0, 0] is the record itself.
    record = sio.loadmat(fn)['s'][0, 0]
    fields = list(record.dtype.names)
    values = [name] + [record[field][0, 0] for field in fields]
    columns = ["name"] + fields
    return values, columns

def load_one_pic(fn, resize=(224, 224), channel=1):
    """Load an image, resize it, and return it as a channel-first array.

    Args:
        fn: Path of the image file, opened with ``PIL.Image.open``.
        resize: Target size passed unchanged to ``Image.resize``, i.e. ``(width, height)``.
        channel: Number of channels the decoded image is expected to have.

    Returns:
        Array of shape ``(channel, height, width)``. For the default square,
        single-channel case this is ``(1, 224, 224)``.

    Raises:
        ValueError: If the decoded image does not have ``channel`` channels.
    """
    # The context manager releases the underlying file handle once the pixels are read.
    with Image.open(fn) as img:
        npimg = np.asarray(img.resize(resize))

    if npimg.ndim == 2:
        # Single-channel images decode to (height, width); add the leading channel axis.
        npimg = npimg[np.newaxis, :, :]
    else:
        # Multi-channel images decode to (height, width, channels). The channel axis has to
        # be *moved* to the front; a plain reshape would reinterpret the buffer and
        # interleave pixels from different channels.
        npimg = np.transpose(npimg, (2, 0, 1))

    if npimg.shape[0] != channel:
        raise ValueError(
            f"expected {channel} channel(s) but image {fn!r} has {npimg.shape[0]}"
        )
    return npimg

def build_body_measurement_df():
    """Collect every ``.mat`` file in the body-measurement directory into one DataFrame.

    Each file contributes one row, produced by ``load_one_mat_file``; the row's ``name``
    is the file name up to its first ``"."``.

    Returns:
        ``pandas.DataFrame`` with one row per ``.mat`` file, in sorted file-name order.
        Column names are those reported for the last file read, so all files are
        expected to expose the same fields in the same order.
    """
    measurement_dir = dataSource.getBodyMeasurementPath()
    rows, columns = [], []
    # os.listdir returns entries in arbitrary order. Sorting makes the row order -- and
    # therefore any positional split of the frame -- reproducible across runs and machines.
    for fn in sorted(os.listdir(measurement_dir)):
        if not fn.endswith(".mat"):
            continue
        row, columns = load_one_mat_file(os.path.join(measurement_dir, fn), fn.split(".")[0])
        rows.append(row)
    return pd.DataFrame(rows, columns=columns)

In [None]:
def _negative_to_nan(value):
    """Return NaN for a negative non-string cell; return every other cell unchanged."""
    if type(value) is str:
        return value
    return np.nan if value < 0 else value


# Negative measurements become NaN, then every row containing a NaN is dropped.
body_measurement_df = build_body_measurement_df().map(_negative_to_nan).dropna()
body_measurement_df.to_csv("female_body.csv")
print(body_measurement_df.shape)
body_measurement_df.head()

In [None]:
# train, validate, test split
# train: 70%, validate: 20%, test: 10%

def split_train_validate_test(df, train=0.7, validate=0.2, test=0.1):
    """Split ``df`` positionally into train / validate / test slices.

    No shuffling is done: the first ``train`` fraction of rows is the training slice,
    the next ``validate`` fraction is the validation slice, and everything left over is
    the test slice.

    Args:
        df: Any sliceable sequence with ``len`` (a DataFrame in this notebook).
        train: Fraction of rows for the training slice.
        validate: Fraction of rows for the validation slice.
        test: Not used in the computation; the test slice is always the remainder.

    Returns:
        ``(train_df, validate_df, test_df)``.
    """
    total = len(df)

    def _cut(fraction):
        # Floor of total * fraction, with a tiny tolerance for binary round-off:
        # 0.7 + 0.2 evaluates to 0.8999999999999999, so 100 rows would otherwise be cut
        # at 89 instead of 90 and the validate/test sizes would be off by one.
        return int(total * fraction + 1e-9)

    train_end = _cut(train)
    validate_end = _cut(train + validate)
    return df[:train_end], df[train_end:validate_end], df[validate_end:]

# build h5py
def build_h5py(df, selected_names, h5py_fn):
    """Write images and selected measurements for the rows of ``df`` to an HDF5 file.

    For every row, ``<name>.png`` is looked up in both the front and the side image
    directories. Rows missing either image are skipped. The file is created (or
    overwritten) with four datasets:

    * ``data_front`` / ``data_side`` -- the arrays returned by ``load_one_pic``
    * ``labels`` -- the ``selected_names`` columns of each kept row, as float32
    * ``label_names`` -- ``selected_names`` as variable-length strings

    Args:
        df: DataFrame containing a ``"name"`` column and every column in ``selected_names``.
        selected_names: List of measurement column names to export as labels.
        h5py_fn: Destination path of the HDF5 file.
    """
    selected_df = df[selected_names + ["name"]]
    front_dir = dataSource.getFrontPath()
    side_dir = dataSource.getSidePath()
    front_data, side_data, labels = [], [], []

    for _, row in selected_df.iterrows():
        front_fn = os.path.join(front_dir, row["name"] + ".png")
        side_fn = os.path.join(side_dir, row["name"] + ".png")
        # A sample is only usable when both views are present.
        if not (os.path.exists(front_fn) and os.path.exists(side_fn)):
            continue
        front_data.append(load_one_pic(front_fn))
        side_data.append(load_one_pic(side_fn))
        labels.append(np.array(row[selected_names].values, dtype=np.float32))
    print(f"selected data length: {len(labels)}")

    # The context manager closes the file even when a dataset write raises, so a failed
    # export does not leave an open handle behind in the kernel.
    with h5py.File(h5py_fn, "w") as f:
        f['data_front'] = front_data
        f['data_side'] = side_data
        f['labels'] = labels
        # A plain list of str becomes a NumPy fixed-width unicode ('U') array, which h5py
        # does not store; write the names with an explicit variable-length string dtype.
        f.create_dataset('label_names', data=list(selected_names), dtype=h5py.string_dtype())

In [None]:
# Export one HDF5 file per split, plus one containing every cleaned row.
# (The split is computed once; the earlier duplicate call discarded its result.)
selected_names = list(selected.values())
train_df, val_df, test_df = split_train_validate_test(body_measurement_df)
for split_df, h5_path in (
    (train_df, dataSource.getTrainH5Path()),
    (val_df, dataSource.getValidateH5Path()),
    (test_df, dataSource.getTestH5Path()),
    (body_measurement_df, dataSource.getMeasurementH5Path()),
):
    build_h5py(split_df, selected_names, h5_path)

In [None]:
# Sanity-check the combined measurement file: shape and dtype of the front-image dataset,
# shape of the labels, and the per-column maximum and minimum of the labels.
# The context manager closes the file even if one of the checks raises, so the handle is
# not left open in the kernel.
with h5py.File(dataSource.getMeasurementH5Path(), "r") as f:
    fnt = f['data_front']
    print(fnt.shape)
    print(fnt.dtype)
    lbs = f['labels']
    print(lbs.shape)
    print(np.max(lbs, axis=0))
    print(np.min(lbs, axis=0))