In [127]:
import pandas as pd
import numpy as np
import os
from PIL import Image
from tqdm import tqdm
import json
import matplotlib.pyplot as plt
import random


In [118]:
def keypoint_process(keypoint) :
    keypoint = keypoint.replace('[ ', '[')
    keypoint = keypoint.replace('  ', ' ')
    keypoint = keypoint.replace(' ', ', ')
    return keypoint
def ochuman_to_array(df) :
    data_list = []
    for index, (name, keypoints_y, keypoints_x, label) in df.iterrows() :
        keypoints_y = json.loads(keypoint_process(keypoints_y))
        keypoints_x = json.loads(keypoint_process(keypoints_x))
        label = json.loads(keypoint_process(label))
        data_list.append([[h, w, l] for h, w, l in zip(keypoints_y, keypoints_x, label)])
    return np.array(data_list)

def fashion_to_array(df) :
    data_list = []
    for index, (name, keypoints_y, keypoints_x) in df.iterrows() :
        keypoints_y = json.loads(keypoints_y)
        keypoints_x = json.loads(keypoints_x)
        if -1 in keypoints_y or -1 in keypoints_x : continue # 18개짜리 keypoint만
        data_list.append([[h, w, 0] for h, w in zip(keypoints_y, keypoints_x)])
    return np.array(data_list)

In [119]:
df_ochuman = pd.read_csv('human/occlusion_df.csv', sep=":")
df_fasion_train = pd.read_csv('human/fasion-annotation-train.csv', sep=":")
df_fasion_test = pd.read_csv('human/fasion-annotation-test.csv', sep=":")

In [120]:
ochuman_array = ochuman_to_array(df_ochuman)
fashion_train_array = fashion_to_array(df_fasion_train)
fashion_train_array[3566, 9, 1] = 0
fashion_test_array = fashion_to_array(df_fasion_test)

In [121]:
print('OCHuman shape:', ochuman_array.shape)
print('Fashion train shape:', fashion_train_array.shape)
print('Fashion test shape:', fashion_test_array.shape)

OCHuman shape: (8110, 18, 3)
Fashion train shape: (7979, 18, 3)
Fashion test shape: (627, 18, 3)


In [122]:
dataset = np.concatenate([ochuman_array, fashion_train_array, fashion_test_array], axis = 0)

In [123]:
dataset[:, :, 2].mean(0)

array([0.        , 0.08817899, 0.05587461, 0.04881551, 0.06095956,
       0.06155779, 0.05784877, 0.06999282, 0.06448911, 0.02901412,
       0.02327112, 0.07005264, 0.03062934, 0.02285236, 0.        ,
       0.        , 0.        , 0.        ])

In [126]:
ochuman_array[:, :, 2].mean(0)

array([0.        , 0.18175092, 0.11516646, 0.10061652, 0.12564735,
       0.12688039, 0.11923551, 0.14426634, 0.13292232, 0.05980271,
       0.04796547, 0.14438964, 0.06313194, 0.04710234, 0.        ,
       0.        , 0.        , 0.        ])

# Split

In [137]:
def get_splitindex(length) :
    index_list = [i for i in range(length)]
    random.shuffle(index_list)
    first_cut = int(length * 0.7)
    second_cut = int(length * 0.85)
    
    return index_list[:first_cut], index_list[first_cut : second_cut], index_list[second_cut:]

In [143]:
def print_ratio(array) :
    ratio = array[:, :, 2].mean(0)
    print(ratio)

In [147]:
dataset.shape

(16716, 18, 3)

In [138]:
length = dataset.shape[0]
train_index, test_index, valid_index = get_splitindex(length)

In [151]:
train = dataset[train_index]
test = dataset[test_index]
valid = dataset[valid_index]

print('train ratio:', print_ratio(train))
print('test ratio:', print_ratio(test))
print('valid ratio:', print_ratio(valid))

[0.         0.08606102 0.05503803 0.04785916 0.06033672 0.0615332
 0.05666182 0.06931032 0.06409709 0.02803179 0.02230579 0.07059226
 0.03051021 0.02298949 0.         0.         0.         0.        ]
train ratio: None
[0.         0.09254089 0.05384922 0.04866374 0.06461907 0.05823694
 0.05704029 0.0733945  0.06222577 0.02512964 0.02114081 0.06820901
 0.02752294 0.01994416 0.         0.         0.         0.        ]
test ratio: None
[0.         0.09370016 0.06180223 0.05342903 0.06020734 0.06499203
 0.06419458 0.06977671 0.06858054 0.03748006 0.02990431 0.06937799
 0.03429027 0.02511962 0.         0.         0.         0.        ]
valid ratio: None


In [152]:
def array_to_df(array) :
    y = array[:, :, 0].tolist()
    x = array[:, :, 1].tolist()
    label = array[:, :, 2].tolist()
    
    data_dict = {'keypoints_y' : y,
                'keypoints_x' : x,
                'label' : label}
    df = pd.DataFrame.from_dict(data_dict)
    return df
    

In [154]:
train_df = array_to_df(train)
test_df = array_to_df(test)
valid_df = array_to_df(valid)

In [158]:
train_df.to_csv('train_annotation.csv', index=False, sep=':')
test_df.to_csv('test_annotation.csv', index=False, sep=':')
valid_df.to_csv('valid_annotation.csv', index=False, sep=':')