In [None]:
import xml.etree.ElementTree as ET
import numpy as np
import pandas as pd
import glob
import os
from collections import defaultdict
import itertools
import cv2
import matplotlib.pyplot as plt
import json
import random

def merge_dicts(a,b):
    """Merge two dict-of-lists by appending b's lists onto a's, key by key.

    Returns a defaultdict(list) seeded with the items of `a`; keys that only
    occur in `b` are created on demand. The seeding is shallow, so the list
    objects held by `a` are themselves extended in place.
    """
    merged = defaultdict(list)
    merged.update(a)
    for key in b:
        merged[key].extend(b[key])
    return merged

def polygon_xml(ob):
    """Collect the polygon points found under one XML element.

    Every <polygon> below `ob` is scanned. A <t> child sets the current t
    value; each <pt> child contributes its <x> and <y> integers. One t entry
    is recorded per <y>, so the three lists line up as long as every point
    has both coordinates.

    Returns:
        (tlist, xlist, ylist): lists of ints.
    """
    frames, xs, ys = [], [], []
    for polygon in ob.iter('polygon'):
        for child in polygon:
            if child.tag == 't':
                # remembered until the next <t> is seen
                t = int(child.text)
                continue
            if child.tag != 'pt':
                continue
            for coord in child:
                if coord.tag == 'x':
                    xs.append(int(coord.text))
                elif coord.tag == 'y':
                    ys.append(int(coord.text))
                    frames.append(t)
    return frames, xs, ys

def video_xml(fpath,user):
    """Parse one annotation XML file into column lists for a DataFrame.

    Args:
        fpath: path to the XML file. The video number is taken from the file
            name: the first character and the extension are dropped and the
            rest is parsed as an int.
        user: value stored in the 'user' column for every row of this file.

    Returns:
        dict with keys 'user', 'video', 'part', 'x', 'y', 't', each a list.
        'part' holds the text of the last <name> element found under the
        <object> that the point belongs to.

    Raises:
        ValueError: if an <object> has polygon points but no <name>. The name
            is resolved per object so a previous object's name is never
            reused.
    """
    root = ET.parse(fpath).getroot()
    zpartlist = []; ztlist = []; zxlist = []; zylist = []
    for ob in root.iter('object'): # goes through body parts
        tlist, xlist, ylist = polygon_xml(ob)
        names = [el.text for el in ob.iter('name')]
        if tlist and not names:
            raise ValueError('object without <name> in ' + str(fpath))
        if names:
            zpartlist.extend([names[-1]] * len(tlist))
        ztlist.extend(tlist)
        zxlist.extend(xlist)
        zylist.extend(ylist)
    # splitext instead of a fixed [-4:] slice, so the extension length does not matter
    stem = os.path.splitext(os.path.basename(fpath))[0]
    flist = [int(stem[1:])] * len(zylist)
    userlist = [user] * len(zylist)
    return {'user':userlist,'video':flist,'part': zpartlist, 'x': zxlist, 'y': zylist, 't': ztlist}

def box_coco(df):
    """Collapse the points of one bounding-box annotation into a single row.

    Args:
        df: rows with at least the columns 'part', 'x', 'y', 'box', 'box_seg'.
            NOTE(review): 'box' and 'box_seg' are assumed to exist already as
            placeholder columns; confirm against the caller.

    Returns:
        Series with entries 'part', 'x', 'y' (taken from the first row),
        'box' = array [min x, min y, width, height] and
        'box_seg' = array [x0, y0, x1, y1, ...] of all points in row order.
    """
    df = df.reset_index()
    x_min = np.min(df.x)
    y_min = np.min(df.y)
    boxlist = np.asarray([x_min, y_min, np.max(df.x)-x_min, np.max(df.y)-y_min]) # x,y,w,h
    xy = np.asarray([val for pair in zip(df.x, df.y) for val in pair])
    # Work on an explicit copy of the first row so the array cells are written
    # into our own Series rather than into a slice of the frame.
    df_out = df.loc[0,:].copy()
    df_out['box'] = boxlist
    df_out['box_seg'] = xy
    return df_out[['part','x', 'y', 'box','box_seg']]

class MyEncoder(json.JSONEncoder):
    """JSONEncoder that turns NumPy scalars and arrays into built-in types."""

    def default(self, obj):
        """Convert NumPy values; defer everything else to the base encoder."""
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        # the base implementation raises TypeError for unsupported objects
        return super(MyEncoder, self).default(obj)


def build_list(row_idx,xs,nparts=17):
    """Scatter values into a zero-filled list of fixed length.

    Args:
        row_idx: target positions; each is converted with int(), so float
            part numbers such as 3.0 are accepted.
        xs: values, parallel to row_idx.
        nparts: length of the returned list (default 17, the number of
            keypoints used in this notebook).

    Returns:
        list of length nparts with xs[i] stored at int(row_idx[i]) and 0 in
        every other slot. If a position repeats, the last value wins.
    """
    x_list = [0]*nparts
    for i, irow in enumerate(row_idx):
        x_list[int(irow)] = xs[i]
    return x_list

def get_anno(df):
    """Build one COCO-style annotation dict from the rows sharing an annotation id.

    Args:
        df: rows with columns 'partlabel', 'partnumber', 'x', 'y', 'box',
            'box_seg', 'id' and 'imageid'. Exactly the rows with
            partlabel == 'bbox' carry the box; the first of them is used.

    Returns:
        dict with keys "bbox", "keypoints", "num_keypoints", "segmentation",
        "area", "category_id", "iscrowd", "id", "image_id". "keypoints" is the
        flat list x0, y0, v0, x1, y1, v1, ... over all keypoint slots, with
        v = 2 for parts that have a part number and 0 (with x = y = 0) for the
        rest. Rows without a part number are ignored; if a part number occurs
        more than once, the last row wins.
    """
    box = df[df.partlabel=='bbox'].reset_index()
    box_coord = list(map(int, box.box[0]))

    dfparts = df.loc[df.partlabel!='bbox',:].sort_values('partnumber').reset_index()
    has_number = dfparts.partnumber.notnull()
    row_idx = dfparts.partnumber[has_number].tolist()
    xs = dfparts.x[has_number].tolist()
    ys = dfparts.y[has_number].tolist()
    vs = [2]*len(xs)
    x_list = list(map(int, build_list(row_idx, xs)))
    y_list = list(map(int, build_list(row_idx, ys)))
    v_list = list(map(int, build_list(row_idx, vs)))
    # interleave to x0, y0, v0, x1, y1, v1, ...
    partvec = list(itertools.chain(*zip(x_list, y_list, v_list)))
    num_keypoints = len(row_idx)

    # the last two box entries are width and height
    area = int(np.round(np.prod(box.box[0][-2:])))
    seg = list(box.box_seg[0].astype(int))
    anno_id = box.id[0]
    image_id = box.imageid[0]
    return {"bbox": box_coord, "keypoints": partvec, "num_keypoints": num_keypoints, "segmentation": [seg], "area": area,
            "category_id": 1, "iscrowd": 0, "id": anno_id, "image_id": image_id}

def rescale_values(df, new_max):
    """Scale coordinates and image sizes so each row's longer image side is new_max.

    The frame is modified in place (a 'max' column holding the larger of
    'ncols' and 'nrows' is added) and also returned. 'ncols' and 'nrows' are
    cast to int after scaling; 'x', 'y', 'box' and 'box_seg' are rounded with
    np.around.
    """
    scaled_cols = ['x','y', 'ncols', 'nrows','box', 'box_seg']
    size_cols = ('ncols', 'nrows')

    df['max'] = df[["ncols", "nrows"]].max(axis=1)
    ratio = df[scaled_cols].div(df['max'],axis=0)
    df[scaled_cols] = ratio*new_max

    for name in scaled_cols:
        column = df[name]
        if name in size_cols:
            df[name] = column.astype(int)
        else:
            df[name] = column.apply(np.around)
    return df

def get_image(df):
    """Build the image entry (file name, id, height, width) from a group's first row."""
    first = df.reset_index().loc[0,:]
    image_id = first.imageid
    height = int(first.nrows)
    width = int(first.ncols)
    return {'file_name':str(image_id)+'.jpg', 'id': image_id, 'height':height, 'width':width}

def remove_duplicates(df, df_frame, df_end):
    """Keep only the rows of one video whose t lies inside its frame window.

    Args:
        df: rows of a single video (the first value of df.video is used),
            with a 't' column.
        df_frame: table with 'video' and 'frame' columns giving the first t
            to keep for each video.
        df_end: table with 'video' and 'frame' columns giving the last t to
            keep; videos not listed here have no upper limit.

    Returns:
        The rows of df with t >= start and, when an end frame is listed,
        t <= end. If a video is listed more than once, the first entry is used.

    Raises:
        ValueError: if df_frame has no entry for the video.
    """
    video = df.video.unique()[0]

    start_rows = df_frame[df_frame.video == video]
    if len(start_rows) == 0:
        raise ValueError('no start frame listed for video %s' % video)
    # .iloc[0] rather than int(Series): converting a Series directly is deprecated
    frame = int(start_rows.frame.iloc[0])

    end_rows = df_end[df_end.video == video]
    if len(end_rows) > 0:
        frame_end = int(end_rows.frame.iloc[0])
        df_out = df[(df.t>=frame)&(df.t<=frame_end)]
    else:
        df_out = df[df.t>=frame]
    return df_out

def frame_reduce(df,step):
    """Keep the rows whose t is a multiple of step and drop the grouping columns.

    The index is reset first; the resulting 'index' column is dropped along
    with 'video', 'partlabel' and 'person'.
    """
    flat = df.reset_index()
    on_step = flat.t % step == 0
    kept = flat.loc[on_step,:]
    return kept.drop(['video','partlabel','index', 'person'],axis=1)



In [None]:
# Build the COCO-style keypoint annotation files (train / val) from the XML labels.
#
# notes:
# only deals with 1 subject per frame - change get_anno to support more

step = 4 # how many frames? 1 in 'step' frames

new_max = 640 # images are resized relative to some max npixels. Images in COCO are max 640 pixels

npartitions = 2 # how many partitions for train/test division? typical would be 10

split_seed = 0 # seed for the train/val shuffle, so re-running the cell reproduces the same split

json_name = '_infant_pose.json' # output file name

# Part labels as matched against the XML names (after the 4-character suffix is
# stripped below); the position in this list is the keypoint number.
# NOTE(review): 'ose', 'l_kee', 'r_kee', 'l_akle', 'r_akle' look like 'nose', 'knee'
# and 'ankle' with the letter 'n' missing - confirm against the labels in the XML.
# Labels that match nothing here get no part number and are reported further down.
parts = ['ose', 'l_eye', 'r_eye', 'l_ear', 'r_ear', 'l_shoulder', 'r_shoulder', 'l_elbow', \
 'r_elbow', 'l_wrist', 'r_wrist', 'l_hip', 'r_hip', 'l_kee', 'r_kee', 'l_akle', 'r_akle']

categories = [{'id': 1,'keypoints': ['nose','left_eye','right_eye','left_ear','right_ear','left_shoulder',
        'right_shoulder','left_elbow','right_elbow','left_wrist','right_wrist','left_hip','right_hip','left_knee',\
        'right_knee','left_ankle','right_ankle'],'name': 'person',\
        'skeleton': [[16, 14],[14, 12],[17, 15],[15, 13],[12, 13],[6, 12],[7, 13],[6, 7],[6, 8],[7, 9],[8, 10],[9, 11],\
        [2, 3],[1, 2],[1, 3],[2, 4],[3, 5],[4, 6],[5, 7]],'supercategory': 'person'}]

image_path = './files/image/*'
xmlpath = './files/xml/*'
jsonpath = './files/labels_NN_input'
outputimagepath = './files/image_NN_input'

# find image size for each folder, read from the first image in it
vidlist = []; rowlist = []; collist = []
img_folder = sorted(glob.glob(image_path))
for fpath in img_folder:
    imgfiles = sorted(glob.glob(os.path.join(fpath,'*')))
    if not imgfiles:
        raise IOError('no images found in ' + fpath)
    imgfile = imgfiles[0]
    vidlabel = int(os.path.basename(fpath)[1:])
    img = cv2.imread(imgfile,0)
    if img is None: # cv2.imread returns None instead of raising when a file cannot be read
        raise IOError('could not read image ' + imgfile)
    nrows, ncols = img.shape[:2]
    vidlist.append(vidlabel); rowlist.append(nrows); collist.append(ncols)
df_vid = pd.DataFrame(dict(video = vidlist, nrows = rowlist, ncols = collist))

# loop over user folders, get annotations
xml_folder = sorted(glob.glob(xmlpath))
zdict = {'user':[],'video':[],'part': [], 'x': [], 'y': [], 't': []}
for fpaths in xml_folder:
    user = os.path.basename(fpaths)
    fpaths = sorted(glob.glob(os.path.join(fpaths,'*.xml')))
    for fpath in fpaths:
        dict_file = video_xml(fpath, user)
        zdict = merge_dicts(zdict,dict_file)

df = pd.DataFrame(zdict, columns=['user','video', 'part', 'x', 'y', 't'])
df['box'] = np.nan; df['box_seg'] = np.nan; df['partnumber'] = np.nan
df['person'] = df['part'].str[-1:].astype(int)

# leave out videos with 2 infants
# find all videos where person=1 is there...remove
df['person_type'] = df['part'].str[-3:-2]
# remove adult labels, take infants only
df_1 = df[df.person_type=='b']

# for now, remove videos where there is a person label =1, change get anno to handle many people in frame
videos_w_2_infants = df_1[df_1.person==1].video.unique()
# .copy() so the column added next is written to an independent frame, not a slice of df_1
df = df_1[~np.isin(df_1.video, videos_w_2_infants)].copy()

# leave out suffix from string: b_0 from label string
df['partlabel'] = df['part'].str[:-4]
dfbox = df[df.partlabel=='bbox']
dfparts = df[df.partlabel!='bbox']

# numeric_only=True: the string columns cannot be averaged (newer pandas raises on them)
dfparts = dfparts.groupby(['user', 'video','person','partlabel','t']).mean(numeric_only=True).reset_index()
dfbox = dfbox.groupby(['user', 'video','person','partlabel','t']).apply(box_coco).reset_index()
# pd.concat replaces DataFrame.append, which newer pandas no longer provides
df1 = pd.concat([dfbox, dfparts])
df1 = df1[['user','video','person','part','partlabel','partnumber','t','x','y','box', 'box_seg']].copy()
part_index = {ipart: ii for ii, ipart in enumerate(parts)}
df1['partnumber'] = df1.partlabel.map(part_index) # NaN for 'bbox' and for unknown labels

# report labels that will be ignored as keypoints (typos, unexpected names)
unknown_labels = sorted(set(df1.partlabel.dropna().unique()) - set(parts) - {'bbox'})
if unknown_labels:
    print('labels not found in parts (ignored as keypoints):', unknown_labels)

df2 = pd.merge(df1,df_vid, on='video', how='inner')

# rescale all values
df2 = rescale_values(df2, new_max)

# add imageid and id
df2['imageid'] = [int('1%06d%06d' % (v, t)) for v, t in zip(df2.video, df2.t)]
df2['id'] = [int('1%06d%06d%02d' % (v, t, p)) for v, t, p in zip(df2.video, df2.t, df2.person)]

# divide into train/test by video
# Labels are drawn for the videos that still have annotations and mapped onto df_vid
# by video number; df_vid can hold more videos than df2 (e.g. the excluded ones), and
# those get NaN.
videos = sorted(df2.video.unique())
ntest = int(np.ceil(len(videos)/float(npartitions)))
ntrain = len(videos)-ntest
train_test_idx = [0]*ntrain + [1]*ntest
random.Random(split_seed).shuffle(train_test_idx)
df_vid['train_test'] = df_vid.video.map(dict(zip(videos, train_test_idx)))

df3 = df2.groupby(['video','partlabel','person']).apply(lambda x:frame_reduce(x,step)).reset_index()
df3 = pd.merge(df3,df_vid[['video', 'train_test']], how='left', on='video')

# remove random mislabels here. Check for these when new data is added
# df3.loc[(df3.partlabel=='b_elbow'), 'partlabel'] = 'r_elbow'

os.makedirs(jsonpath, exist_ok=True)

# save .json annotation file: train and test files
sets = ['train','val']
for i,iset in enumerate(sets):
    zdf = df3[df3.train_test==i]
    image_info = zdf.groupby('imageid').apply(get_image)
    anno = zdf.groupby('id').apply(get_anno)
    json_dict = {"annotations":anno.tolist(), "categories":categories, "images":image_info.tolist()}
    with open(os.path.join(jsonpath,'person_keypoints_'+iset+json_name), 'w') as f:
        json.dump(json_dict, f, cls=MyEncoder)

In [None]:
# resize images and save to train/test folder
# Uses image_path, df3 and df_vid as left by the annotation cell, so run that cell first.

outputimagepath = './files/image_NN_input'

out_folder = ['train_infant','val_infant'] # indexed by train_test: 0 -> train, 1 -> val

new_max = 640

for ifolder in out_folder:
    os.makedirs(os.path.join(outputimagepath,ifolder), exist_ok=True)

# Same folder listing as used when the image sizes were read, so hidden files are
# skipped and the folder names are used exactly as they are on disk.
image_folders_1 = sorted(glob.glob(image_path))

for i, ifolder in enumerate(image_folders_1):
    vidnumber = int(os.path.basename(ifolder)[1:])
    ts = sorted(df3[df3.video==vidnumber].t.unique())
    print('%s: %d frames' % (ifolder, len(ts)))
    if len(ts) == 0:
        continue # no annotated frames kept for this video
    train_test = int(df_vid[df_vid.video == vidnumber].train_test.iloc[0])
    new_image_path = os.path.join(outputimagepath,out_folder[train_test])

    for it in ts:
        iimage = ifolder+'/'+str(it)+'.jpg'
        I = cv2.imread(iimage)
        if I is None: # cv2.imread returns None instead of raising when a file cannot be read
            raise IOError('could not read image ' + iimage)
        height, width = I.shape[:2]
        max_image = max(height, width)
        new_image_name = '1%06d%06d.jpg' % (vidnumber, int(it))
        # cv2.resize takes the target size as (width, height)
        resized_I = cv2.resize(I, (int(width/max_image*new_max), int(height/max_image*new_max)))
        if not cv2.imwrite(os.path.join(new_image_path, new_image_name),resized_I):
            raise IOError('could not write image ' + os.path.join(new_image_path, new_image_name))