In [1]:
import os
import random

import numpy as np
import pandas as pd
from easydict import EasyDict
from scipy.io import loadmat

# Pin both the stdlib and the NumPy global RNGs so any sampling done later in
# the notebook is reproducible across kernel restarts.
random.seed(0)
np.random.seed(0)

In [2]:
# Root folders of the two datasets, relative to the notebook's working directory.
# Both are consumed through os.path.join() in the cells below.
peta_dir = './data/PETA/'
pa100k_dir = './data/PA100k/'


In [3]:
"""
Create the PA100k dataset description (image names, labels, attribute names)
and a DataFrame view with one row per image and one column per attribute.
"""
pa100k_data = loadmat(os.path.join(pa100k_dir, 'annotation.mat'))

pa100k_dataset = EasyDict()
pa100k_dataset.description = 'pa100k'
# pa100k_dataset.root = os.path.join(pa100k_dir, 'data')

# Each entry is wrapped in two levels of nesting, hence the [0][0] unwrapping.
# Iterate over the arrays themselves instead of hard-coding the split sizes so
# the names always match whatever the annotation file contains.
train_image_name = [entry[0][0] for entry in pa100k_data['train_images_name']]
val_image_name = [entry[0][0] for entry in pa100k_data['val_images_name']]
test_image_name = [entry[0][0] for entry in pa100k_data['test_images_name']]
pa100k_dataset.image_name = train_image_name + val_image_name + test_image_name

# Stack the label matrices in the same train/val/test order as the image names.
pa100k_dataset.label = np.concatenate(
    (pa100k_data['train_label'], pa100k_data['val_label'], pa100k_data['test_label']), axis=0)
assert pa100k_dataset.label.shape == (100000, 26)
assert len(pa100k_dataset.image_name) == pa100k_dataset.label.shape[0], (
    'number of image names does not match number of label rows')
pa100k_dataset.attr_name = [entry[0][0] for entry in pa100k_data['attributes']]

# The default RangeIndex (0..n-1) gives the same row labels as an explicit
# list of consecutive integers.
pa100k_df = pd.DataFrame(data=pa100k_dataset.label,
                         columns=pa100k_dataset.attr_name)

pa100k_df['image_id'] = pa100k_dataset.image_name
print(pa100k_df.head())

   Female  AgeOver60  Age18-60  AgeLess18  Front  Side  Back  Hat  Glasses  \
0       1          0         1          0      1     0     0    0        0   
1       0          0         1          0      0     0     1    0        0   
2       1          0         1          0      0     1     0    0        0   
3       0          0         1          0      0     0     1    0        0   
4       1          0         1          0      0     1     0    0        0   

   HandBag  ...  UpperPlaid  UpperSplice  LowerStripe  LowerPattern  LongCoat  \
0        0  ...           0            0            0             0         0   
1        0  ...           0            0            0             0         0   
2        0  ...           0            0            0             0         0   
3        0  ...           0            0            0             0         0   
4        1  ...           0            0            0             0         0   

   Trousers  Shorts  Skirt&Dress  boots    i

In [8]:
# Load the programmatically annotated PA100k table and discard its first
# column.
annotated_csv = pd.read_csv(os.path.join(pa100k_dir, 'program_annotated_zero_one_pa100k.csv'))
annotated_pa100k_df = annotated_csv.drop(columns=annotated_csv.columns[0])

# Target column -> PA100k source column(s). When several source columns are
# listed, they are combined with a bitwise OR. Insertion order is kept so the
# columns are written in the same sequence as before.
pa100k_sources_by_target = {
    'accessoryHat': ('Hat',),
    'accessorySunglasses': ('Glasses',),
    'carryingBackpack': ('Backpack',),
    'carryingFolder': ('HandBag',),
    'carryingMessengerBag': ('ShoulderBag',),
    'carryingOther': ('HoldObjectsInFront',),
    'footwearBoots': ('boots',),
    'lowerBodyShortSkirt': ('Skirt&Dress',),
    'lowerBodyShorts': ('Shorts',),
    'lowerBodyTrousers': ('Trousers',),
    'personalLarger60': ('AgeOver60',),
    'personalLess15': ('AgeLess18',),
    'personalLess60': ('Age18-60',),
    'upperBodyCasual': ('UpperPlaid',),
    'upperBodyFormal': ('LongCoat',),
    'upperBodyLogo': ('UpperLogo',),
    'upperBodyLongSleeve': ('LongSleeve',),
    'upperBodyOther': ('UpperStride', 'UpperSplice'),
    'upperBodyShortSleeve': ('ShortSleeve',),
}

for target_column, source_columns in pa100k_sources_by_target.items():
    combined = pa100k_df[source_columns[0]]
    for extra_column in source_columns[1:]:
        combined = combined | pa100k_df[extra_column]
    annotated_pa100k_df[target_column] = combined

print(annotated_pa100k_df.head())

   personalLess30  personalLess45  personalLess60  personalLarger60  \
0               0               0               1                 0   
1               0               1               1                 0   
2               0               1               1                 0   
3               0               1               1                 0   
4               1               0               1                 0   

   carryingBackpack  carryingOther  lowerBodyCasual  upperBodyCasual  \
0                 0              1                1                0   
1                 0              0                1                0   
2                 0              0                1                0   
3                 0              0                1                0   
4                 0              0                1                0   

   lowerBodyFormal  upperBodyFormal  ...  carryingBrown  carryingGreen  \
0                0                0  ...              0           

In [10]:
# Show (rows, columns) of the annotated PA100k frame as a quick sanity check.
annotated_pa100k_df.shape

(100000, 117)

In [5]:
"""
Create the PETA dataset description: image names, labels and attribute names.
"""
peta_data = loadmat(os.path.join(peta_dir, 'PETA_added.mat'))

peta_dataset = EasyDict()
peta_dataset.description = 'peta'
peta_dataset.reorder = 'group_order'
peta_dataset.root = os.path.join(peta_dir, 'images')

# Attribute names are nested two levels deep and may carry stray whitespace.
raw_attr_name = [i[0][0].strip() for i in peta_data['peta'][0][0][1]]
# Skip the first four columns of the annotation matrix; the remaining columns
# are the labels. NOTE(review): the recorded peta_df.shape of (19000, 117)
# (labels + image_id) implies (19000, 116) here - confirm for your .mat file.
raw_label = peta_data['peta'][0][0][0][:, 4:]

# One image name per label row, numbered from 100001 onwards. The count is
# taken from the label matrix instead of being hard-coded.
peta_dataset.image_name = [f'{100000 + i + 1:06}.png' for i in range(raw_label.shape[0])]

peta_dataset.label = raw_label
peta_dataset.attr_name = raw_attr_name



In [7]:
# Tabular view of the PETA labels: one row per image, one column per attribute,
# plus the image file name in `image_id`.
peta_row_count = peta_dataset.label.shape[0]
peta_df = pd.DataFrame(
    peta_dataset.label,
    index=list(range(peta_row_count)),
    columns=peta_dataset.attr_name,
)
peta_df['image_id'] = peta_dataset.image_name

print(peta_df.head())

   personalLess30  personalLess45  personalLess60  personalLarger60  \
0               1               0               0                 0   
1               1               0               0                 0   
2               1               0               0                 0   
3               1               0               0                 0   
4               1               0               0                 0   

   carryingBackpack  carryingOther  lowerBodyCasual  upperBodyCasual  \
0                 0              0                1                0   
1                 0              0                1                0   
2                 0              0                1                0   
3                 0              0                1                1   
4                 0              0                1                1   

   lowerBodyFormal  upperBodyFormal  ...  carryingBrown  carryingGreen  \
0                0                0  ...              0           

In [9]:
# Show (rows, columns) of the PETA frame; the column count includes `image_id`.
peta_df.shape

(19000, 117)

In [18]:
# The two frames must expose the same columns in the same order.
# Comparing plain lists (rather than Index == Index) turns a column-count
# mismatch into a readable AssertionError instead of a ValueError.
assert list(annotated_pa100k_df.columns) == list(peta_df.columns), (
    'PA100k and PETA frames differ in columns or column order; '
    'columns present in only one frame: '
    f'{sorted(set(annotated_pa100k_df.columns) ^ set(peta_df.columns))}'
)

## Rename PETA image files so their numbering starts at 100001

In [19]:
# Display the directory whose files the rename cell below lists.
peta_dataset.root

'./data/PETA/images'

In [22]:
# Safety switch: leave True to only preview the old -> new mapping.
# Set to False to actually rename the files on disk. Run that once, on the
# pristine image directory; re-running on a partially renamed folder would
# shuffle the numbering.
dry_run = True
preview_limit = 10

# os.listdir() returns entries in arbitrary order, so sort them to keep the new
# sequential names in the same order as the original file names.
for i, filename in enumerate(sorted(os.listdir(peta_dataset.root))):
    if dry_run and i >= preview_limit:
        # Nothing left to show and nothing to rename in preview mode.
        break

    new_name = f'{100000 + i + 1:06}.png'

    src = os.path.join(peta_dataset.root, filename)
    dst = os.path.join(peta_dataset.root, new_name)

    # Only echo the first few pairs to avoid flooding the cell output.
    if i < preview_limit:
        print(src)
        print(dst)

    if not dry_run:
        os.rename(src, dst)

./data/PETA/images\00001.png
./data/PETA/images\100001.png
./data/PETA/images\00002.png
./data/PETA/images\100002.png
./data/PETA/images\00003.png
./data/PETA/images\100003.png
./data/PETA/images\00004.png
./data/PETA/images\100004.png
./data/PETA/images\00005.png
./data/PETA/images\100005.png
./data/PETA/images\00006.png
./data/PETA/images\100006.png
./data/PETA/images\00007.png
./data/PETA/images\100007.png
./data/PETA/images\00008.png
./data/PETA/images\100008.png
./data/PETA/images\00009.png
./data/PETA/images\100009.png
./data/PETA/images\00010.png
./data/PETA/images\100010.png
