In [1]:
import numpy as np
import pandas as pd
from scipy.io import loadmat


class MPIIData(object):
    """Thin loader for MPII Human Pose annotation ``.mat`` files.

    Parameters
    ----------
    base_dir : str
        Directory that holds the annotation files. A trailing ``'/'`` is
        appended when it is missing, so ``'/data/mpii'`` and ``'/data/mpii/'``
        are equivalent. An empty string leaves file names untouched (they are
        then resolved relative to the current working directory).
    """

    def __init__(self, base_dir):
        # Normalise rather than assert: `assert` disappears under `python -O`,
        # and rejecting a perfectly valid directory just because it lacks a
        # trailing slash (or raising IndexError on '') is needlessly strict.
        if base_dir and not base_dir.endswith('/'):
            base_dir = base_dir + '/'
        self.base_dir = base_dir

    def load(self, file):
        """Read ``file`` (a name relative to ``base_dir``) with ``scipy.io.loadmat``.

        Returns
        -------
        dict
            The dictionary produced by ``loadmat``, mapping MATLAB variable
            names to NumPy arrays.
        """
        path = self.base_dir + file
        # squeeze_me=False keeps unit dimensions, which is why callers index
        # nested struct fields with [0, 0].
        return loadmat(path, squeeze_me = False)


In [2]:
if __name__ == '__main__':
    # Load the annotation file once; every later cell reads from `x`.
    # NOTE(review): the directory is an absolute, machine-specific path.
    x = MPIIData(
        base_dir = '/home/hrishi/1Hrishi/0Thesis/Data/',
    ).load(
        file = 'mpii_human_pose_v1_u12_1.mat',
    )


In [3]:
# List each top-level entry name of the dictionary returned by loadmat.
for key in x:
  print(key)

__globals__
RELEASE
__version__
__header__


## Annotation description
Annotations are stored in a MATLAB structure `RELEASE` with the following fields:

* ```.annolist(imgidx)``` - annotations for image imgidx

    * ```.image.name``` - image filename
    * ```.annorect(ridx)``` - body annotations for a person ridx
        * ```.x1, .y1, .x2, .y2``` - coordinates of the head rectangle
        * ```.scale``` - person scale w.r.t. 200 px height
        * ```.objpos``` - rough human position in the image
        * ```.annopoints.point``` - person-centric body joint annotations
            * ```.x, .y``` - coordinates of a joint
            * ```id``` - joint id (0 - r ankle, 1 - r knee, 2 - r hip, 3 - l hip, 4 - l knee, 5 - l ankle, 6 - pelvis, 7 - thorax, 8 - upper neck, 9 - head top, 10 - r wrist, 11 - r elbow, 12 - r shoulder, 13 - l shoulder, 14 - l elbow, 15 - l wrist)
            * ```is_visible``` - joint visibility
    * ```.vididx``` - video index in video_list
    * ```.frame_sec``` - image position in video, in seconds
* ```img_train(imgidx)``` - training/testing image assignment
* ```single_person(imgidx)``` - contains rectangle id ridx of sufficiently separated individuals
* ```act(imgidx)``` - activity/category label for image imgidx
    * ```act_name``` - activity name
    * ```cat_name``` - category name
    * ```act_id``` - activity id
* ```video_list(videoidx)``` - specifies the video id as provided by YouTube. To watch a video on YouTube, go to https://www.youtube.com/watch?v=video_list(videoidx)


In [4]:
# Field names of the structured RELEASE array returned by loadmat.
x['RELEASE'].dtype.names

('annolist', 'img_train', 'version', 'single_person', 'act', 'video_list')

In [53]:
# Summarise how many images are assigned to the training vs. testing split.
print("Train/test split statistics:")
# Index the flat per-image assignment vector once instead of repeating the
# nested lookup (presumably 1 = train, 0 = test — confirm against the dataset docs).
img_train_flags = x['RELEASE']['img_train'][0,0][0]
total_samples = img_train_flags.shape[0]
print("Total samples: ", total_samples)
# np.sum accumulates in a wide integer type. The builtin sum() adds NumPy
# scalars one at a time, which is slow and can wrap around for narrow dtypes
# (e.g. uint8) under NumPy >= 2 promotion rules. int() keeps the arithmetic
# below in plain Python numbers.
num_training = int(np.sum(img_train_flags))
print("Training set size: ", num_training)
print("Testing set size: ", total_samples - num_training)
print("Train percentage: ", num_training*100/total_samples)

Train/test split statistics:
Total samples:  24987
Training set size:  18079
Testing set size:  6908
Train percentage:  72.3536238844199


In [93]:
# Count the distinct activity category names found in the 'act' annotations.
# NOTE(review): "Catogory" in the printed header is a typo for "Category".
print("Action Catogory Statistics:")
# NOTE(review): the outer np.unique looks redundant, but the values are nested
# object arrays, so confirm the result is unchanged before simplifying.
categories = np.unique(np.unique(x['RELEASE']['act'][0,0]['cat_name']))
print("Total categories: ", categories.shape[0])
# Disabled draft of a per-category histogram (never executed):
# from matplotlib import pyplot as plt
# data = np.unique(x['RELEASE']['act'][0,0]['cat_name'])
# plt.hist(data, bins=categories, alpha=0.5)

Action Catogory Statistics:
Total categories:  19


In [107]:
# Shape of the entry at index 1 of the de-duplicated cat_name values.
np.unique(np.unique(x['RELEASE']['act'][0,0]['cat_name']))[1].shape

(1,)

In [None]:
# TODO: per-category statistics — iterate over the unique category names here.
# The original statement (`for category in`) was left unfinished; a dangling
# loop header is a SyntaxError that stops the notebook from running top to
# bottom, so it is kept as a note until the loop body is written.

In [34]:
# Inspect the 'single_person' entry at index 3 to see how the values are nested.
x['RELEASE']['single_person'][0,0][3]

array([array([[1]], dtype=uint8)], dtype=object)

In [5]:
# Shape of 'annolist' as exposed on the outer RELEASE array (before unwrapping with [0,0]).
x['RELEASE']['annolist'].shape

(1, 1)

In [6]:
# Shape of the annotation array nested inside 'annolist' at [0,0].
x['RELEASE']['annolist'][0,0].shape

(1, 24987)

In [19]:
# Display the nested annotation records themselves (the output is very large).
x['RELEASE']['annolist'][0,0]

array([[(array([[(array(['037454012.jpg'], dtype='<U13'),)]], dtype=[('name', 'O')]), array([[(array([[3.88073395]]), array([[(array([[601]], dtype=uint16), array([[380]], dtype=uint16))]],
      dtype=[('x', 'O'), ('y', 'O')]))]],
      dtype=[('scale', 'O'), ('objpos', 'O')]), array([], shape=(1, 0), dtype=float64), array([], shape=(1, 0), dtype=float64)),
        (array([[(array(['095071431.jpg'], dtype='<U13'),)]], dtype=[('name', 'O')]), array([[(array([[8.07816613]]), array([[(array([[881]], dtype=uint16), array([[394]], dtype=uint16))]],
      dtype=[('x', 'O'), ('y', 'O')])),
        (array([[8.90412938]]), array([[(array([[338]], dtype=uint16), array([[210]], dtype=uint8))]],
      dtype=[('x', 'O'), ('y', 'O')]))]],
      dtype=[('scale', 'O'), ('objpos', 'O')]), array([], shape=(1, 0), dtype=float64), array([], shape=(1, 0), dtype=float64)),
        (array([[(array(['073199394.jpg'], dtype='<U13'),)]], dtype=[('name', 'O')]), array([[(array([[4.32666153]]), array([[(array([[

In [14]:
# Field names available on each annotation record.
x['RELEASE']['annolist'][0,0].dtype.names

('image', 'annorect', 'frame_sec', 'vididx')

In [10]:
# Shape of the nested annotation array (same check as performed in an earlier cell).
x['RELEASE']['annolist'][0,0].shape

(1, 24987)

In [18]:
# Full structured dtype (field names with their element types) of the annotation records.
x['RELEASE']['annolist'][0,0].dtype

dtype([('image', 'O'), ('annorect', 'O'), ('frame_sec', 'O'), ('vididx', 'O')])

In [20]:
# Pull the 'annorect' field out of every annotation record (large output).
x['RELEASE']['annolist'][0,0]['annorect']

array([[array([[(array([[3.88073395]]), array([[(array([[601]], dtype=uint16), array([[380]], dtype=uint16))]],
      dtype=[('x', 'O'), ('y', 'O')]))]],
      dtype=[('scale', 'O'), ('objpos', 'O')]),
        array([[(array([[8.07816613]]), array([[(array([[881]], dtype=uint16), array([[394]], dtype=uint16))]],
      dtype=[('x', 'O'), ('y', 'O')])),
        (array([[8.90412938]]), array([[(array([[338]], dtype=uint16), array([[210]], dtype=uint8))]],
      dtype=[('x', 'O'), ('y', 'O')]))]],
      dtype=[('scale', 'O'), ('objpos', 'O')]),
        array([[(array([[4.32666153]]), array([[(array([[619]], dtype=uint16), array([[350]], dtype=uint16))]],
      dtype=[('x', 'O'), ('y', 'O')]))]],
      dtype=[('scale', 'O'), ('objpos', 'O')]),
        ...,
        array([[(array([[287]], dtype=uint16), array([[130]], dtype=uint8), array([[324]], dtype=uint16), array([[163]], dtype=uint8), array([[(array([[(array([[334]], dtype=uint16), array([[225]], dtype=uint8), array([[6]], dtype=uint8),

In [26]:
# dtype of the first row of the 'annorect' field array.
x['RELEASE']['annolist'][0,0]['annorect'][0].dtype

dtype('O')