In [None]:
import pandas as pd
import scipy.io
import numpy as np
import h5py
import json

# Path of the .mat file that is opened with h5py further down; edit it to match
# where the dataset lives on the machine running this notebook.
file_name = "/scratch/datasets/SUNRGBDtoolbox/Metadata/SUNRGBD2Dseg.mat"

In [None]:
# Registry filled in by add_dtype_name: dtype name -> set of names seen with that dtype.
dtypes = {}

def string(seq):
    """Convert a sequence of integers into a single string.

    Each element is passed to ``chr``; an empty sequence gives ``''``.
    """
    return ''.join(chr(code) for code in seq)

def add_dtype_name(f, name):
    """Keep track of all dtypes and names in the HDF5 file using it.

    Parameters
    ----------
    f : object exposing ``dtype.name``
        The dataset (or array) whose dtype is being recorded.
    name : str
        Label recorded under that dtype in the module-level ``dtypes`` registry.

    Returns
    -------
    None
        The registry is updated in place.
    """
    # Membership has to be tested on the registry keys; every name seen for a
    # dtype is accumulated in its set rather than replacing earlier entries.
    dtypes.setdefault(f.dtype.name, set()).add(name)

In [None]:
def _load_numeric(f, name):
    """Load a plain numeric dataset, down-casting float64 data that needs more than 2e9 bytes."""
    dtype = f.dtype
    if (np.prod(f.shape) * f.dtype.itemsize) > 2e9:
        print("WARNING: The array " + name + " requires > 2Gb")
        if f.dtype.char == 'd':
            print("\t Recasting " + str(dtype) + " to float32")
            dtype = np.float32
        else:
            raise MemoryError("array " + name + " is too large to load")
    return np.array(f, dtype=dtype).squeeze()


def _decode_uint16(f, name):
    """Turn a uint16 dataset into a string when it is a vector, otherwise into an array."""
    codes = np.array(f).squeeze()
    if codes.ndim <= 1:
        # Only vectors are candidate strings; ravel() also covers the 0-d case.
        try:
            return string(codes.ravel())
        except (ValueError, TypeError):
            pass
    print("WARNING:" + name, ":")
    print("\t" + str(f))
    print("\t CONVERSION TO STRING FAILED, USING ARRAY!")
    print("\t" + str(codes))
    return codes


def _follow_references(f, root, name):
    """Resolve every cell of a 2D object dataset and pack the results into one array."""
    container = []
    for i in range(f.shape[0]):
        row = f[i]  # fetch the row once instead of once per column
        for j in range(f.shape[1]):
            item = row[j]
            if str(item) == '<HDF5 object reference>':  # reference: follow it
                container.append(recursive_dict(root[item], root, name=name))
            else:
                container.append(np.array(item).squeeze())
    try:
        return np.array(container).squeeze()
    except ValueError:
        print("WARNING:" + name + ":")
        print("\t" + str(len(container)) + " items")
        print("\t CANNOT CONVERT INTO NON-OBJECT ARRAY")
        # Fill an object array slot by slot so NumPy never tries to broadcast
        # the differently shaped items against each other.
        boxed = np.empty(len(container), dtype=object)
        for index, item in enumerate(container):
            boxed[index] = item
        return boxed.squeeze()


def recursive_dict(f, root=None, name='root'):
    """Recursively convert an HDF5 object into plain dicts, arrays and strings.

    Parameters
    ----------
    f : object
        Either a group-like object (has ``keys``) or a dataset-like object
        (has ``shape`` and ``dtype``).
    root : object, optional
        Object used to dereference HDF5 object references; defaults to ``f``
        on the outermost call.
    name : str
        Label built up as ``parent->child``; used in warnings and passed to
        ``add_dtype_name``.

    Returns
    -------
    dict, numpy.ndarray or str
        * group-like input: dict of converted members, without ``'#refs#'``;
        * ``object`` dtype: array of the dereferenced / converted cells;
        * ``uint16`` dtype: a string for vectors (this may be a string for
          Matlab), otherwise the squeezed array;
        * any other dtype: the squeezed array.

    Raises
    ------
    MemoryError
        A non-float64 dataset needs more than 2e9 bytes.
    NotImplementedError
        ``f`` is neither group-like nor dataset-like.
    """
    if root is None:
        root = f
    if hasattr(f, 'keys'):
        # '#refs#' is skipped: we don't want to keep this
        return {
            key: recursive_dict(f[key], root, name=name + '->' + key)
            for key in f.keys()
            if key != '#refs#'
        }
    if not hasattr(f, 'shape'):
        raise NotImplementedError(
            "cannot convert " + name + " of type " + type(f).__name__)

    add_dtype_name(f, name)
    dtype_name = f.dtype.name
    if dtype_name == 'object':  # 2D array of HDF5 object references or just objects
        return _follow_references(f, root, name)
    if dtype_name == 'uint16':
        return _decode_uint16(f, name)
    return _load_numeric(f, name)

In [None]:
# Open read-only and release the file handle once the conversion is done:
# recursive_dict copies what it reads into NumPy arrays, strings and dicts, so
# `data` does not need the file to stay open.
with h5py.File(file_name, mode='r') as f:
    data = recursive_dict(f)

In [None]:
# Bare expression: the notebook shows the top-level keys of the converted file.
data.keys()

In [None]:
import gc

# Keep only the two label collections: `a` is 'seglabel', `b` is 'seglabelall'.
a, b = (data['SUNRGBD2Dseg'][field] for field in ('seglabel', 'seglabelall'))

# Drop the rest of the converted structure and force a collection pass.
del data
gc.collect()

In [None]:
VALUES = {}

# Build the mapping {value in b -> value in a at the same pixel}.
# Each image is processed with NumPy instead of a per-pixel Python loop, and
# the result matches a row-major pixel walk: a key keeps the position of its
# first appearance and ends up holding the value of its last appearance.
for i, (pic, label) in enumerate(zip(a, b)):  # every image, not a fixed count
    if np.shape(pic) != np.shape(label):
        raise ValueError(
            'image ' + str(i) + ': shapes differ, ' +
            str(np.shape(pic)) + ' vs ' + str(np.shape(label)))
    pic_flat = np.asarray(pic).ravel()
    label_flat = np.asarray(label).ravel()

    # Both np.unique calls return the same sorted keys, so their index arrays align.
    keys_sorted, first_seen = np.unique(label_flat, return_index=True)
    _, last_seen_reversed = np.unique(label_flat[::-1], return_index=True)
    last_seen = label_flat.size - 1 - last_seen_reversed
    order = np.argsort(first_seen)

    # tolist() yields plain Python numbers, which json.dump accepts as keys and values.
    VALUES.update(zip(keys_sorted[order].tolist(),
                      pic_flat[last_seen[order]].tolist()))

    if i % 1000 == 0:  # occasional progress line instead of one per image
        print('Ended with ' + str(i))

In [None]:
# Save the collected mapping as JSON so it can be reloaded later.
with open('labels_matching_full.json', mode='w') as json_file:
    json.dump(VALUES, fp=json_file)

In [None]:
# Ideally, restart the kernel at this point to free memory; the next cell reloads the mapping from the JSON file written above.

In [None]:
import json
import pandas as pd

with open('labels_matching_full.json') as handle:
    mapping = json.load(handle)

# Names of the 37 categories, in label order: label 1 is 'wall', label 37 is 'bag'.
labels_full_names = ['wall', 'floor', 'cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window', 'bookshelf',
        'picture', 'counter', 'blinds', 'desk', 'shelves', 'curtain', 'dresser', 'pillow', 'mirror', 'floor_mat',
        'clothes', 'ceiling', 'books', 'fridge', 'tv', 'paper', 'towel', 'shower_curtain', 'box', 'whiteboard',
        'person', 'night_stand', 'toilet', 'sink', 'lamp', 'bathtub', 'bag']
# Labels are 1-based; derive the range from the list so the two cannot drift apart.
Labels_37 = list(range(1, len(labels_full_names) + 1))
labels_37 = pd.DataFrame({
     'Label_37': Labels_37,
     'Name_37': labels_full_names})

In [None]:
# Split the reloaded mapping into two parallel integer lists, skipping entries
# whose value is 0: `parsing` takes the keys, `unique` the values.
parsing = [int(float(raw_key)) for raw_key, raw_value in mapping.items() if raw_value != 0.]
unique = [int(float(raw_value)) for raw_value in mapping.values() if raw_value != 0.]

In [None]:
def _names_for(label_ids, names):
    """Return ``names[label_id]`` for every id, failing early with a clear message.

    Raises
    ------
    IndexError
        If any id is negative or not smaller than ``len(names)``.
    """
    out_of_range = sorted({label_id for label_id in label_ids
                           if not 0 <= label_id < len(names)})
    if out_of_range:
        raise IndexError(
            'label ids ' + str(out_of_range[:10]) +
            (' ...' if len(out_of_range) > 10 else '') +
            ' cannot index a name list of length ' + str(len(names)))
    return [names[label_id] for label_id in label_ids]


# NOTE(review): `parsing` holds the keys of the reloaded mapping, which come from
# 'seglabelall', while `labels_full_names` is the 37-entry list whose labels are
# numbered from 1. Presumably a list of fine-grained names was meant here; confirm
# the source of those names and whether the ids are 0- or 1-based.
Name_6585 = _names_for(parsing, labels_full_names)

In [None]:
# One row per retained mapping entry; columns appear in the order given here.
labels_full = pd.DataFrame(
    dict(Label_6585=parsing, Label_37=unique, Name_6585=Name_6585)
)

In [None]:
# No key is given, so pandas joins on the columns the two frames have in common.
final_dataset = labels_full.merge(labels_37)

In [None]:
# Bare expression: the notebook renders the merged frame as this cell's output.
final_dataset

In [None]:
# Write the merged table as comma-separated text (the file name has no extension).
final_dataset.to_csv(path_or_buf='name_mapping_from_toolbox', sep=',')