In [1]:
import numpy as np
import pandas as pd
import scipy.io as spio
import os
from scipy.sparse import coo_matrix, save_npz, load_npz

In [2]:
def _check_keys(dict):
    '''
    checks if entries in dictionary are mat-objects. If yes
    todict is called to change them to nested dictionaries
    '''
    for key in dict:
        if isinstance(dict[key], spio.matlab.mio5_params.mat_struct):
            dict[key] = _todict(dict[key])
    return dict

In [3]:
def loadmat(filename):
    '''
    drop-in replacement for spio.loadmat that hands back plain python
    dictionaries for MATLAB structs.

    The file is read with ``struct_as_record=False`` and ``squeeze_me=True``
    and the result is then passed through _check_keys, which converts
    entries that are still mat-objects.

    from: `StackOverflow <http://stackoverflow.com/questions/7008608/scipy-io-loadmat-nested-structures-i-e-dictionaries>`_
    '''
    return _check_keys(
        spio.loadmat(filename, squeeze_me=True, struct_as_record=False)
    )

In [4]:
# Read the "CellMap" variable out of the demo .mat file and show its shape.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
fname = 'D:\\Home\\Dimitris\\OneDrive - University College London\\dev\\Python\\cell_call\\demo_data\\CellMap.mat'
cellmap = loadmat(fname)["CellMap"]
cellmap.shape

(5471, 7602)

In [2]:
# Load the sparse label image saved as .npz and show its shape
# (the recorded output matches the shape of `cellmap`).
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
coo_image = load_npz('D://Home/Dimitris/OneDrive - University College London/dev/Python/pciSeq/data/mouse/ca1/segmentation/label_image.coo.npz')
coo_image.shape

(5471, 7602)

In [10]:
# [coo_image.data.min(), coo_image.data.max()]

[1, 3481]

In [11]:
# coo_image.data

array([  51,   51,   51, ..., 2429, 2429, 2429], dtype=uint32)

In [23]:
# coo_max = coo_image.data.max()
# _keys = 1 + np.arange(coo_max)
# _vals = _keys.copy()
# np.random.shuffle(_vals)

In [21]:
# np.random.shuffle(_keys)

In [24]:
# _keys

array([   1,    2,    3, ..., 3479, 3480, 3481], dtype=int64)

In [25]:
# _vals

array([1510,  129, 2127, ..., 1730, 2967, 1275], dtype=int64)

In [28]:
# d = dict(zip(_keys, _vals))

129

In [34]:
# new_data = np.array([d[x] for x in coo_image.data])

In [36]:
# coo_matrix((new_data, (coo_image.row, coo_image.col)), shape=coo_image.shape)
#  coo_matrix((data, (row, col)), shape=(4, 4))

<5471x7602 sparse matrix of type '<class 'numpy.int64'>'
	with 3721912 stored elements in COOrdinate format>

In [6]:
def remap_labels(coo, seed=None):
    '''
    randomly renumbers the labels stored in a sparse label image.

    Every label ``k`` in ``1..coo.data.max()`` is replaced by the k-th entry
    of a random permutation of that same range, so entries that shared a
    label before still share one afterwards; only the numbering changes.
    Row/column coordinates and the matrix shape are carried over unchanged.

    Parameters
    ----------
    coo : scipy.sparse.coo_matrix
        Label image. Stored values must be non-negative and integer-valued.
        Explicitly stored zeros are kept as zero.
    seed : None, int or numpy.random.Generator, optional
        ``None`` (the default) shuffles with NumPy's global legacy random
        state, exactly as this function always did, so ``np.random.seed``
        still controls the result. Any other value is handed to
        ``np.random.default_rng`` for a self-contained, reproducible shuffle.

    Returns
    -------
    scipy.sparse.coo_matrix
        New matrix with int64 data, same coordinates and shape as ``coo``.

    Raises
    ------
    ValueError
        If a stored value is negative or not integer-valued.
    '''
    labels = np.asarray(coo.data)

    # Nothing stored: .max() would fail on an empty array, and there is
    # nothing to relabel anyway.
    if labels.size == 0:
        empty = np.empty(0, dtype=np.int64)
        return coo_matrix((empty, (coo.row, coo.col)), shape=coo.shape)

    # The lookup table below is indexed by label, so labels must be integers.
    if not np.issubdtype(labels.dtype, np.integer):
        as_int = labels.astype(np.int64)
        if not np.array_equal(as_int, labels):
            raise ValueError('remap_labels expects integer-valued labels')
        labels = as_int
    if labels.min() < 0:
        raise ValueError('remap_labels expects non-negative labels')

    max_label = int(labels.max())
    if seed is None:
        # Legacy path: same draws from the global RNG as the original code.
        permuted = np.arange(1, max_label + 1, dtype=np.int64)
        np.random.shuffle(permuted)
    else:
        rng = np.random.default_rng(seed)
        permuted = rng.permutation(max_label).astype(np.int64) + 1

    # lut[k] is the new value for label k; slot 0 keeps background at 0.
    lut = np.zeros(max_label + 1, dtype=np.int64)
    lut[1:] = permuted

    # One vectorised gather instead of a Python-level dict lookup per entry.
    new_data = lut[labels]
    return coo_matrix((new_data, (coo.row, coo.col)), shape=coo.shape)

In [9]:
# Eyeball the relabelled image as a dense array.
# NOTE: .toarray() materialises the full 5471 x 7602 image (~330 MB as int64) just for display.
remap_labels(coo_image).toarray()

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=int64)

In [None]:
# spots_df = spots_df.sample(frac=1).reset_index(drop=True)

In [4]:
# Label values held by the sparse image, one per stored entry.
coo_image.data

array([  51,   51,   51, ..., 2429, 2429, 2429], dtype=uint32)

In [8]:
# Sorted distinct labels, plus for every stored entry the index of its label in that sorted array.
np.unique(coo_image.data, return_inverse=True)

(array([   1,    2,    3, ..., 3479, 3480, 3481], dtype=uint32),
 array([  50,   50,   50, ..., 2428, 2428, 2428], dtype=int64))

In [9]:
def order_labels(coo):
    '''
    rebuilds a sparse label image by passing every stored label through a
    label -> label lookup covering ``1..coo.data.max()``.

    NOTE(review): this mirrors remap_labels with the shuffle switched off,
    so the lookup currently maps each label to itself and the stored values
    come back unchanged. A label outside ``1..max`` raises KeyError.
    TODO: confirm what ordering was intended here.
    '''
    highest = coo.data.max()
    sources = np.arange(highest) + 1
    # Shuffle intentionally disabled: targets are an untouched copy of sources.
    targets = sources.copy()
    lookup = {src: dst for src, dst in zip(sources, targets)}
    relabelled = np.array([lookup[label] for label in coo.data])
    return coo_matrix((relabelled, (coo.row, coo.col)), shape=coo.shape)

In [12]:
# Sorted distinct labels present in the image.
np.unique(coo_image.data)

array([   1,    2,    3, ..., 3479, 3480, 3481], dtype=uint32)

In [20]:
# Distinct labels in order of first appearance in coo_image.data
# (dict keys keep insertion order, whereas np.unique sorts).
uKeys = list((dict.fromkeys(coo_image.data)))
# uKeys

In [21]:
# Sorted distinct labels, plus for each entry of uKeys its index in that sorted array.
np.unique(uKeys, return_inverse=True)

(array([   1,    2,    3, ..., 3479, 3480, 3481], dtype=uint32),
 array([  50,   47,   72, ..., 2447, 2309, 2428], dtype=int64))

In [23]:
# The label whose first occurrence comes last in coo_image.data.
uKeys[-1]

2429

In [24]:
# Spot-check the label at a single pixel (row 14, column 110).
# NOTE: .toarray() densifies the whole image just to read one value.
coo_image.toarray()[14, 110]

51