In [1]:
import os
from glob import glob
import numpy as np
import pandas
import math
from nilearn import image, input_data
from copy import deepcopy
import nibabel as ni
import scipy.stats as stats
from scipy.io import savemat
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import FeatureAgglomeration
from sklearn.feature_extraction.image import grid_to_graph
import statsmodels.distributions.empirical_distribution as ed

In [43]:
def prepare_PET_data(files_in, atlas, ref = None, msk = None, dimension_reduction = False,
                     ECDF_in = None, output_type = 'py', out_dir = './', out_name = 'PET_data', 
                     save_matrix = False, save_ECDF = False, save_images = False, ref_index = [],
                    voxyreg = False):
    ''' This is a function that will take several PET images and an atlas and will
    return a subject X region matrix. If specified, the function will also calculate 
    probabilities (via ECDF) either voxelwise, or using a specified reference region
    
    files_in = input can either be 
        - a path to a directory full of (only) nifti images OR
        - a "search string" using wildcards OR
        - a list of subject paths OR
        - an already loaded 4D nifti image (subject X image matrix)
        
    atlas = a path to a labeled regional atlas in the same space as the PET data
    
    ref = multiple options:
        - If None, no probabilities will be calculated, and script will simply extract
        regional PET data using the atlas.
        - If a path to a reference region mask (MUST BE BINARY), will calculate voxelwise 
        probabilities based on values within the reference region. Mask must be in the same 
        space as PET data and atlas
        - If a path to an atlas image (between 3 and 1002 distinct values), will calculate
        region-wise probabilities for each region in the atlas.
        - If a list of integers, the atlas labels matching these integers are combined 
        into a single reference region (built from the atlas argument)
        - if 'voxelwise', voxelwise (or atom-wise from dimension reduction) probabilities
        will be estimated. In other words, each voxel or atom will serve as its own
        reference. Also, see ref_index argument.
        
    msk = A path to a binary mask file in the same space as PET data and atlas. If None,
        mask will be computed as a binary mask of the atlas.
        ** PLEASE NOTE: The mask will be used to mask the reference region! **
    
    dimension_reduction = whether or not to first reduce dimensions of data using
    hierarchical clustering. This results in an initial step that will be very slow, but 
    may result in an overall speedup for the script, but perhaps only if ref is set 
    to 'voxelwise'.
        - If False (or None), do not perform dimension reduction
        - If integer, the number of atoms (clusters) to reduce to
    
    ECDF_in = If the user wishes to apply an existing ECDF to the PET data instead of
        generating one de novo, that can be done here. This is crucial if the user wishes 
        to use multiple datasets. Think of it like scaling in machine learning.
        Ignored when ref is 'voxelwise'.
        - If None, will generate ECDF de novo.
        - If np.array, will use this array to generate the ECDF.
        - If statsmodel ECDF object, will use this as ECDF
        - If a path to a .npy file, the array stored in that file is loaded and used
        to generate the ECDF.
    
    output_type = type of file to save final subject x region matrix into. multiple options:
        -- 'py' will save matrix into a csv and return a dict of outputs
        -- 'mat' will save all outputs into a matfile and return None
    
    out_dir = location to save output files. Defaults to current directory. The directory
        is created if it does not exist.
    
    out_name = the prefix for all output files
    
    save_matrix = Whether to save or return subject x image matrix. Useful if running multiple 
        times, as this matrix can be set as files_in, bypassing the costly data import
        -- if 'return', will return subject x image matrix to python environment
        -- if 'save', will write subject x image matrix to file. 
        -- any other value (e.g. False or None): matrix will not be stored
    
    save_ECDF = whether to save the ECDF used to create the probabilities. This is crucial if 
        using multiple datasets. The resulting output can be used as input for the ECDF argument.
        -- if 'return', will return the ECDF (or list of per-voxel ECDFs) to python environment
        -- if 'save', will write array to file
        -- if False or None, array will not be stored
        NOTE: any truthy value writes the reference distribution to disk when one was
        generated as an array.
    
    save_images = whether to save PET probability (output) images. If True, data will be written
        to a 4D nifti file, specified by out_dir and out_name
    
    ref_index = whether to use a subset of input data as a reference group to use to create the
        ECDF, which will subsequently be applied to all subject data. 
        -- Input should be a list of indices corresponding to the order of the subjects in files_in. 
        For instance, if you have 6 subjects and want to use the 2nd and 4th as reference, 
        input would be [1,3]. 
        -- Passing an empty list will skip this and use the whole sample for the ECDF.
        * NOTE * this argument is only used by the voxelwise and region-wise (atlas 
        reference) calculations.
    
    voxyreg = only used when ref is an atlas image. If True, probabilities are computed
        for every voxel against the pooled values of its reference region (images are
        produced and the final matrix is then extracted with the atlas argument). If False,
        one probability per reference region is computed from the regional means.
        
    Returns a dict when output_type is 'py' (key 'roi_matrix', plus '4d_image_matrix' and
    'ECDF_function' when requested); returns None when output_type is 'mat'.
    
    Raises IOError for an invalid output_type, ref path or ECDF_in, and ValueError for
    mismatching image dimensions or an unusable reference image.
    '''
    # Check input arguments
    print('initiating...')

    if output_type != 'py' and output_type != 'mat':
        raise IOError('output_type must be set to py or mat')
    
    if not os.path.isdir(out_dir):
        # makedirs so that a nested, not-yet-existing out_dir also works
        os.makedirs(out_dir)
    
    # Load data
    print('loading data...')
    i4d = load_data(files_in) # load PET data
    if save_matrix == 'save':
        otpt = os.path.join(out_dir,'%s_4d_data'%out_name)
        print('saving 4d subject x scan to nifti image: \n',otpt)
        i4d.to_filename(otpt)
    
    # load atlas
    atlas = ni.load(atlas).get_data().astype(int) 
    if atlas.shape != i4d.shape[:3]:
        raise ValueError('atlas dimensions do not match PET data dimensions')
    
    # load reference region
    regionwise = False
    if type(ref) == str and ref != 'voxelwise': 
        print('looking for reference image...')
        if not os.path.isfile(ref):
            raise IOError('Please enter a valid path for ref, or select a different option for this argument')
        else:
            ref_msk = ni.load(ref).get_data()
            if ref_msk.shape != i4d.shape[:3]:
                raise ValueError('ref region image dimensions do not match PET data dimensions')
            # the number of distinct values decides between a binary mask and an atlas
            ref_uvals = len(np.unique(ref_msk))
            if ref_uvals > 2 and ref_uvals < 1003:
                print('found reference atlas. Using for region-wise probability calculation')
                regionwise = True
            elif ref_uvals < 2 or ref_uvals > 1002:
                raise ValueError(
                        'reference image is either empty or has too many unique values for an atlas.')
            else:
                print('found binary reference region mask. Using for probability calculation')
                
    elif type(ref) == list:
        # build a binary reference mask out of the requested atlas labels
        ref_msk = np.zeros_like(atlas)
        for i in ref:
            ref_msk[atlas == i] = 1
    else:
        ref_msk = None
    
    
    # Mask data
    print('masking data...')
    if msk is None:
        img_mask = deepcopy(atlas)
        img_mask[img_mask<1] = 0
        img_mask[img_mask>0] = 1
    else:
        img_mask = ni.load(msk).get_data()
        atlas[img_mask < 1] = 0
    
    if ref_msk is not None:
        ref_msk[img_mask < 1] = 0
    
    mask_tfm = input_data.NiftiMasker(ni.Nifti1Image(img_mask,i4d.affine))
    mi4d = mask_tfm.fit_transform(i4d)
    
    # dimension reduction (IN BETA!)
    if dimension_reduction:
        print('reducing dimensions...')
        dm_mask = mask_tfm.mask_img_.get_data().astype(bool)
        shape = dm_mask.shape
        connectivity = grid_to_graph(n_x=shape[0], n_y=shape[1],
                                   n_z=shape[2], mask=dm_mask)
        mi4d, labels, ward = dim_reduction(mi4d, connectivity, dimension_reduction)
        labels_img = mask_tfm.inverse_transform(labels)
        
    # main ECDF calculation
    skip = False
    # Defaults so that the output section never meets an unbound name,
    # whichever branch below is taken.
    ECDF_array = None
    input_distribution = 'not generated'
    if ref != 'voxelwise':
        if ECDF_in is not None: 
            print('generating ECDF...')
            print('using user-supplied data...')
            if type(ECDF_in) == ed.ECDF:
                mi4d_ecdf, ecref = ecdf_simple(mi4d, ECDF_in)
                input_distribution = 'not generated'
            elif type(ECDF_in) == np.ndarray:
                mi4d_ecdf, ecref = ecdf_simple(mi4d, ECDF_in)
                input_distribution = ECDF_in
            elif type(ECDF_in) == str:
                if not os.path.isfile(ECDF_in):
                    raise ValueError('input for ECDF_in is not a valid path')
                if '.npy' not in ECDF_in:
                    raise ValueError('this function currently only accepts .npy files as inputs for ECDF_in')
                input_distribution = np.load(ECDF_in)
                mi4d_ecdf, ecref = ecdf_simple(mi4d, input_distribution)
                
            else:
                try:
                    mi4d_ecdf, ecref = ecdf_simple(mi4d, ECDF_in)
                    print('Could not understand ECDF input, but ECDF successful')
                    input_distribution = 'not generated'
                except Exception:
                    # deliberately narrow: let KeyboardInterrupt/SystemExit propagate
                    raise IOError(
                            'Invalid argument for ECDF in. Please enter an ndarray, an ECDF object, or a valid path')
            # expose the single ECDF so that save_ECDF='return' has something to return
            ECDF_array = ecref
        else:
            if ref_msk is not None:
                if not regionwise:
                    print('generating ECDF...')
                    ref_tfm = input_data.NiftiMasker(ni.Nifti1Image(ref_msk,i4d.affine))
                    refz = ref_tfm.fit_transform(i4d)
                    mi4d_ecdf, ecref = ecdf_simple(mi4d, refz)
                    # ravel() yields an ndarray (refz.flat is a flatiter, which the
                    # ndarray check in the save step below would reject)
                    input_distribution = refz.ravel()
                    ECDF_array = ecref
                else:
                    print('generating region-wise ECDF...')
                    if not voxyreg:
                        # one probability per reference region, from the regional means
                        reg_mat = generate_matrix_from_atlas(i4d, ref_msk)
                        mi4d_ecdf, ECDF_array = ecdf_voxelwise(np.array(reg_mat),ref_index, save_ECDF)
                        f_mat = pandas.DataFrame(mi4d_ecdf,
                                                 columns = ['roi_%s'%x for x in np.unique(ref_msk.astype(int))[1:]]
                                                )
                        input_distribution = 'not_generated'
                    else:
                        # FIX THIS UP
                        f_images = ecdf_regionwise(i4d, ref_index, save_ECDF, ref_msk)
                        ECDF_array = None
                        input_distribution = 'not_generated'
            else:
                print('skipping ECDF...')
                skip = True
    
    else:
        print('generating voxelwise ECDF...')
        mi4d_ecdf, ECDF_array = ecdf_voxelwise(mi4d, ref_index, save_ECDF)
        input_distribution = 'not generated'
        
    if not skip:
        if save_ECDF:
            if type(input_distribution) == np.ndarray:
                flnm = os.path.join(out_dir, '%s_input_distribution'%out_name)
                print('saving ECDF to',flnm)
                np.save(flnm, input_distribution)
            else:
                print('Due to the arguments passed, ECDF array was not generated and therefore cannot be saved')
        
        # transform back to image-space
        if not regionwise and not dimension_reduction:
            print('transforming back into image space')
            f_images = mask_tfm.inverse_transform(mi4d_ecdf)
        elif dimension_reduction:
            print('transforming back into image space')
            # atoms -> voxels (ward), then voxels -> image (masker)
            tfmd = ward.inverse_transform(mi4d_ecdf)
            f_images = mask_tfm.inverse_transform(tfmd)
            #nimgs = rebuild_image_from_atlas(i4d.get_data)
    else:
        print('transforming back into image space')
        f_images = mask_tfm.inverse_transform(mi4d)
    
    if voxyreg:
        # voxel-level images were produced, so the final matrix is extracted
        # from them with the atlas like in the non-regionwise case
        regionwise = False
    
    if save_images:
        if regionwise:
                print('transforming back into image space')
                nimgs = rebuild_image_from_atlas(i4d.get_data(), ref_msk, f_mat)
                f_images = ni.Nifti1Image(nimgs, i4d.affine)
                flnm = os.path.join(out_dir, '%s_4D_probability_data'%out_name)
                print('saving 4D probability image to', flnm)
                f_images.to_filename(flnm)
        else:
            flnm = os.path.join(out_dir, '%s_4D_probability_data'%out_name)
            print('saving 4D probability image to', flnm)
            f_images.to_filename(flnm)
    
    # generate output matrix
    
    print('generating final subject x region matrix')
    if not regionwise:
        f_mat = generate_matrix_from_atlas(f_images, atlas)
    
    # compile (and save) outputs
    print('preparing outputs')
    output = {}
    if output_type == 'py':
        f_mat.to_csv(os.path.join(out_dir, '%s_roi_data.csv'%out_name))
        output.update({'roi_matrix': f_mat})
    else:
        # was `fmat`, an undefined name, which made the 'mat' output unusable
        output.update({'roi_matrix': f_mat.values})
        output.update({'roi_matrix_columns': f_mat.columns})
    if save_matrix == 'return':
        output.update({'4d_image_matrix': i4d})
    if save_ECDF == 'return':
        if output_type == 'py':
            output.update({'ECDF_function': ECDF_array})
        else:
            output.update({'input_distribution': input_distribution})
    
    if output_type == 'py':
        return output
    else:
        savemat(os.path.join(out_dir,'%s_output'%out_name), output)
        return None
        
    
def load_data(files_in):
    """Load the PET input into a single 4D image.

    files_in may be a directory path (every file inside is loaded), a
    wildcard search string, a list of image paths, or an already loaded
    Nifti1Image, which is returned unchanged.

    Raises IOError when a directory or search string matches no file, and
    ValueError when the input is of none of the kinds above.
    """
    i4d = None
    recognized = True

    if type(files_in) is str:
        if os.path.isdir(files_in):
            print('It seems you passed a directory')
            pattern = os.path.join(files_in, '*')
            n_found = len(glob(pattern))
            if n_found == 0:
                raise IOError('specified directory did not contain any files')
            print('found %s images!' % n_found)
            i4d = image.load_img(pattern)
        elif '*' in files_in:
            print('It seems you passed a search string')
            n_found = len(glob(files_in))
            if n_found == 0:
                raise IOError('specified search string did not result in any files')
            print('found %s images' % n_found)
            i4d = image.load_img(files_in)
        else:
            # a plain string that is neither a directory nor a wildcard pattern
            recognized = False
    elif type(files_in) is list:
        print('processing %s subjects' % len(files_in))
        i4d = ni.concat_images(files_in)
    elif type(files_in) is ni.nifti1.Nifti1Image:
        # last axis of the image is taken as the subject axis
        print('processing %s subjects' % files_in.shape[-1])
        i4d = files_in
    else:
        recognized = False

    if not recognized:
        print('files_in not recognized.',
              'Please enter a search string, valid directory, list of subjects, or matrix')
        raise ValueError('I do not recognize the files_in input.')

    return i4d

def dim_reduction(mi4d, connectivity, dimension_reduction):
    """Reduce the feature axis of mi4d with Ward feature agglomeration.

    mi4d is the masked data matrix, connectivity the voxel adjacency graph
    and dimension_reduction the number of clusters to keep.

    Returns (reduced matrix, cluster labels shifted to start at 1, fitted
    FeatureAgglomeration model).
    """
    def _fit_ward(n_clusters):
        # helper: build and fit one agglomeration model on mi4d
        model = FeatureAgglomeration(n_clusters=n_clusters,
                                     connectivity=connectivity,
                                     linkage='ward',
                                     memory='nilearn_cache')
        model.fit(mi4d)
        return model

    # NOTE(review): the model fitted with half the clusters is discarded;
    # presumably it only warms the 'nilearn_cache' on-disk cache -- confirm
    # whether this first pass is still needed.
    _fit_ward(int(dimension_reduction/2))
    ward = _fit_ward(dimension_reduction)

    reduced = ward.transform(mi4d)
    labels = ward.labels_ + 1

    return reduced, labels, ward

def ecdf_simple(mi4d, refz):
    """Transform every value of the 2D matrix mi4d with one shared ECDF.

    refz is either a ready-made statsmodels ECDF object, which is used as
    is, or an array of reference values from which the ECDF is built
    (arrays with more than one dimension are flattened first).

    Returns (transformed matrix with the shape of mi4d, ECDF that was used).
    """
    if type(refz) == ed.ECDF:
        ecref = refz
    elif len(refz.shape) > 1:
        ecref = ed.ECDF(refz.flat)
    else:
        ecref = ed.ECDF(refz)

    print('transforming images...')
    # evaluate on the flattened data, then restore the rows x columns layout
    flat_probs = ecref(mi4d.flat)
    n_rows, n_cols = mi4d.shape[0], mi4d.shape[1]
    mi4d_ecdf = flat_probs.reshape(n_rows, n_cols)

    return mi4d_ecdf, ecref

def ecdf_voxelwise(mi4d, ref_index, save_ECDF):
    """Transform each column of mi4d with an ECDF built from that same column.

    mi4d      : 2D array, one row per subject and one column per voxel,
                atom or region.
    ref_index : row indices of the subjects that define each column's ECDF.
                An empty list (or None) means every row is used.
    save_ECDF : if truthy, the per-column ECDF objects are kept and returned;
                otherwise None is returned in their place.

    Returns (mi4d_ecdf, ECDF_array): mi4d_ecdf has the shape of mi4d and
    holds the probabilities, ECDF_array is the list of ECDFs or None.
    """
    n_features = mi4d.shape[1]
    use_all_rows = ref_index is None or len(ref_index) == 0

    # Probabilities are fractions: keep a floating input dtype as it is, but
    # never write them into an integer (or object) array, where they would be
    # truncated to 0/1.
    if np.issubdtype(mi4d.dtype, np.floating):
        out_dtype = mi4d.dtype
    else:
        out_dtype = np.float64
    mi4d_ecdf = np.empty(mi4d.shape, dtype=out_dtype)

    ECDF_array = [] if save_ECDF else None
    if save_ECDF:
        print('transforming data...')

    for col in range(n_features):
        column = mi4d[:, col]
        reference = column if use_all_rows else mi4d[ref_index, col]
        col_ecdf = ed.ECDF(reference)
        # the ECDF of the reference rows is applied to every row of the column
        mi4d_ecdf[:, col] = col_ecdf(column)
        if save_ECDF:
            ECDF_array.append(col_ecdf)

    return mi4d_ecdf, ECDF_array

def ecdf_regionwise(i4d, ref_index, save_ECDF, atlas):
    """Voxelwise probabilities, each voxel scored against its own region.

    For every label of atlas except the first unique value (assumed to be
    the background), one ECDF is built from all values of all subjects
    inside that region and applied to each voxel of the region.

    i4d       : 4D image holding the subject data.
    ref_index : accepted for signature compatibility; currently unused.
    save_ECDF : accepted for signature compatibility; currently unused.
    atlas     : label array in the space of i4d.

    Returns a Nifti1Image (affine of i4d) holding the probabilities.
    """
    i4d_ecdf = np.zeros_like(i4d.get_data())
    for i in np.unique(atlas)[1:]:
        print('working on region',i)
        # Binary mask of this region only. Deriving it by editing a copy of
        # the atlas would also keep every voxel labelled 1 in masks of other
        # regions.
        msk = (atlas == i).astype(atlas.dtype)
        roi_tfm = input_data.NiftiMasker(ni.Nifti1Image(msk,i4d.affine))
        roi = roi_tfm.fit_transform(i4d)
        n_voxels = roi.shape[1]
        rdist = ed.ECDF(roi.flat)
        # The list holds one row per voxel; transpose back to
        # subjects x voxels (a reshape would scramble values across subjects).
        roi_ecdf = np.array([rdist(roi[:,x]) for x in range(n_voxels)]).T
        vals = roi_tfm.inverse_transform(roi_ecdf)
        i4d_ecdf += vals.get_data()
    
    i4d_ecdf = ni.Nifti1Image(i4d_ecdf,i4d.affine)
        
    return i4d_ecdf

def generate_matrix_from_atlas(files_in, atlas):
    """Average every subject volume within each atlas region.

    files_in : image object whose get_data() returns a 4D array with the
               subjects on the last axis. NaNs are treated as 0.
    atlas    : label array with the shape of one volume. Label 0 is treated
               as background and left out.

    Returns a pandas DataFrame with one row per subject and one 'roi_<label>'
    column per non-zero label (ascending label order) holding the regional
    means.
    """
    data = np.nan_to_num(files_in.get_data())
    atlas = atlas.astype(int)

    # Map arbitrary labels onto 0..n-1 so that bincount can be used.
    labels, compact = np.unique(atlas, return_inverse=True)
    compact = compact.ravel()
    n_labels = len(labels)
    voxels_per_label = np.bincount(compact, minlength=n_labels)

    # Select regions by label value rather than by position, so an atlas
    # without background voxels (or with negative labels) stays aligned with
    # the column names.
    keep = labels != 0
    n_subjects = data.shape[-1]
    means = np.empty((n_subjects, int(keep.sum())), dtype=float)
    for sub in range(n_subjects):
        sums = np.bincount(compact, weights=data[..., sub].ravel(),
                           minlength=n_labels)
        means[sub] = (sums / voxels_per_label)[keep]

    f_mat = pandas.DataFrame(means,
                             index = range(n_subjects),
                             columns = ['roi_%s'%x for x in labels[keep]])

    return f_mat

def rebuild_image_from_atlas(files_in, atlas, map_mtx):
    """Paint region-wise values back into image space.

    files_in : 4D array with the subjects on the last axis. It is used as a
               template and is NOT modified; a copy is filled and returned.
    atlas    : label array with the shape of one volume.
    map_mtx  : DataFrame with one row per subject and columns named
               '<prefix>_<label>' (e.g. 'roi_12').

    Returns a 4D floating-point array in which every voxel of a labelled
    region holds that subject's value for the region and every voxel with
    atlas label 0 is 0. Voxels whose label has no column in map_mtx keep the
    value they had in files_in.
    """
    # Work on a floating-point copy: the caller's array (typically the data
    # of a loaded image) stays intact and fractional values are not truncated
    # when the input is an integer array.
    if np.issubdtype(files_in.dtype, np.floating):
        rebuilt = np.array(files_in, copy=True)
    else:
        rebuilt = np.array(files_in, dtype=np.float64)

    # the masks do not depend on the subject, so compute them once
    background = atlas == 0
    region_masks = [(pos, atlas == int(col.split('_')[-1]))
                    for pos, col in enumerate(map_mtx.columns)]

    for i in range(rebuilt.shape[-1]):
        volume = rebuilt[..., i]  # view: assignments write into `rebuilt`
        for pos, region in region_masks:
            volume[region] = map_mtx.iloc[i, pos]
        volume[background] = 0

    return rebuilt

In [45]:
# Configuration for the step-by-step cells below. The names mirror the
# arguments of prepare_PET_data so its body can be replayed at top level.
# NOTE: DATA_DIR is machine-specific; point it at your own data before running.
DATA_DIR = '/Users/jakevogel/Science/tau'

subs = sorted(glob(os.path.join(DATA_DIR, 'nan_snorm_*')))
files_in = subs[:5]  # only the first five matches are used for this trial
atlas = os.path.join(DATA_DIR, 'dkt_nocereb_1mm.nii.gz')
ref = 'voxelwise'
msk = os.path.join(DATA_DIR, 'ADNI_GM_mask_1mm_nocereb.nii.gz')
dimension_reduction = False
ECDF_in = None
output_type = 'py'
out_dir = os.path.join(DATA_DIR, 'ESM_tau', '')  # trailing '' keeps the final separator
out_name = 'small_trial'
save_matrix = False
save_ECDF = False
save_images = False
ref_index = []
# Referenced by the region-wise branch of the ECDF cell; without a value here
# that branch raises NameError.
voxyreg = False

In [46]:
# Step-by-step replay of the first half of prepare_PET_data (argument checks,
# loading, reference region, masking, optional dimension reduction), run at
# top level so that the intermediate variables can be inspected.
# NOTE: `atlas` is rebound below from a path to the label array, so the
# configuration cell has to be re-run before this cell can be run again.
if output_type != 'py' and output_type != 'mat':
    raise IOError('output_type must be set to py or mat')

if not os.path.isdir(out_dir):
    # makedirs so that a nested, not-yet-existing out_dir also works
    os.makedirs(out_dir)

# Load data
print('loading data...')
i4d = load_data(files_in) # load PET data
if save_matrix == 'save':
    otpt = os.path.join(out_dir,'%s_4d_data'%out_name)
    print('saving 4d subject x scan to nifti image: \n',otpt)
    i4d.to_filename(otpt)

# load atlas
atlas = ni.load(atlas).get_data().astype(int) 
if atlas.shape != i4d.shape[:3]:
    raise ValueError('atlas dimensions do not match PET data dimensions')

# load reference region
regionwise = False
if type(ref) == str and ref != 'voxelwise': 
    print('looking for reference image...')
    if not os.path.isfile(ref):
        raise IOError('Please enter a valid path for ref, or select a different option for this argument')
    else:
        ref_msk = ni.load(ref).get_data()
        if ref_msk.shape != i4d.shape[:3]:
            raise ValueError('ref region image dimensions do not match PET data dimensions')
        # the number of distinct values decides between a binary mask and an atlas
        ref_uvals = len(np.unique(ref_msk))
        if ref_uvals > 2 and ref_uvals < 1003:
            print('found reference atlas. Using for region-wise probability calculation')
            regionwise = True
        elif ref_uvals < 2 or ref_uvals > 1002:
            raise ValueError(
                    'reference image is either empty or has too many unique values for an atlas.')
        else:
            print('found binary reference region mask. Using for probability calculation')

elif type(ref) == list:
    # build a binary reference mask out of the requested atlas labels
    ref_msk = np.zeros_like(atlas)
    for i in ref:
        ref_msk[atlas == i] = 1
else:
    ref_msk = None


# Mask data
print('masking data...')
if msk is None:
    # no mask given: every labelled atlas voxel is inside the mask
    img_mask = deepcopy(atlas)
    img_mask[img_mask<1] = 0
    img_mask[img_mask>0] = 1
else:
    img_mask = ni.load(msk).get_data()
    atlas[img_mask < 1] = 0

if ref_msk is not None:
    ref_msk[img_mask < 1] = 0

mask_tfm = input_data.NiftiMasker(ni.Nifti1Image(img_mask,i4d.affine))
mi4d = mask_tfm.fit_transform(i4d)

# dimension reduction (IN BETA!)
if dimension_reduction:
    print('reducing dimensions...')
    dm_mask = mask_tfm.mask_img_.get_data().astype(bool)
    shape = dm_mask.shape
    connectivity = grid_to_graph(n_x=shape[0], n_y=shape[1],
                               n_z=shape[2], mask=dm_mask)
    mi4d, labels, ward = dim_reduction(mi4d, connectivity, dimension_reduction)
    labels_img = mask_tfm.inverse_transform(labels)


loading data...
processing 5 subjects
masking data...


In [47]:
# Step-by-step replay of the ECDF section of prepare_PET_data.
# NOTE: the region-wise branch reads `voxyreg`, which must be defined before
# that branch is exercised (it mirrors the prepare_PET_data keyword).
skip = False
if ref != 'voxelwise':
    if ECDF_in is not None: 
        print('generating ECDF...')
        print('using user-supplied data...')
        if type(ECDF_in) == ed.ECDF:
            mi4d_ecdf, ecref = ecdf_simple(mi4d, ECDF_in)
            input_distribution = 'not generated'
        elif type(ECDF_in) == np.ndarray:
            mi4d_ecdf, ecref = ecdf_simple(mi4d, ECDF_in)
            input_distribution = ECDF_in
        elif type(ECDF_in) == str:
            if not os.path.isfile(ECDF_in):
                raise ValueError('input for ECDF_in is not a valid path')
            if '.npy' not in ECDF_in:
                raise ValueError('this function currently only accepts .npy files as inputs for ECDF_in')
            input_distribution = np.load(ECDF_in)
            mi4d_ecdf, ecref = ecdf_simple(mi4d, input_distribution)

        else:
            try:
                mi4d_ecdf, ecref = ecdf_simple(mi4d, ECDF_in)
                print('Could not understand ECDF input, but ECDF successful')
                input_distribution = 'not generated'
            except Exception:
                # deliberately narrow: let KeyboardInterrupt/SystemExit propagate
                raise IOError(
                        'Invalid argument for ECDF in. Please enter an ndarray, an ECDF object, or a valid path')
    else:
        if ref_msk is not None:
            if not regionwise:
                print('generating ECDF...')
                ref_tfm = input_data.NiftiMasker(ni.Nifti1Image(ref_msk,i4d.affine))
                refz = ref_tfm.fit_transform(i4d)
                mi4d_ecdf, ecref = ecdf_simple(mi4d, refz)
                # ravel() yields an ndarray; refz.flat is a flatiter and would
                # fail an `== np.ndarray` type check made before saving
                input_distribution = refz.ravel()
            else:
                print('generating region-wise ECDF...')
                if not voxyreg:
                    # one probability per reference region, from the regional means
                    reg_mat = generate_matrix_from_atlas(i4d, ref_msk)
                    mi4d_ecdf, ECDF_array = ecdf_voxelwise(np.array(reg_mat),ref_index, save_ECDF)
                    f_mat = pandas.DataFrame(mi4d_ecdf,
                                             columns = ['roi_%s'%x for x in np.unique(ref_msk.astype(int))[1:]]
                                            )
                    input_distribution = 'not_generated'
                else:
                    # FIX THIS UP
                    f_images = ecdf_regionwise(i4d, ref_index, save_ECDF, ref_msk)
                    ECDF_array = None
                    input_distribution = 'not_generated'
        else:
            print('skipping ECDF...')
            skip = True

else:
    print('generating voxelwise ECDF...')
    mi4d_ecdf, ECDF_array = ecdf_voxelwise(mi4d, ref_index, save_ECDF)
    input_distribution = 'not generated'

generating voxelwise ECDF...


In [48]:
# Display the ECDF-transformed matrix computed in the previous cell.
mi4d_ecdf

array([[ 0.8,  0.8,  0.6, ...,  0.2,  0.2,  0.2],
       [ 0.4,  0.4,  0.4, ...,  0.8,  0.8,  0.8],
       [ 1. ,  1. ,  1. , ...,  1. ,  1. ,  1. ],
       [ 0.6,  0.6,  0.8, ...,  0.6,  0.6,  0.6],
       [ 0.2,  0.2,  0.2, ...,  0.4,  0.4,  0.4]])

In [49]:
# Pair each raw value of the first column of mi4d with its transformed value.
list(zip(mi4d[:,0],mi4d_ecdf[:,0]))

[(0.85238528251647949, 0.80000000000000004),
 (0.76113224029541016, 0.40000000000000002),
 (1.2351555824279785, 1.0),
 (0.81288999319076538, 0.60000000000000009),
 (0.58321291208267212, 0.20000000000000001)]

In [21]:
# Build an ECDF from the first column of mi4d and evaluate it on that same column.
ed.ECDF(mi4d[:,0])(mi4d[:,0])

array([ 0.8,  0.4,  1. ,  0.6,  0.2])

In [23]:
# Evaluates to True when save_ECDF is falsy, i.e. the branch of ecdf_voxelwise
# that does not keep the ECDF objects is the one taken.
not save_ECDF

True

In [None]:
# Scratch version of the body of ecdf_voxelwise(mi4d, ref_index, save_ECDF)
# for the case without reference subset and without stored ECDFs.

X,y = mi4d.shape

# The comprehension yields one row per column of mi4d (shape (y, X)), so the
# result has to be transposed to get back to (X, y); reshape(X, y) would keep
# the row-major order and mix values of different columns.
mi4d_ecdf = np.array([ed.ECDF(mi4d[:,x])(mi4d[:,x]) for x in range(y)]).T
ECDF_array = None


In [24]:
# Same ECDF check as a parameterised expression: x selects the column of mi4d.
x = 0
ed.ECDF(mi4d[:,x])(mi4d[:,x])

array([ 0.8,  0.4,  1. ,  0.6,  0.2])

In [26]:
# One ECDF per column of mi4d, each evaluated on its own column.
# jnk therefore has one row per column of mi4d.
X,y = mi4d.shape
jnk = np.array([ed.ECDF(mi4d[:,x])(mi4d[:,x]) for x in range(y)])

In [42]:
# Copy row x of jnk into column x of a matrix with the shape and dtype of mi4d,
# then display the result.
new_mtx = np.zeros_like(mi4d)
for x in range(y):
    new_mtx[:,x] = jnk[x]
new_mtx

array([[ 0.8,  0.8,  0.6, ...,  0.2,  0.2,  0.2],
       [ 0.4,  0.4,  0.4, ...,  0.8,  0.8,  0.8],
       [ 1. ,  1. ,  1. , ...,  1. ,  1. ,  1. ],
       [ 0.6,  0.6,  0.8, ...,  0.6,  0.6,  0.6],
       [ 0.2,  0.2,  0.2, ...,  0.4,  0.4,  0.4]])

In [33]:
# IPython introspection (trailing `?`): shows the docstring of jnk.reshape.
# Interactive use only; this line is not valid plain Python.
jnk.reshape?