### Image statistics

In [2]:
import matlab.engine
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import scipy.io as sio
import utils.utils as uti
import process.file as f
import os

### Plotting pixel level image statistics

In [7]:
def get_stats_FOCO(folder):
    """Collect per-image pixel statistics and ID accuracy for FOCO recordings.

    Every sub-folder of ``folder`` (names starting with ``.DS`` are skipped)
    is scanned for ``.mat`` files. For each file the ``data`` variable is
    loaded, channels ``[0, 2, 4, 1]`` are selected as R, G, B, W, and the
    per-channel min/max/mean/std plus the six pairwise correlation
    coefficients are computed.

    Args:
        folder: Path of the directory whose sub-folders hold the ``.mat`` files.

    Returns:
        pandas.DataFrame with one row per ``.mat`` file and the columns
        ``folder``, ``acc``, ``min``, ``max``, ``mean``, ``std`` and
        ``RG/RB/RW/GB/GW/BW coef``. ``min``/``max``/``mean``/``std`` hold one
        array per row (one entry per channel). The frame is empty when no
        ``.mat`` file is found.
    """
    channels = [0, 2, 4, 1]
    # Rows are collected in a list and turned into a frame once:
    # DataFrame.append was removed in pandas 2.0.
    rows = []

    for fold in os.listdir(folder):
        if fold.startswith('.DS'):
            continue
        fold_path = folder + '/' + fold

        for file in os.listdir(fold_path):
            if not file.endswith('.mat'):
                continue

            imfile = sio.loadmat(fold_path + '/' + file)
            RGBW = np.squeeze(imfile['data'][:, :, :, channels])

            # One row per pixel, one column per colour channel.
            RGBW_flat = RGBW.reshape(-1, RGBW.shape[-1])
            corr = np.corrcoef(RGBW_flat, rowvar=False)

            # 'acc' is the second of the five values returned by check_accuracy;
            # it is evaluated on the whole sub-folder for every .mat file in it.
            accuracy = uti.check_accuracy(f.proc_FOCO(fold_path))[1]

            rows.append({
                'folder': fold,
                'acc': accuracy,
                'min': np.amin(RGBW, axis=(0, 1, 2)),
                'max': np.amax(RGBW, axis=(0, 1, 2)),
                'mean': np.mean(RGBW, axis=(0, 1, 2)),
                'std': np.std(RGBW, axis=(0, 1, 2)),
                'RG coef': corr[0][1],
                'RB coef': corr[0][2],
                'RW coef': corr[0][3],
                'GB coef': corr[1][2],
                'GW coef': corr[1][3],
                'BW coef': corr[2][3],
            })

    return pd.DataFrame(rows)

def get_stats_NP(folder):
    """Collect per-image pixel statistics for the NP-paper images in ``folder``.

    Only files whose names end in ``YAaLR.mat`` or ``YAaDV.mat`` are read.
    The channel order is taken from ``prefs.RGBW`` inside each file (minus 1)
    and the selected channels are divided by 16 before any statistic is
    computed.

    Args:
        folder: Directory that directly contains the ``.mat`` files.

    Returns:
        pandas.DataFrame with one row per matching file and the columns
        ``file``, ``min``, ``max``, ``mean``, ``std`` and
        ``RG/RB/RW/GB/GW/BW coef``. ``min``/``max``/``mean``/``std`` hold one
        array per row (one entry per channel). The frame is empty when no
        file matches.
    """
    # Rows are collected in a list and turned into a frame once:
    # DataFrame.append was removed in pandas 2.0.
    rows = []

    for file in os.listdir(folder):
        if not file.endswith(('YAaLR.mat', 'YAaDV.mat')):
            continue

        # Load from the folder that was listed, not from a hard-coded path.
        imfile = sio.loadmat(os.path.join(folder, file))

        # presumably converts MATLAB 1-based channel indices to 0-based - TODO confirm
        channels = imfile['prefs']['RGBW'][0][0] - 1
        RGBW = np.squeeze(imfile['data'][:, :, :, channels]) / 16

        # One row per pixel, one column per colour channel.
        RGBW_flat = RGBW.reshape(-1, RGBW.shape[-1])
        corr = np.corrcoef(RGBW_flat, rowvar=False)

        rows.append({
            'file': file,
            'min': np.amin(RGBW, axis=(0, 1, 2)),
            'max': np.amax(RGBW, axis=(0, 1, 2)),
            'mean': np.mean(RGBW, axis=(0, 1, 2)),
            'std': np.std(RGBW, axis=(0, 1, 2)),
            'RG coef': corr[0][1],
            'RB coef': corr[0][2],
            'RW coef': corr[0][3],
            'GB coef': corr[1][2],
            'GW coef': corr[1][3],
            'BW coef': corr[2][3],
        })

    return pd.DataFrame(rows)

def get_stats_Chaud(folder):
    """Collect per-image pixel statistics and ID accuracy for the Chaudhary data.

    Every sub-folder of ``folder`` (names starting with ``.DS`` are skipped)
    is scanned for ``.mat`` files. The channel order is taken from
    ``prefs.RGBW`` inside each file (minus 1) and the selected channels are
    divided by 16 before any statistic is computed.

    Args:
        folder: Path of the directory whose sub-folders hold the ``.mat`` files.

    Returns:
        pandas.DataFrame with one row per ``.mat`` file and the columns
        ``folder``, ``name``, ``acc``, ``min``, ``max``, ``mean``, ``std`` and
        ``RG/RB/RW/GB/GW/BW coef``. ``min``/``max``/``mean``/``std`` hold one
        array per row (one entry per channel). The frame is empty when no
        ``.mat`` file is found.
    """
    # Rows are collected in a list and turned into a frame once:
    # DataFrame.append was removed in pandas 2.0.
    rows = []

    for fold in os.listdir(folder):
        if fold.startswith('.DS'):
            continue
        fold_path = folder + '/' + fold

        for file in os.listdir(fold_path):
            if not file.endswith('.mat'):
                continue

            imfile = sio.loadmat(fold_path + '/' + file)

            # presumably converts MATLAB 1-based channel indices to 0-based - TODO confirm
            channels = imfile['prefs']['RGBW'][0][0] - 1
            RGBW = np.squeeze(imfile['data'][:, :, :, channels]) / 16

            # One row per pixel, one column per colour channel.
            RGBW_flat = RGBW.reshape(-1, RGBW.shape[-1])
            corr = np.corrcoef(RGBW_flat, rowvar=False)

            # Second of the five values returned by check_accuracy. It used to
            # be computed and dropped; it is now stored as 'acc' so the frame
            # can be used wherever an accuracy column is expected.
            accuracy = uti.check_accuracy(f.proc_Chaud(fold_path))[1]

            rows.append({
                'folder': fold,
                'name': file,
                'acc': accuracy,
                'min': np.amin(RGBW, axis=(0, 1, 2)),
                'max': np.amax(RGBW, axis=(0, 1, 2)),
                'mean': np.mean(RGBW, axis=(0, 1, 2)),
                'std': np.std(RGBW, axis=(0, 1, 2)),
                'RG coef': corr[0][1],
                'RB coef': corr[0][2],
                'RW coef': corr[0][3],
                'GB coef': corr[1][2],
                'GW coef': corr[1][3],
                'BW coef': corr[2][3],
            })

    return pd.DataFrame(rows)
        


In [10]:
# Gather the per-image statistics tables used by the plotting cells.
FOCO_df = get_stats_FOCO('data/NP_foco_median')
# NOTE(review): a "KeyError: 'data'" is recorded as this cell's output; presumably one of
# the folders read here contains .mat files without a 'data' variable - confirm which.
Hist_df = get_stats_FOCO('data/NP_foco_hist_med')
NP_df = get_stats_NP('data/NP_paper/all')
# Disabled, so chaud_df is not assigned here although plotting cells in this notebook reference it.
#chaud_df = get_stats_Chaud('data/NP_chaudhary')


KeyError: 'data'

In [10]:
%matplotlib qt

def plot_corr_coefs(data, labels):
    """Draw box plots of the six channel-pair correlation coefficients.

    A 2x3 grid is created, one panel per channel pair (RG, RB, RW, GB, GW,
    BW, filled row by row). Each panel shows one box per data frame.

    Args:
        data: Sequence of data frames that each provide the ``'<pair> coef'``
            columns.
        labels: Box labels, one per entry of ``data``.
    """
    channel_pairs = ['RG', 'RB', 'RW', 'GB', 'GW', 'BW']

    fig, ax = plt.subplots(2, 3)

    # ax.flat walks the grid row by row, matching the order of channel_pairs.
    for panel, pair in zip(ax.flat, channel_pairs):
        column = pair + ' coef'
        panel.boxplot([frame[column] for frame in data], labels = labels)
        panel.set_title(pair + ' corr coef')

    plt.show()


# NOTE(review): chaud_df is only assigned by a get_stats_Chaud call that is commented out
# earlier in this notebook, so this line raises NameError unless that call is re-enabled.
plot_corr_coefs([FOCO_df, NP_df, chaud_df], ['FOCO', 'NP', 'Chaudhary'])

In [11]:
%matplotlib qt

def plot_corr_acc(df):
    """Scatter each channel-pair correlation coefficient against accuracy.

    A 2x3 grid is created, one panel per channel pair (RG, RB, RW, GB, GW,
    BW, filled row by row). Accuracy is plotted on the x axis and the
    correlation coefficient on the y axis, which is what the axis labels and
    the "coef vs accuracy" titles state. Previously the two series were
    passed to ``scatter`` the other way round, so the labels described the
    wrong axes.

    Args:
        df: Data frame providing an ``'acc'`` column and the six
            ``'<pair> coef'`` columns.
    """
    channel_pairs = ['RG', 'RB', 'RW', 'GB', 'GW', 'BW']

    fig, ax = plt.subplots(2, 3)

    # ax.flat walks the grid row by row, matching the order of channel_pairs.
    for panel, pair in zip(ax.flat, channel_pairs):
        panel.scatter(df['acc'], df[pair + ' coef'])
        panel.set_title(pair + ' coef vs accuracy')
        panel.set_xlabel('percent accuracy')

    # Only the left-most panel of each row carries the shared y label.
    for panel in ax[:, 0]:
        panel.set_ylabel('correlation coefficient')

    plt.tight_layout()
    plt.show()

In [24]:
%matplotlib qt

def plot_image_stats(data, labels):
    """Scatter per-channel image statistics of several datasets in a 2x4 grid.

    Each column of the grid shows one statistic (mean, min, std, max). The
    top row plots channel 0 against channel 1 (R vs G) and the bottom row
    plots channel 2 against channel 3 (B vs W). Every data frame in ``data``
    is drawn as its own semi-transparent series; the legend is shown on the
    top-left panel only.

    Args:
        data: Sequence of data frames whose ``'mean'``, ``'min'``, ``'std'``
            and ``'max'`` columns hold one per-channel array per row.
        labels: Legend labels, indexed in parallel with ``data``.
    """
    # (grid column, stat column, axis limit, top title, bottom title, axis-label suffix)
    layout = [
        (0, 'mean', 150, 'RG means', 'BW means', 'means'),
        (1, 'min', 100, 'RG mins', 'BW min', 'min'),
        (2, 'std', 500, 'RG stds', 'BW stds', 'stds'),
        (3, 'max', 5000, 'RG max', 'BW max', 'max'),
    ]

    fig, ax = plt.subplots(2,4)

    for idx, frame in enumerate(data):
        series_label = labels[idx]
        for col, stat, _, _, _, _ in layout:
            # Rows are images, columns are the four channels.
            per_channel = np.stack(frame[stat])
            ax[0, col].scatter(per_channel[:, 0], per_channel[:, 1], label = series_label, alpha=0.5)
            ax[1, col].scatter(per_channel[:, 2], per_channel[:, 3], label = series_label, alpha=0.5)

    for col, _, limit, top_title, bottom_title, suffix in layout:
        for row, title, (x_channel, y_channel) in ((0, top_title, 'RG'), (1, bottom_title, 'BW')):
            panel = ax[row, col]
            panel.set_aspect('equal')
            panel.set_xlim(0, limit)
            panel.set_ylim(0, limit)
            panel.set_title(title)
            panel.set_xlabel(x_channel + ' channel ' + suffix)
            panel.set_ylabel(y_channel + ' channel ' + suffix)

    ax[0,0].legend()

    plt.tight_layout()
    plt.show()

In [6]:
# NOTE(review): the recorded output of this cell is "NameError: name 'NP_df' is not defined";
# the cell that assigns FOCO_df / NP_df must run first, and chaud_df is only assigned by a
# get_stats_Chaud call that is commented out in this notebook.
plot_image_stats([FOCO_df, NP_df, chaud_df], ['FOCO', 'NP', 'Chaudhary'])

NameError: name 'NP_df' is not defined

### Histogram evaluation and equalization

In [12]:
def generate_histograms(image, scale):
    """Plot a histogram, its scaled CDF and the maximum for four channels.

    The last axis of ``image`` is treated as the channel axis; channels 0-3
    are drawn in a 2x2 grid titled red, green, blue and white. Each panel
    uses 256 bins of width ``scale`` covering ``[0, 256 * scale]``.

    The CDF is drawn against the bin edges (intensity units) so that it lines
    up with the histogram and the x-axis limits. It used to be drawn against
    the bin index 0..255, which squeezed it into the left ``1/scale`` of the
    panel. The drawn histogram now uses the same 256 bins as the CDF
    normalisation (previously 255 bins ending at ``255 * scale``).

    Args:
        image: Array-like whose last axis has at least four channels.
        scale: Width of one histogram bin in intensity units.
    """
    image = np.asarray(image)

    # One row per pixel, one column per channel.
    im_flat = image.reshape(-1, image.shape[-1])

    titles = ['red histogram', 'green histogram', 'blue histogram', 'white histogram']

    fig, axs = plt.subplots(2,2)

    # axs.flat walks the grid row by row: channel index = row * 2 + column.
    for idx, ax in enumerate(axs.flat):
        values = im_flat[:, idx]

        hist, bins = np.histogram(values, 256, [0, 256*scale])
        cdf = hist.cumsum()
        total = cdf[-1]
        # Rescale the CDF so its top matches the tallest histogram bar;
        # skip the division when no pixel falls inside the histogram range.
        cdf_normalized = cdf * hist.max() / total if total > 0 else cdf

        ax.axvline(np.max(values), linestyle='--')
        # cdf[k] counts every value below the right edge of bin k.
        ax.plot(bins[1:], cdf_normalized, color = 'b')
        ax.hist(values, bins=bins, color='red')
        ax.set_xlabel('color channel gray count')
        ax.set_ylabel('pixel count')
        ax.set_xlim([0,256*scale])
        # Legend entries follow the drawing order above.
        ax.legend(('max value', 'cdf', 'hist'), loc = 'upper right')
        ax.set_title(titles[idx])

    plt.show()


In [56]:
%matplotlib qt
# Load one FOCO recording and plot its per-channel histograms.
# NOTE(review): other cells read from 'data/NP_foco_cropped' (lower-case "foco"); if this is
# meant to be the same directory the path only resolves on a case-insensitive filesystem.
imfile = sio.loadmat('data/NP_FOCO_cropped/2021-12-03-w00-NP1/neuropal_1_MMStack_Pos0.ome.mat')
data = imfile['data']
# Channel order used as R, G, B, W for the FOCO recordings in this notebook.
channels = [0,2,4,1]
RGBW = np.squeeze(data[:,:,:, channels])

# scale=16 makes the 256 histogram bins cover 0..4096.
generate_histograms(RGBW, 16)

In [22]:
%matplotlib qt

# Load one image of the NP-paper dataset and plot its per-channel histograms.
imfile = sio.loadmat('data/NP_paper/all/7_YAaLR.mat')
data = imfile['data']
# Channel order comes from the file itself; the -1 presumably converts MATLAB 1-based
# indices to 0-based - TODO confirm.
channels = imfile['prefs']['RGBW'][0][0]-1
RGBW = np.squeeze(data[:,:,:, channels])

# scale=256 makes the 256 histogram bins cover 0..65536.
generate_histograms(RGBW, 256)

In [13]:
def equalize_hist(RGBW, threshs):
    """Histogram-equalize the part of each channel at or above a threshold.

    For every channel ``l`` (last axis of ``RGBW``) the values
    ``>= threshs[l]`` are binned into unit-width bins over
    ``[threshs[l], 4096]``. Each such value is replaced by
    ``round(cdf / N * (num_bins - 1)) + threshs[l]``, where ``cdf`` is the
    cumulative count up to and including the value's bin, ``N`` is the number
    of values at or above the threshold and ``num_bins = 4096 - threshs[l]``.
    Values below the threshold are copied unchanged.

    The mapping is computed with array operations rather than one Python
    iteration per pixel. Values that do not sit exactly on a bin edge are
    assigned to the bin that contains them, and values of 4096 or more are
    clamped into the last bin; both cases previously raised an error.

    Args:
        RGBW: Array whose last axis indexes the channels.
        threshs: Sequence with one integer threshold (< 4096) per channel.

    Returns:
        Float array of the same shape as ``RGBW`` with the equalized values.
    """
    RGBW = np.asarray(RGBW)

    # Start from a float copy so that below-threshold values pass through unchanged.
    RGBW_new = RGBW.astype(float)

    for l in range(RGBW.shape[-1]):
        thresh = threshs[l]
        channel = RGBW[..., l]

        to_equalize = channel >= thresh
        N = np.count_nonzero(to_equalize)
        if N == 0:
            # Nothing at or above the threshold: the channel stays as it is.
            continue

        selected = channel[to_equalize]
        num_bins = 4096 - thresh
        hist, _ = np.histogram(selected, num_bins, [thresh, 4096])
        cdf = hist.cumsum()

        # Bins are one unit wide and start at thresh, so the bin index is the
        # offset from thresh. np.histogram puts a value equal to the upper
        # edge into the last bin; clipping reproduces that.
        bin_index = np.clip(np.floor(selected - thresh), 0, num_bins - 1).astype(np.intp)

        # RGBW_new[..., l] is a view, so the masked assignment writes into RGBW_new.
        out_channel = RGBW_new[..., l]
        out_channel[to_equalize] = np.round(cdf[bin_index] / N * (num_bins - 1)) + thresh

    return RGBW_new

In [15]:
import utils.utils as uti
import process.file as f

# Histogram-equalize every cropped FOCO image and collect its statistics in hist_df.
# Rows are gathered in a list and turned into a frame once at the end:
# DataFrame.append was removed in pandas 2.0.
hist_rows = []

for folder in os.listdir('data/NP_foco_cropped'):
    if folder.startswith('.DS'):
        continue

    for file in os.listdir('data/NP_foco_cropped/'+folder):
        if not file.endswith('.mat'):
            continue

        print(folder)
        imfile = sio.loadmat('data/NP_foco_cropped/'+folder +'/'+file)

        data = imfile['data']
        channels = [0,2,4,1]
        RGBW = np.squeeze(data[:,:,:, channels])
        RGBW = RGBW.astype('int32')

        # Subtract 80 from every value and clamp negative results to 0.
        RGBW_shift = np.maximum(np.zeros(RGBW.shape), RGBW-80)

        RGBW_flat = RGBW.reshape(-1, RGBW.shape[-1])

        RGBW_new = equalize_hist(RGBW_shift, [100, 240, 80, 50])

        min = np.amin(RGBW_new, axis = (0,1,2))
        max = np.amax(RGBW_new, axis = (0,1,2))
        mean = np.mean(RGBW_new, axis = (0,1,2))
        std = np.std(RGBW_new, axis = (0,1,2))

        # NOTE(review): the correlations are taken from the original image (RGBW_flat),
        # while min/max/mean/std describe the equalized one - confirm this is intended.
        corr = np.corrcoef(RGBW_flat, rowvar= False)
        RG_co = corr[0][1]
        RB_co = corr[0][2]
        RW_co = corr[0][3]
        GB_co = corr[1][2]
        GW_co = corr[1][3]
        BW_co = corr[2][3]

        per_ID, per_correct, per_correct2, correctID, corrID2 = uti.check_accuracy(f.proc_FOCO('data/NP_foco_cropped/'+folder))

        hist_rows.append({'folder': folder, 'acc':per_correct,'min':min, 'max': max, 'mean': mean, 'std':std, 'RG coef': RG_co, 'RB coef': RB_co, 'RW coef': RW_co, 'GB coef': GB_co, 'GW coef': GW_co, 'BW coef':BW_co})

        #sio.savemat('data/NP_FOCO_hist/'+folder+'/'+'hist_equal_image.mat', {'Hist_RGBW':RGBW_new})

hist_df = pd.DataFrame(hist_rows)


2022-04-26-w00-NP1
2022-04-26-w01-NP1
2021-12-03-w00-NP1
2022-02-22-w04-NP1
2022-04-01-w00-NP1
2022-03-05-w00-NP1
2022-02-12-w00-NP1
2022-02-12-w01-NP1
2022-01-22-w04-NP1
2022-02-11-w03-NP1


In [25]:
%matplotlib qt
# Compare the original FOCO statistics with the histogram-equalized FOCO images and the NP-paper images.
plot_image_stats([FOCO_df, hist_df, NP_df], ['FOCO original', 'FOCO Hist Eq', 'NP paper'])


In [18]:
# Histogram-equalize a single recording, add its statistics to hist_df and save the result.
# The folder is named explicitly: the row used to be labelled with whatever value the
# variable `folder` still held from a loop in an earlier cell.
folder = '2022-02-12-w00-NP1'

imfile = sio.loadmat('data/NP_foco_cropped/'+folder+'/neuropal_1_MMStack_Pos0.ome.mat')

data = imfile['data']
channels = [0,2,4,1]
RGBW = np.squeeze(data[:,:,:, channels])
RGBW = RGBW.astype('int32')

# Subtract 80 from every value and clamp negative results to 0.
RGBW_shift = np.maximum(np.zeros(RGBW.shape), RGBW-80)

RGBW_flat = RGBW.reshape(-1, RGBW.shape[-1])

RGBW_new = equalize_hist(RGBW_shift, [100, 240, 80, 50])

min = np.amin(RGBW_new, axis = (0,1,2))
max = np.amax(RGBW_new, axis = (0,1,2))
mean = np.mean(RGBW_new, axis = (0,1,2))
std = np.std(RGBW_new, axis = (0,1,2))

# NOTE(review): the correlations are taken from the original image (RGBW_flat),
# while min/max/mean/std describe the equalized one - confirm this is intended.
corr = np.corrcoef(RGBW_flat, rowvar= False)
RG_co = corr[0][1]
RB_co = corr[0][2]
RW_co = corr[0][3]
GB_co = corr[1][2]
GW_co = corr[1][3]
BW_co = corr[2][3]

per_ID, per_correct, per_correct2, correctID, corrID2 = uti.check_accuracy(f.proc_FOCO('data/NP_foco_cropped/'+folder))

# DataFrame.append was removed in pandas 2.0; concatenate a one-row frame instead.
# hist_df must already exist (it is created by the cell that builds the table).
new_row = pd.DataFrame([{'folder': folder, 'acc':per_correct,'min':min, 'max': max, 'mean': mean, 'std':std, 'RG coef': RG_co, 'RB coef': RB_co, 'RW coef': RW_co, 'GB coef': GB_co, 'GW coef': GW_co, 'BW coef':BW_co}])
hist_df = pd.concat([hist_df, new_row], ignore_index=True)

sio.savemat('data/NP_FOCO_hist/'+folder+'/'+'hist_equal_image.mat', {'Hist_RGBW':RGBW_new})

0
1
2
3


: 

In [None]:
import utils.utils as uti
import process.file as f

# Histogram-equalize every cropped FOCO image, save the equalized image next to the
# NP_FOCO_hist copy of its folder, and collect the statistics in hist_df.
# Rows are gathered in a list and turned into a frame once at the end:
# DataFrame.append was removed in pandas 2.0.
hist_rows = []

for folder in os.listdir('data/NP_foco_cropped'):
    if folder.startswith('.DS'):
        continue

    for file in os.listdir('data/NP_foco_cropped/'+folder):
        if not file.endswith('.mat'):
            continue

        imfile = sio.loadmat('data/NP_foco_cropped/'+folder +'/'+file)

        data = imfile['data']
        channels = [0,2,4,1]
        RGBW = np.squeeze(data[:,:,:, channels])
        RGBW = RGBW.astype('int32')

        # Subtract 80 from every value and clamp negative results to 0.
        RGBW_shift = np.maximum(np.zeros(RGBW.shape), RGBW-80)

        RGBW_flat = RGBW.reshape(-1, RGBW.shape[-1])

        RGBW_new = equalize_hist(RGBW_shift, [100, 240, 80, 50])

        min = np.amin(RGBW_new, axis = (0,1,2))
        max = np.amax(RGBW_new, axis = (0,1,2))
        mean = np.mean(RGBW_new, axis = (0,1,2))
        std = np.std(RGBW_new, axis = (0,1,2))

        # NOTE(review): the correlations are taken from the original image (RGBW_flat),
        # while min/max/mean/std describe the equalized one - confirm this is intended.
        corr = np.corrcoef(RGBW_flat, rowvar= False)
        RG_co = corr[0][1]
        RB_co = corr[0][2]
        RW_co = corr[0][3]
        GB_co = corr[1][2]
        GW_co = corr[1][3]
        BW_co = corr[2][3]

        per_ID, per_correct, per_correct2, correctID, corrID2 = uti.check_accuracy(f.proc_FOCO('data/NP_foco_cropped/'+folder))

        hist_rows.append({'folder': folder, 'acc':per_correct,'min':min, 'max': max, 'mean': mean, 'std':std, 'RG coef': RG_co, 'RB coef': RB_co, 'RW coef': RW_co, 'GB coef': GB_co, 'GW coef': GW_co, 'BW coef':BW_co})

        sio.savemat('data/NP_FOCO_hist/'+folder+'/'+'hist_equal_image.mat', {'Hist_RGBW':RGBW_new})

hist_df = pd.DataFrame(hist_rows)


In [22]:
# Print the folder name, the per-channel std and the per-channel mean of every row of hist_df.
for i, row in hist_df.iterrows():
    print(row['folder'])
    print(row['std'])
    print(row['mean'])

2022-04-26-w00-NP1
[182.85059286 201.24276159 193.55643733 392.4979139 ]
[34.54121648 39.61112046 41.86655722 79.24602454]
2022-04-26-w01-NP1
[115.35925644 117.3026174  161.96169165 286.33321341]
[24.33852083 27.1966788  35.00683676 49.62824704]
2021-12-03-w00-NP1
[150.18277839 123.2684752  175.69144091 360.64454674]
[31.49609329 36.52928171 39.1746062  71.07115823]
2022-02-22-w04-NP1
[136.11129986 173.21009833 315.38309314 363.07843185]
[29.23938759 37.20927306 68.87849713 71.24675352]
2022-04-01-w00-NP1
[154.27793319 170.69595893 168.4160032  388.02698303]
[31.57362574 36.01060583 39.86840889 78.8444963 ]
2022-03-05-w00-NP1
[123.5802483  155.73407286 203.3378168  370.08495309]
[28.11267241 35.00767361 44.05451528 73.20352194]
2022-02-12-w00-NP1
[149.90765351 151.51846662 131.7458141  376.1967122 ]
[30.6627475  32.6856337  33.30107815 74.75682537]
2022-02-12-w01-NP1
[142.83430802 166.23721591 265.41701729 375.91058674]
[30.51329602 35.8326587  57.2141937  75.2204812 ]
2022-01-22-w04-N

In [38]:
# Diagnostic: for a fixed list of recordings print, per channel, the maximum value and
# one value of the cumulative histogram of the pixels at or above the channel threshold.
folders = ['2021-12-03-w00-NP1', '2022-04-26-w00-NP1', '2022-01-22-w04-NP1', '2022-03-05-w00-NP1','2022-02-22-w04-NP1' , '2022-04-01-w00-NP1', '2022-02-11-w03-NP1']  
for folder in folders:
    print(folder)    
    imfile = sio.loadmat('data/NP_foco_cropped/'+folder+'/neuropal_1_MMStack_Pos0.ome.mat')

    data = imfile['data']
    channels = [0,2,4,1]
    RGBW = np.squeeze(data[:,:,:, channels])
    RGBW = RGBW.astype('int32')

    RGBW_shift = np.maximum(np.zeros(RGBW.shape), RGBW-80) #subtracts 80 from every value and clamps negative results to 0

    # Per-channel thresholds, in channel order.
    threshs = [100, 240, 80, 50]

    # NOTE(review): the values analysed below come from the unshifted RGBW; RGBW_shift
    # only supplies the shape - confirm this is intended.
    flat = RGBW.reshape(-1, RGBW.shape[-1])
    size = RGBW_shift.shape

    for l in range(size[3]):
        channel = flat[:,l]

        print(np.max(channel))

        thresh = threshs[l]

        # Unit-width bins over [thresh, 4096] for the values at or above the threshold.
        hist_to_eq = channel[np.where(channel>=thresh)]
        N = len(hist_to_eq)
        num_bins = 4096-thresh
        hist, bins = np.histogram(hist_to_eq, num_bins, [thresh, 4096])
        cdf = hist.cumsum()

        # cdf is indexed with the threshold value itself, i.e. bin number `thresh`, so this is
        # the fraction of the N selected pixels whose value lies in [thresh, 2*thresh + 1).
        # NOTE(review): confirm that this bin, rather than a specific intensity, is what was meant.
        cum_prob = cdf[thresh]/N
        #new_val = np.round(cum_prob*(num_bins-1))+thresh

        print(cum_prob)

2021-12-03-w00-NP1
3531
0.994860010471584
3791
0.6993464052287581
1887
0.9946990906220624
2848
0.4375767033894412
2022-04-26-w00-NP1
4095
0.9916473423874133
4095
0.5761985902771958
1368
0.9935076311837717
3560
0.476053758427822
2022-01-22-w04-NP1
3155
0.9934320544988497
4095
0.6562084336003736
1819
0.9853097374221089
2450
0.4602412361692056
2022-03-05-w00-NP1
1929
0.9966605721902388
4095
0.7123175022580515
838
0.992916498649792
1590
0.4565408034984361
2022-02-22-w04-NP1
1884
0.9956984428213621
4095
0.65786765641079
1139
0.9829455175886767
1645
0.4563855712633436
2022-04-01-w00-NP1
2381
0.994458652311682
4087
0.6616579133118786
835
0.995261775209818
1887
0.44011839845597184
2022-02-11-w03-NP1
3887
0.9957184002952519
3898
0.7349627435137804
1258
0.985576952150209
2802
0.473835875029989


In [37]:
%matplotlib qt
# Load one FOCO recording and plot its per-channel histograms.
# NOTE(review): other cells read from 'data/NP_foco_cropped' (lower-case "foco"); if this is
# meant to be the same directory the path only resolves on a case-insensitive filesystem.
imfile = sio.loadmat('data/NP_FOCO_cropped/2022-02-12-w00-NP1/neuropal_1_MMStack_Pos0.ome.mat')
data = imfile['data']
# Channel order used as R, G, B, W for the FOCO recordings in this notebook.
channels = [0,2,4,1]
RGBW = np.squeeze(data[:,:,:, channels])

# scale=16 makes the 256 histogram bins cover 0..4096.
generate_histograms(RGBW, 16)