In [None]:
import os, random, sys, time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import re
import dask
from dask.diagnostics import ProgressBar
import caiman as cm
import h5py
# from skimage.external import tifffile as tff
from sklearn.decomposition import PCA
import tifffile as tff
import joblib
import plotly.graph_objects as go

codeDir = r'V:/code/python/code'
sys.path.append(codeDir)
import apCode.FileTools as ft
import apCode.volTools as volt
from apCode.machineLearning import ml as mlearn
import apCode.behavior.FreeSwimBehavior as fsb
import apCode.behavior.headFixed as hf
import apCode.SignalProcessingTools as spt
import apCode.geom as geom
import seaborn as sns
import importlib
from apCode import util as util
from apCode import hdf
from apCode.imageAnalysis.spim import regress
from apCode.behavior import gmm as my_gmm
from apCode.machineLearning.preprocessing import Scaler

plt.rcParams['pdf.fonttype'] = 42
plt.rcParams['ps.fonttype'] = 42


# Enable live reloading of edited project modules when running under IPython.
# `InteractiveShell.magic()` is deprecated; `run_line_magic` is the supported API.
try:
    if __IPYTHON__:
        _ipy = get_ipython()
        _ipy.run_line_magic('load_ext', 'autoreload')
        _ipy.run_line_magic('autoreload', '2')
except NameError:
    # Plain Python interpreter: __IPYTHON__ / get_ipython are not defined.
    pass

# Setting seed for reproducibility.
# `random.seed` must be *called*; assigning to it replaces the function and
# seeds nothing. NumPy's global RNG is seeded as well because trials are
# picked with np.random.choice further down.
seed = 143
random.seed(seed)
np.random.seed(seed)

print(time.ctime())

In [None]:
#%% Path to excel sheet storing paths to data and other relevant info
dir_xls = r'Y:\Avinash\Projects\RS recruitment\GCaMP imaging'
file_xls = 'GCaMP volumetric imaging summary.xlsx'


In [None]:
#%% Read xl file
idx_fish = 8
xls = pd.read_excel(os.path.join(dir_xls, file_xls), sheet_name='Sheet1')
path_now = np.array(xls.loc[xls.FishIdx == idx_fish].Path)[0]
print(path_now)

# Continue from the saved dataframe

In [None]:
%%time

startFresh = True  # If True, forget cached hFilePath/df so they are re-read below

# Drop each cached variable independently: a combined `del hFilePath, df`
# raises NameError when only one of the two exists in the namespace.
if startFresh:
    if 'hFilePath' in locals():
        del hFilePath
    if 'df' in locals():
        del df

#%% If stored dataframe exists in path read it
if 'hFilePath' not in locals():
    hFileName = ft.findAndSortFilesInDir(path_now, ext = 'h5', search_str = 'procData')[-1]
    hFilePath = os.path.join(path_now, hFileName)
with h5py.File(hFilePath, mode = 'r') as hFile:
    print(hFile.keys())

if 'df' not in locals():
    file_df = ft.findAndSortFilesInDir(path_now, ext = 'pickle', search_str = 'dataFrame')
    if len(file_df)>0:
        file_df = file_df[-1]
        path_df = os.path.join(path_now, file_df)
        print(path_df)
        print('Reading dataframe...')
        %time df = pd.read_pickle(path_df)       
    else:
        print('No dataframe found in path!')
    

In [None]:
#%% Extract useful info
ta_trl = np.array([np.array(_) for _ in df.tailAngles])
ta = np.concatenate(ta_trl,axis = 1)

### *Can I get a clearer and crisper image volume than the offset map from registration to draw ROIs on?*

In [None]:
%%time
with h5py.File(hFilePath, mode='r') as hFile:
    ca = hFile['ca_trls_reg'][()]

nTrls = ca.shape[0]
trlLen = ca.shape[1]
volDims = ca.shape[-3:]  
ca = ca.reshape(-1, *volDims)
ca = ca[:, 1:]
ca_avg = ca.mean(axis=0)
ca_avg = ca_avg - ca_avg.min() + 1

In [None]:
#%% Save average stack for future reference
tff.imsave(os.path.join(path_now, 'averageCaImgVol.tif'), data=ca_avg.astype('int'))

## Make simple Ca$^{2+}$ response maps to distinguish head and tail-elicited responses

## Use the group data trained GMM model to predict posture labels

In [None]:
#%% Load the trained model
dir_group = r'Y:\Avinash\Projects\RS recruitment\GCaMP imaging\Group'
file_model = 'gmm_svd-3_env_pca-9_gmm-20_20200129-18.pkl' 
gmm_model = joblib.load(os.path.join(dir_group, file_model))


In [None]:
#%% Predict labels using the loaded and pre-trained GMM model

subSample = 1
alpha = 0.5
cmap = plt.cm.tab20


%matplotlib inline

%time labels, features =  gmm_model.predict(ta)

%time x_pca = PCA(n_components = 3, random_state = 143).fit_transform(features)

fh,ax = plt.subplots(2,2,figsize = (15,10))
ax = ax.flatten()


# clrs = cmap(spt.standardize(labels))
scaler_clrs = Scaler(standardize =True).fit(np.arange(gmm_model.n_gmm_))
clrs = cmap(scaler_clrs.transform(labels))

ax[0].scatter(x_pca[:,0], x_pca[:,1], s = 10, c = clrs, alpha = alpha)
ax[0].set_xlabel('pca 1')
ax[0].set_ylabel('pca 2')

ax[1].scatter(x_pca[:,0], x_pca[:,2], s = 10, c = clrs, alpha = alpha)
ax[1].set_xlabel('pca 1')
ax[1].set_ylabel('pca 3')

ax[2].scatter(x_pca[:,1], x_pca[:,2], s = 10, c = clrs, alpha = alpha)
ax[2].set_xlabel('pca 2')
ax[2].set_ylabel('pca 3')
fh.tight_layout()

x = np.unique(labels)
y = np.ones_like(x)
plt.figure(figsize = (20,5))
plt.scatter(x,y, c = cmap(scaler_clrs.transform(x)),s = 4000, marker = 's')
plt.yticks([])
plt.xticks(x, fontsize = 20);
plt.title('Norm-ordered colors', fontsize = 20);


In [None]:
#%% Check out a few trials with predictions from the GMM

# iTrl = 6  # (Struggles = {9, 11}
iTrl = np.random.choice(np.arange(df.shape[0]),size = 1)[0]
yShift = 1.1
loop = False
xl = (-0.1, 1)
# xl = 'auto'
figDir = os.path.join(path_now, 'figs/behav_trls_colored_by_gmm_label')
onOffThr = 0
figExts = ('png','pdf')
cmap = plt.cm.tab20
dt_behav = 1/500


# Create the figure directory together with its parent 'figs' folder;
# os.mkdir fails when an intermediate directory does not exist yet.
os.makedirs(figDir, exist_ok=True)

from IPython import display
if loop:
    trls = np.unique(df.trlNum)
else:
    trls = [iTrl]

maxEnv_full = spt.emd.envelopesAndImf(ta[-1])['env']['max']    
for iTrl in trls:
    ta_now = ta_trl[iTrl]
    trlLen = ta_now.shape[-1]
    inds = np.arange(iTrl*trlLen, (iTrl+1)*trlLen)
    maxEnv = maxEnv_full[inds]
    posInds = np.linspace(0, len(ta_now)-1,4).astype(int)
    z = np.diff(ta_now[posInds],axis = 0)
    ys = util.yOffMat(z)*yShift
    z = z - ys
    y = ta_now[-1]
    x = (np.arange(len(y))*dt_behav) - 1   
    lbls_now, x_now = gmm_model.predict(ta_now)
    lbls_norm = scaler_clrs.transform(lbls_now)
    clrs = cmap(lbls_norm)
    zerInds = np.where(maxEnv <=onOffThr)[0]
    clrs[zerInds,:] = 1 # Make this invisible
    # clrs[:,-1] = 0.8 # Alpha value of points
    fh = plt.figure(figsize = (20,8));    
    # plt.scatter(x,y,c = clrs,s= 15)
    for iLine, z_ in enumerate(z):
        plt.plot(x,z_, c = 'k', alpha = 0.2, label = f'loc = {iLine}')
        plt.scatter(x,z_, c = clrs, s = 15)
        plt.legend(loc = 'upper right', fontsize= 15)
    plt.xlabel('Time (s)', fontsize = 20)
    plt.ylabel('Total tail bending for segment ($^o$)', fontsize = 20)
    maxInd = []
    if np.any(xl == 'auto'):
        nonZerInds = np.setdiff1d(np.arange(len(y)),zerInds)        
        if len(nonZerInds)>0:
            maxInd = np.max(nonZerInds)
        else:
            maxInd = len(x)-31
        plt.xlim(-0.1,x[maxInd])
    else:
        plt.xlim(xl)
    yl = np.min((-100, z.min())), np.max((80, z.max()))
    plt.ylim(yl)
    plt.yticks([0,-100])
    plt.title(f'Total tail curvature with points colored by cluster label (trl = {iTrl})', fontsize = 20)
    plt.show()
    for fe in figExts:
        fn = f'Fig-{util.timestamp()}_Total tail curvature timeseries with colored clustered points_trl-{iTrl}.{fe}'
#         fh.savefig(os.path.join(figDir, fn), format = f'{fe}', dpi = 'figure')
    display.clear_output(wait = True)
    time.sleep(0.05)



In [None]:
#%% Check out a few trials with predictions from the GMM with annotated markers

# iTrl = 10  # (Struggles = {9, 11}
iTrl = np.random.choice(np.arange(df.shape[0]),size = 1)[0]
yShift = 1.1
loop = False
xl = (-0.1, 2)
# xl = 'auto'
figDir = os.path.join(path_now, 'figs/behav_trls_colored_by_gmm_label')
onOffThr = 0
figExts = ('png','pdf')
cmap = plt.cm.tab20
dt_behav = 1/500
figSize = (100,10)

%matplotlib inline
# %matplotlib notebook

# Create the figure directory together with its parent 'figs' folder;
# os.mkdir fails when an intermediate directory does not exist yet.
os.makedirs(figDir, exist_ok=True)

stimLoc = np.array(df.stimLoc)

trls = [iTrl]    
maxEnv_full = spt.emd.envelopesAndImf(ta[-1])['env']['max']    
for iTrl in trls:   
    ta_now = ta_trl[iTrl]
    fh = my_gmm._plot_with_labels(gmm_model, ta_now, marker_size=200, figSize=figSize)
    plt.xlabel('Frame #', fontsize = 20)
    plt.ylabel('Total tail bend amplitude ($^o$)', fontsize = 20)
    maxInd = []    
    yl = (-100, 100)
    plt.xlim(0, ta_now.shape[1])
    plt.ylim(yl)
    plt.yticks([0,-100])
    plt.title(f'Total tail curvature with points colored by cluster label (trl = {iTrl}, stim = {stimLoc[iTrl].upper()})', fontsize = 20)
    for fe in figExts:
        fn = f'Fig-{util.timestamp()}_Total tail curvature timeseries with colored clustered points_trl-{iTrl}.{fe}'
#         fh.savefig(os.path.join(figDir, fn), format = f'{fe}', dpi = 'figure')
    



In [None]:
#%% Check out a few trials with predictions from the GMM with annotated markers

# iTrl = 10  # (Struggles = {9, 11}
# iTrl = np.random.choice(np.arange(df.shape[0]),size = 1)[0]
yShift = 1.1
loop = False
xl = (-0.1, 2)
# xl = 'auto'
figDir = os.path.join(path_now, 'figs/behav_trls_colored_by_gmm_label')
onOffThr = 0
figExts = ('png','pdf')
cmap = plt.cm.tab20
dt_behav = 1/500
pre_behav = 500
figSize = (100,10)



# Create the figure directory together with its parent 'figs' folder;
# os.mkdir fails when an intermediate directory does not exist yet.
os.makedirs(figDir, exist_ok=True)

    
scaler_clrs = Scaler(standardize =True).fit(np.arange(gmm_model.n_gmm_))   
stimLoc = np.array(df.stimLoc)

trls = np.arange(ta_trl.shape[0])    
maxEnv_full = spt.emd.envelopesAndImf(ta[-1])['env']['max']
# trls = [1]
# For every trial, write an interactive HTML figure of the last tail-angle row
# with each sample marked by its GMM label (one colour/symbol per label).
for iTrl in trls:
    ta_now = ta_trl[iTrl]
    # Time axis in seconds, shifted by the pre-stimulus sample count
    x = (np.arange(ta_now.shape[1])-pre_behav)*dt_behav
    y = ta_now[-1]
    lbls, _ = gmm_model.predict(ta_now)
    line = go.Scatter(x=x, y = y, mode = 'lines', opacity = 0.2, marker = dict(color = 'black'), name = 'ta')
    scatters = []
    scatters.append(line)
    for iLbl, lbl in enumerate(np.unique(lbls)):
        # Matplotlib colormaps return RGBA floats in [0, 1], whereas Plotly
        # 'rgba(...)' strings take 0-255 RGB channels and a 0-1 alpha.
        # Converting explicitly also avoids embedding NumPy scalar reprs
        # (e.g. 'np.float64(0.12)') in the colour string.
        rgba_ = [float(ch) for ch in cmap(lbl)]
        clr = 'rgba({:d},{:d},{:d},{:g})'.format(*(round(255*ch) for ch in rgba_[:3]), rgba_[3])
        inds= np.where(lbls==lbl)[0]
        scatter = go.Scatter(x=x[inds], y=y[inds], mode='markers', marker = dict(color = clr, symbol = lbl, size = 10), name = f'Lbl-{lbl}')
        scatters.append(scatter)
    fig = go.Figure(scatters)
    fig.update_layout(title = f'Tail angles with GMM labels, trl = {iTrl}, stim = {(stimLoc[iTrl]).upper()}')
#     fig.show()
    figName = f'Fig-{util.timestamp()}_trl-{iTrl}.html'
    fig.write_html(os.path.join(figDir,figName))


In [None]:
#%% Plot GMM labels --> Markers map
x = np.arange(gmm_model.n_gmm_)
y = np.ones_like(x)
# data = pd.DataFrame(dict(gmm_label = x, y = y))
# px.scatter(data, x= 'gmm_label', y = y, symbol_sequence=x)
scatters = []
# One marker per GMM label, drawn with the colour/symbol used for that label.
for ind, x_ in enumerate(x):
    # Matplotlib colormaps return RGBA floats in [0, 1], whereas Plotly
    # 'rgba(...)' strings take 0-255 RGB channels and a 0-1 alpha.
    rgba_ = [float(ch) for ch in cmap(x_)]
    clr = 'rgba({:d},{:d},{:d},{:g})'.format(*(round(255*ch) for ch in rgba_[:3]), rgba_[3])
    scatter = go.Scatter(x= [x_], y = [y[ind]], mode = 'markers',
                         marker = dict(symbol = x_, size = 20, color = clr), name = f'Lbl-{x_}')
    scatters.append(scatter)
fig = go.Figure(scatters)
fig.update_xaxes(tickvals = x)
fig.update_yaxes(tickvals = [])
fig.update_layout(title='Symbol map: GMM labels to markers and colors', xaxis_title="GMM label")
fig.show()
figName = f'Fig-{util.timestamp()}_scatterPlotMarkerGmmLabelLegend.html'
fig.write_html(os.path.join(figDir,figName), auto_open=False)

In [None]:
def superSample(t, y, tt):
    """Resample the signal ``y(t)`` onto the time base ``tt`` by linear interpolation.

    Before interpolating, the samples are extended with one zero-valued point
    at ``tt[0]`` and one at ``tt[-1]`` so that every requested time lies
    inside the interpolation range.

    Parameters
    ----------
    t: array, (nTimePoints,)
        Sample times of ``y``.
    y: array, (nTimePoints,)
        Signal values at ``t``.
    tt: array, (nNewTimePoints,)
        Times at which the interpolant is evaluated.

    Returns
    -------
    array, (nNewTimePoints,)
        Interpolated signal at ``tt``.
    """
    import numpy as np
    from scipy.interpolate import interp1d
    t_first = tt[0].reshape((-1,))
    t_last = tt[-1].reshape((-1,))
    zero = np.array(0).reshape((-1,))
    t_ext = np.concatenate((t_first, t, t_last))
    y_ext = np.concatenate((zero, y, zero))
    interpolant = interp1d(t_ext, y_ext, kind = 'slinear')
    return interpolant(tt)

def padIr(ir_trl, pad_pre, pad_post):
    """
    Zero-pad every trial of an impulse-response stack along its last axis
    and flatten each padded trial into one row.

    Parameters
    ----------
    ir_trl: iterable of 2D arrays
        One 2D array per trial.
    pad_pre, pad_post: int
        Number of zeros added before / after the last axis of each trial.

    Returns
    -------
    array, (nTrials, nRows*(nCols + pad_pre + pad_post))
        One flattened, padded row per trial.
    """
    pad_widths = ((0, 0), (pad_pre, pad_post))
    return np.array([np.pad(trl, pad_widths).flatten() for trl in ir_trl])

def serializeHyperstack(vol):
    """
    Flatten every volume of a hyperstack into a row of pixels (e.g. for regression).

    Pixels are ordered rows -> columns -> slices, with the slice index
    varying fastest.

    Parameters
    ----------
    vol: array, (nTimePoints, nSlices, nRows, nCols)

    Returns
    -------
    vol_ser: array, (nTimePoints, nPixels)
    """
    # Bring the axes to (time, rows, cols, slices), then merge the last three
    vol_t_first = np.transpose(vol, (0, 2, 3, 1))
    return vol_t_first.reshape(vol_t_first.shape[0], -1)

def deserializeToHyperstack(arr, volDims):
    """
    Rebuild a hyperstack from serialized pixel rows.

    Each row of `arr` is interpreted as pixels ordered rows -> columns ->
    slices (slice index varying fastest), i.e. the layout produced by
    `serializeHyperstack`.

    Parameters
    ----------
    arr: array, (nTimePoints, nPixels)
    volDims: sequence, (nSlices, nRows, nCols)

    Returns
    -------
    vol: array, (nTimePoints, nSlices, nRows, nCols)
    """
    dims = np.array(volDims)
    n_slices, n_rows, n_cols = dims[0], dims[1], dims[2]
    unfolded = arr.reshape(arr.shape[0], n_rows, n_cols, n_slices)
    return np.transpose(unfolded, (0, 3, 1, 2))

def pxlsToVol(pxls, volDims):
    """
    Rebuild a single volume from a serialized pixel vector.

    `pxls` is interpreted as pixels ordered rows -> columns -> slices,
    with the slice index varying fastest.

    Parameters
    ----------
    pxls: array, (nPixels,)
    volDims: sequence, (nSlices, nRows, nCols)

    Returns
    -------
    vol: array, (nSlices, nRows, nCols)
    """
    dims = np.array(volDims)
    n_slices, n_rows, n_cols = dims[0], dims[1], dims[2]
    unfolded = pxls.reshape(n_rows, n_cols, n_slices)
    return np.transpose(unfolded, (2, 0, 1))

def superSample_arr(t, arr, tt, n_jobs = 32):
    """
    Super sample every signal in an array, in parallel, using `superSample`.

    Parameters
    ----------
    t: array, (nTimePoints,)
        Sample times shared by all signals.
    arr: array, (nSignals, nTimePoints)
    tt: array, (nNewTimePoints,)
        Times at which each signal is re-evaluated.
    n_jobs: int or None
        Requested number of parallel jobs. Positive values are capped at the
        number of available CPUs; None and negative values are passed to
        joblib unchanged.

    Returns
    -------
    array, (nSignals, nNewTimePoints)
    """
    from joblib import Parallel, delayed
    # Honour the caller's request (previously overwritten unconditionally),
    # but never ask for more workers than there are CPUs.
    if n_jobs is not None and n_jobs > 0:
        n_jobs = int(min(n_jobs, os.cpu_count() or 1))
    arr_sup = Parallel(n_jobs=n_jobs,verbose=1)(delayed(superSample)(t, y, tt) for y in arr)
    return np.array(arr_sup)

def betasToVol(betas, volDims):
    """
    Reshape per-pixel regression values into image volume(s).

    Parameters
    ----------
    betas: array, (nPixels,) or (nPixels, nRegressors)
        A 1D input is treated as a single regressor.
    volDims: sequence of int
        Dimensions of one volume; their product must equal nPixels.

    Returns
    -------
    array, (nRegressors, *volDims) with singleton axes squeezed out
    """
    if np.ndim(betas) < 2:
        betas = betas[:, np.newaxis]
    n_regressors = betas.shape[1]
    vols = np.reshape(betas.T, (n_regressors, *volDims))
    return np.squeeze(vols)

def head_tail_impulse_trains(df, n_pre=500, thr=5, modulate_by_amp=True):
    """ Return trialized impulse trains for regression along with the names
    of the regressors. Not GMM-label based, but rather head and tail stim-based.

    Parameters
    ----------
    df: pandas dataframe
        Must have the columns, 'tailAngles', 'stimLoc', 'sessionIdx'
    n_pre: int
        Number of pre-stimulus time points in each trial
    thr: scalar
        Tail angles threshold for determining period of motor activity
    modulate_by_amp: bool
        If True, head and tail regressors are modulated by amplitude

    Returns
    -------
    ir_trl: array, (nTrials, 6, nTimePoints)
        Regressor rows for each trial, in the order given by `names_ir`.
    names_ir: list of str
        Names of the 6 regressor rows.
    """
    ta_all = np.array([np.array(_) for _ in df.tailAngles])
    # Keep only the last row of each trial's tail-angle matrix. Plain indexing
    # already yields (nTrials, nTimePoints); an additional np.squeeze would
    # collapse a single-trial dataset to 1D and break the per-trial loop.
    ta_trl = ta_all[:, -1, :]
    stimLoc = np.array(df.stimLoc)
    sessionIdx = np.array(df.sessionIdx)
    names_ir = ['head_stim', 'tail_stim', 'head_motor', 'tail_motor', 'session_idx', 'trl_num']
    ir_trl = []
    for iTrl, trl in enumerate(ta_trl):
        trl = trl-trl[0]  # angles relative to the first sample of the trial
        ir_now = np.zeros((len(names_ir), len(trl)))
        inds_supra = np.where(np.abs(trl)>=thr)[0]
        # Any stimulus location other than 'h' (case-insensitive) is treated as tail
        if stimLoc[iTrl].lower() == 'h':
            row_stim, row_motor = 0, 2
        else:
            row_stim, row_motor = 1, 3
        # Single impulse at the last pre-stimulus sample
        ir_now[row_stim, n_pre-1] = 1
        # Motor regressor: supra-threshold samples, optionally amplitude-weighted
        if modulate_by_amp:
            ir_now[row_motor, inds_supra] = np.abs(trl[inds_supra])
        else:
            ir_now[row_motor, inds_supra] = 1
        ir_now[4] = sessionIdx[iTrl]
        ir_now[5] = iTrl
        ir_trl.append(ir_now)
    return np.array(ir_trl), names_ir


In [None]:
%%time
#%% Predict labels on full time series, match lengths of behavior and ca trials, and make full set of impulse 
### trains and other regressors
n_pre_behav = 500
thr_amp = 5
modulate_by_amp = True


%time ir_trl, names_ir = head_tail_impulse_trains(df, n_pre=n_pre_behav, thr=thr_amp,\
                                                  modulate_by_amp=modulate_by_amp)
ir_ser = np.concatenate(ir_trl, axis=1)
ir_ser_norm = spt.standardize(ir_ser, axis=1)


In [None]:
#%% Display impulse trains & other regressors
Fs_behav = 500 # Sampling frequency of behavor

getStimName = lambda s: 'Head' if s == 'h' else 'Tail'
t_full = np.arange(ir_ser_norm.shape[-1])*(1/Fs_behav)
yOff = util.yOffMat(ir_ser_norm)
plt.figure(figsize = (16,8))
plt.plot(t_full, (ir_ser_norm-yOff).T);
regNames = names_ir.copy()

yt = -np.arange(ir_ser_norm.shape[0])
plt.yticks(yt, regNames)
plt.xlabel('Time (s)')
plt.title('Impulse responses & other regressors');

In [None]:
%%time
#%% CIRF in slightly subSampled behavAndScan time, followed by convolution to generate regressors
tLen = 6 # Length of kernel
tau_rise = 0.2 # Rise constant
tau_decay = 1 # Decay constant
tPeriStim_ca = (-1, 10)
tPeriStim_behav = (-1, 6)

dt_behav = 1/Fs_behav

my_conv = lambda y, cirf, mode: np.convolve(y, cirf, mode = mode)[:len(y)]

### CIRF
t_cirf = np.arange(0,tLen,dt_behav)
cirf = spt.generateEPSP(t_cirf,tau_rise, tau_decay,1,0)

ind_conv_upto = util.findStrInList('session_idx', regNames)[0]

print('Convolving with CIRF...')
regressors = []
for iTrl, trl in enumerate(ir_trl):
    reg_trl = []
    for reg_ in trl[:ind_conv_upto]:
        reg_trl.append(dask.delayed(my_conv)(reg_, cirf, 'full'))
    regressors.append(reg_trl)
regressors = dask.compute(*regressors)
regressors = np.concatenate(regressors,axis = 1)   
regressors = np.r_[regressors, ir_ser[ind_conv_upto:]]
regressors = spt.zscore(regressors,axis=1)

print('Reading Ca2+ trials from dataframe')
%time ca_trl = np.array([np.array(_) for _ in np.array(df['ca'])])

t_ca = np.linspace(*tPeriStim_ca, ca_trl.shape[1])
t_behav = np.linspace(*tPeriStim_behav, ir_trl.shape[-1])
ind_max = np.where(t_ca>=t_behav[-1])[0][0]
ca_trl_sub = ca_trl[:,:ind_max,...]

### Re-assign to dataframe
# Replace the 'ca' column with the trials truncated at ind_max (ca_trl_sub)
# and overwrite the pickle at path_now/file_df in place.
# NOTE(review): this step is destructive — the full-length Ca2+ trials are no
# longer in the saved dataframe afterwards, and re-running this cell on the
# reloaded dataframe would recompute ind_max on the already-shortened trials
# and truncate them again. Consider saving under a new name — TODO confirm.
df = df.assign(ca = list(ca_trl_sub))
df.to_pickle(os.path.join(path_now, file_df))

t_behav = np.linspace(0, 1, regressors.shape[1])
t_ca = np.linspace(0, 1, ca_trl_sub.shape[0]*ca_trl_sub.shape[1])

print('Super sampling...')
%time regressors = superSample_arr(t_behav, regressors, t_ca)

if 'hFilePath' not in locals():
    hFileName = ft.findAndSortFilesInDir(path_now, ext = 'h5', search_str='procData')[-1]
    hFilePath = os.path.join(path_now, hFileName)

print('Saving variables to HDF file')    
with h5py.File(hFilePath, mode = 'r+') as hFile:
    if 'regression' in hFile:
        del hFile['regression']
    grp = hFile.create_group('regression')   
    grp.create_dataset('regressors', data = regressors.T)
    grp.create_dataset('regressor_names', data = util.to_ascii(regNames))
    grp.create_dataset('impulse_trains', data = ir_ser)


In [None]:
# #%% Serialize Ca2+ volumes into pixel timeseries for regression
# print('Serializing Ca2+ data for regression')
# %time ca_ser = serializeHyperstack(np.concatenate(ca_trl_sub,axis = 0))


In [None]:
#%% Plot all regressors
nTrls = ca_trl_sub.shape[0]
nTimePts = ca_trl_sub.shape[0]*ca_trl_sub.shape[1]
t_ca = np.linspace(0, (tPeriStim_behav[-1]-tPeriStim_behav[0])*nTrls, nTimePts)
yOff = util.yOffMat(regressors)
plt.figure(figsize = (20,10))
plt.plot(t_ca,(regressors-yOff).T)
plt.xlim(t_ca.min(), t_ca.max())
plt.yticks(-yOff, names_ir)
plt.xlabel('Time (s)')
plt.title('Regressors');

In [None]:
#%% Read saved dataframe if continuing from here
file_df = ft.findAndSortFilesInDir(path_now, ext = 'pickle', search_str='dataFrame')[-1]
%time df = pd.read_pickle(os.path.join(path_now,file_df))
%time ca_trl = np.array([np.array(_) for _ in np.array(df['ca'])])

In [None]:
%%time
#%% Denoise and filter images before regression
filtSize = 1
n_comps = 50


def den_flt(slc, n_comps = 50, filtSize = 1):
    """
    Optionally denoise an image series, then Gaussian-filter it.

    Parameters
    ----------
    slc: array
        Image series passed to `volt.denoise_ipca` / `volt.img.gaussFilt`
        (called below with one imaging slice over time — exact shape
        requirements are set by those helpers).
    n_comps: int or None
        Number of components handed to `volt.denoise_ipca`. If None, the
        denoising step is skipped.
    filtSize: scalar
        Sigma handed to `volt.img.gaussFilt`.

    Returns
    -------
    The filtered series returned by `volt.img.gaussFilt`.
    """
    slc_den = slc if n_comps is None else volt.denoise_ipca(slc, components=n_comps)
    return volt.img.gaussFilt(slc_den, sigma=filtSize)


images_reg_ser = ca_trl.reshape(-1, *ca_trl.shape[2:])
images_reg_ipca_flt = []
for iSlc, slc in enumerate(np.swapaxes(images_reg_ser,0,1)):
    print(f'{iSlc + 1}/{images_reg_ser.shape[1]}')
    slc_flt = den_flt(slc, n_comps=n_comps, filtSize=filtSize)
    images_reg_ipca_flt.append(slc_flt)

images_reg_ipca_flt = np.swapaxes(np.array(images_reg_ipca_flt),0,1)

if 'hFilePath' not in locals():
    hFileName = ft.findAndSortFilesInDir(path_now, ext = 'h5', search_str='procData')[-1]
    hFilePath = os.path.join(path_now, hFileName)
    
with h5py.File(hFilePath, mode = 'r+') as hFile:
    keyName = f'images_reg_ipca_flt_sigma-{int(filtSize*100)}'
    if keyName in hFile:
        del hFile[keyName]
    %time hFile.create_dataset(keyName, data = images_reg_ipca_flt)

In [None]:
%%time
#%% Read relevant variables for regression
filtSize = 1

hFileName = ft.findAndSortFilesInDir(path_now, ext = 'h5', search_str='procData')[-1]
hFilePath = os.path.join(path_now, hFileName)
with h5py.File(hFilePath, mode = 'r') as hFile:
    print(hFile.keys())
    images = np.array(hFile[f'images_reg_ipca_flt_sigma-{int(filtSize*100)}'])
    X_reg = np.array(hFile['regression/regressors'])
    regNames = util.to_utf(np.array(hFile['regression/regressor_names']))
    if 'images_reg_ipca_flt' in locals():
        del images_reg_ipca_flt

### Correct for last-point edge artifact
# Overwrite the final time point (last row of X_reg) of every regressor with
# the preceding sample. Done in one vectorized assignment instead of rebuilding
# the array regressor by regressor through throwaway globals.
X_reg[-1] = X_reg[-2]


## Regression

In [None]:
#%% Regress
def betasToVol(betas, volDims):
    """
    Reshape per-pixel regression values into image volume(s).

    Parameters
    ----------
    betas: array, (nPixels,) or (nPixels, nRegressors)
        A 1D input is treated as a single regressor.
    volDims: sequence of int
        Dimensions of one volume; their product must equal nPixels.

    Returns
    -------
    array, (nRegressors, *volDims) with singleton axes squeezed out
    """
    if np.ndim(betas) < 2:
        betas = betas[:, np.newaxis]
    by_regressor = betas.T
    target_shape = (betas.shape[1], *volDims)
    return np.squeeze(by_regressor.reshape(target_shape))

print('Serializing pixels for regression...')
ca_ser = images.reshape(images.shape[0],-1)
%time regObj = regress(X_reg, ca_ser, n_jobs=-1, fit_intercept=True)

betas_vol = betasToVol(regObj.coef_, images.shape[-3:])
intercept_vol = betasToVol(regObj.intercept_, images.shape[-3:])
t_vol = betasToVol(regObj.T_, images.shape[-3:])
r_vol = betasToVol(regObj.Rsq_,images.shape[-3:])
mse_vol = betasToVol(regObj.mse_, images.shape[-3:])
se_vol = betasToVol(regObj.se_, images.shape[-3:])


In [None]:
iReg = 3
q_max = 99
q_min = 5

plt.figure(figsize = (20,15))

beta_ = betas_vol[iReg][1:]
plt.subplot(311)
plt.imshow(spt.stats.saturateByPerc(beta_.max(axis= 0), perc_up = q_max, perc_low = q_min))
plt.title(regNames[iReg], fontsize = 20)
plt.ylabel('Beta', fontsize = 20)
plt.colorbar()

se_ = se_vol[iReg][1:]
plt.subplot(312)
plt.imshow(spt.stats.saturateByPerc(se_.max(axis= 0), perc_up = q_max, perc_low = q_min))
plt.title(regNames[iReg], fontsize = 20)
plt.colorbar()
plt.ylabel('SE', fontsize = 20)


# T-value panel. `t_` already excludes the first slice (same convention as
# beta_ and se_ above), so it is projected directly; slicing it with [1:] a
# second time would silently drop one more slice than the other two panels.
t_ = t_vol[iReg][1:]
beta_bool = volt.img.otsu(beta_, binary=True)
# t_ = t_*beta_bool
plt.subplot(313)
# plt.imshow(spt.stats.saturateByPerc(t_vol[iReg+1][1:].max(axis= 0), perc_up = q_max, perc_low = q_min))
plt.imshow(spt.stats.saturateByPerc(t_.max(axis= 0), perc_up = q_max))
plt.title(regNames[iReg], fontsize = 20)
plt.colorbar()
plt.ylabel('T-value', fontsize = 20)


In [None]:
%%time
#%% Save regression images
figDir = os.path.join(path_now, f'figs/regression_ipca_flt_sigma-{int(filtSize*100)}_{util.timestamp("min")}')
t_mult = 1 # Multiply t-values by this value before converting to integer type because of low bit-depth otherwise

# Create the output directory together with its parent 'figs' folder;
# os.mkdir fails when an intermediate directory does not exist yet.
os.makedirs(figDir, exist_ok=True)

### First save coefficients
foo = betas_vol.astype(int)
dir_now = os.path.join(figDir, 'betas')
if not os.path.exists(dir_now):
    os.mkdir(dir_now)
for iReg, vol in enumerate(foo):
    tff.imsave(os.path.join(dir_now,f'Fig-{util.timestamp()}_regressor-{regNames[iReg]}_coef.tif'),vol[1:])
tff.imsave(os.path.join(dir_now,f'Fig-{util.timestamp()}_regressor_intercept_coef.tif'),intercept_vol[1:].astype('int'))
    
# Save the t-value volumes as integer TIFFs under <figDir>/tValues.
# NOTE(review): the trailing [1:] slices the FIRST axis of t_vol, which the
# plotting cell indexes as the regressor axis (t_vol[iReg]) — so entry 0 is
# skipped here and regNames[iReg] is paired with t_vol[iReg + 1]. That is only
# correct if regObj.T_ stores the intercept as its first entry — TODO confirm.
foo = ((t_vol*t_mult).astype(int))[1:]
dir_now = os.path.join(figDir, 'tValues')
if not os.path.exists(dir_now):
    os.mkdir(dir_now)
for iReg, vol in enumerate(foo):
    # vol[1:] drops the first slice of each volume, as done for the betas
    tff.imsave(os.path.join(dir_now,f'Fig-{util.timestamp()}_regressor-{regNames[iReg]}_tVals.tif'),vol[1:])
# NOTE(review): foo[0] is the first volume kept after the [1:] slice above and
# is written whole (first slice included) under the 'intercept' name; if the
# intercept t-values are t_vol[0], this saves the wrong volume — verify.
tff.imsave(os.path.join(dir_now,f'Fig-{util.timestamp()}_regressor_intercept_T.tif'),foo[0])

print(f'Saved at {dir_now}')

## ROI analysis

In [None]:
#%% Read ROIs
# dir_rois= r'Y:\Avinash\Head-fixed tail free\GCaMP imaging\2020-01-11\f1\figs\regression_ipca_flt_sigma-100_20200317-0507\betas\RoiSet.zip'

dir_rois = os.path.join(path_now, 'RoiSet2.zip')

In [None]:
filtSize = 1

hFileName = ft.findAndSortFilesInDir(path_now, ext = 'h5', search_str='procData')[-1]
hFilePath = os.path.join(path_now, hFileName)

if 'images' not in locals():
    with h5py.File(hFilePath, mode = 'r') as hFile:
        images = np.array(hFile[f'images_reg_ipca_flt_sigma-{int(filtSize*100)}'])

if 'df' not in locals():
    file_df = ft.findAndSortFilesInDir(path_now, ext = 'pickle', search_str='dataFrame')[-1]
    %time df = pd.read_pickle(os.path.join(path_now, file_df))


In [None]:
#%% Some functions and reading of ROIs

def strip_suffices(strList):
    """
    Drop the last dot-separated field from each name ('a.b.c' -> 'a.b').

    Parameters
    ----------
    strList: iterable of str
        Every name must consist of exactly three dot-separated fields,
        otherwise a ValueError is raised.

    Returns
    -------
    array of str, one shortened name per input name
    """
    def _drop_last_field(name):
        first, second, _last = name.split('.')
        return first + '.' + second

    return np.array([_drop_last_field(name) for name in strList])

def consolidate_rois(rois, volDims):
    """
    Merge per-slice ROI entries that share a base name into one volume mask each.

    Parameters
    ----------
    rois: dict
        Maps full ROI names (three dot-separated fields) to dicts with the
        keys 'position' (index into the first axis of the volume) and
        'mask' (values written at that position).
    volDims: tuple
        Shape of the mask volume created for every unique base name.

    Returns
    -------
    masks: array, (nUniqueRois, *volDims)
    roiNames_unique: array of str
        Unique base names (full names with the last field stripped), in the
        order returned by np.unique.
    """
    names_full = list(rois.keys())
    names_short = strip_suffices(names_full)
    names_unique = np.unique(names_short)

    def _mask_for(short_name):
        # Collect every entry matched by util.findStrInList into one volume
        vol_mask = np.zeros(volDims)
        for idx in util.findStrInList(short_name, names_short):
            roi = rois[names_full[idx]]
            vol_mask[roi['position']] = roi['mask']
        return vol_mask

    all_masks = [_mask_for(short_name) for short_name in names_unique]
    return np.array(all_masks), names_unique


imgDims = images.shape[-2:]
volDims = images.shape[-3:]

_, rois = mlearn.readImageJRois(dir_rois, imgDims, multiLevel=False)
masks, roiNames = consolidate_rois(rois, volDims)


In [None]:
%%time 
roi_ts = []
for iMask, mask in enumerate(masks):
    print(f'{iMask+1}/{masks.shape[0]}')
    ts = np.apply_over_axes(np.mean, images*mask, [1, 2, 3]).flatten()
    roi_ts.append(ts)
roi_ts = np.array(roi_ts)

In [None]:
mu_head.shape, mu_tail.shape,

In [None]:
nTrls = df.shape[0]
roi_ts_trls = roi_ts.reshape(roi_ts.shape[0], nTrls, -1)
roi_ts_trls -= roi_ts_trls[...,0][...,None]
stimLoc = np.array(df.stimLoc)
trls_head = np.where(stimLoc=='h')[0]
trls_tail = np.where(stimLoc=='t')[0]

roi_ts_head = roi_ts_trls[:, trls_head]
roi_ts_tail = roi_ts_trls[:, trls_tail]

# Trial-averaged response and standard error of the mean for every ROI.
# Axis 1 of roi_ts_head / roi_ts_tail indexes trials, so the SEM divides by
# sqrt(number of trials) — not by the number of ROIs (mu_*.shape[0]).
mu_head = roi_ts_head.mean(axis=1)
sem_head = roi_ts_head.std(axis=1)/np.sqrt(roi_ts_head.shape[1])
mu_tail = roi_ts_tail.mean(axis=1)
sem_tail = roi_ts_tail.std(axis=1)/np.sqrt(roi_ts_tail.shape[1])

# plt.figure(figsize=(20, 20*nRows/nCols))
nCols = 3
nRows = int(np.ceil(len(roiNames)/nCols))
fh, ax = plt.subplots(nrows=nRows, ncols=nCols, sharex=True, figsize=(20, 20*nRows/nCols))
ax = ax.flatten()
fh.tight_layout()

t = np.arange(mu_head.shape[1])*(1/2)
for iRoi, roi_ in enumerate(mu_head):
#     ax[iRoi].plot(mu_head[iRoi], c=plt.cm.tab10(0), label='Head')
    ax[iRoi].fill_between(t, mu_head[iRoi]-sem_head[iRoi], mu_head[iRoi]+sem_head[iRoi],
                          color=plt.cm.tab10(0), alpha=0.5, label='Head')
    ax[iRoi].fill_between(t, mu_tail[iRoi]-sem_tail[iRoi], mu_tail[iRoi]+sem_tail[iRoi],
                          color=plt.cm.tab10(1), alpha=0.5, label='Tail')
#     ax[iRoi].plot(t,mu_tail[iRoi], c=plt.cm.tab10(1), label='Tail')
    ax[iRoi].set_yticks([])
    ax[iRoi].set_title(r'${}$'.format(roiNames[iRoi]), fontsize=20)
    if iRoi==0:
        ax[iRoi].legend(loc='upper left', fontsize=20)
fh.suptitle('Average Ca$^{2+}$ response for escape trials_Head vs tail stimulation\n R = ipi, L = contra', \
           fontsize=24);
fh.subplots_adjust(top=0.955, hspace=0.12)

dir_figs = os.path.join(path_now, 'figs')
if not os.path.exists(dir_figs):
    os.mkdir(dir_figs)
fn = f'Fig-{util.timestamp("minute")}_Trial-averaged Ca2+ responses_head and tail trials'
fh.savefig(os.path.join(dir_figs, fn + '.pdf'), dpi='figure', format='pdf')
fh.savefig(os.path.join(dir_figs, fn + '.png'), dpi='figure', format='png')
print(f'Saved at \n{dir_figs}')

In [None]:
nTrls = ca_trl.shape[0]
roi_ts_trls = roi_ts.reshape(roi_ts.shape[0], nTrls, -1)
roi_ts_trls -= roi_ts_trls[...,0][...,None]
trls_head = np.where(stimLoc=='h')[0]
trls_tail = np.where(stimLoc=='t')[0]

roi_ts_head = roi_ts_trls[:, trls_head]
roi_ts_tail = roi_ts_trls[:, trls_tail]

plt.figure(figsize=(10, 10))
mu_head = roi_ts_head.mean(axis=1)
sigma_head = roi_ts_head.std(axis=1)
mu_tail = roi_ts_tail.mean(axis=1)
sigma_tail = roi_ts_tail.std(axis=1)


yOff = 2*np.max(mu_head)*np.arange(roi_ts_trls.shape[0])[:, None]
# yOff = util.yOffMat(mu_head)*2

plt.subplot(121)
plt.plot((mu_head-yOff).T);
plt.plot((mu_head+sigma_head-yOff).T, c='k', alpha=0.25);


plt.subplot(122)
plt.plot((mu_tail-yOff).T);
plt.plot((mu_tail+sigma_tail-yOff).T, c='k', alpha=0.25);



In [None]:
#%% NMF in ROI-masked areas
masks_zProj = masks==1
masks_zPproj = masks.max(axis=1).max(axis=0)
images_zProj = images.mean(axis=1)
images_zProj_mask = masks_zPproj[None, ...]*images_zProj
mov = cm.movie(images_zProj_mask)
mov -= mov.min()

In [None]:
%time nmf_space, nmf_time = mov.NonnegativeMatrixFactorization()

In [None]:
iComp = 10
plt.figure(figsize=(10, 10))
plt.subplot(211)
plt.imshow(spt.standardize(nmf_space[iComp]), vmax=0.5)
plt.subplot(212)
plt.plot(nmf_time.T[iComp])



## *Try CNMF*

In [None]:
# Imports used by the CNMF cells below.
# NOTE(review): `caiman` is already imported as `cm` in the first cell of the notebook,
# so the second line here is redundant.
import bokeh.plotting as bpl
import caiman as cm
from caiman.motion_correction import MotionCorrect
from caiman.source_extraction.cnmf import cnmf as cnmf
from caiman.source_extraction.cnmf import params as params
from caiman.utils.utils import download_demo
from caiman.utils.visualization import plot_contours, nb_view_patches, nb_plot_contour
# Configure bokeh to render its output inside the notebook.
bpl.output_notebook()

# *CNMF* 

## *If data is small enough use a single patch approach*

In [None]:
# Drop the first entry along axis 1 of `images` and flatten every frame to a row.
images_now = images[:, 1:]
images_ser = images_now.reshape(images_now.shape[0], -1)
nTrls = df.shape[0]
# Frames per trial. Floor division keeps this an integer so it can be used as a
# shape or an index; true division always returned a float.
trlLen = images_now.shape[0] // nTrls

# %time regObj = regress(X_reg[:,-2:], images_ser, n_jobs=-1, fit_intercept=True)


In [None]:
# Sample index running over the rows of X_reg.
x = np.arange(len(X_reg))


In [None]:
# Build the movie handed to CNMF: drop the first entry along axis 1, average over
# that axis, compute dF/F, then shift so the minimum is zero.
images_now = images[:, 1:]
imgs_proj = images_now.mean(axis=1)
mov = cm.movie(imgs_proj, fr=2)
df_ca, baseline = mov.computeDFF()
# Copy into a plain ndarray before shifting.
df_ca = np.array(df_ca)
df_ca = df_ca - df_ca.min()
print(df_ca.shape)

In [None]:
#%% Save as memory mapped file
# Writes `df_ca` with base name 'Yr9' in C order; the returned file name is what
# the next cell passes to cm.load_memmap.
fn_new = cm.save_memmap([df_ca], order='C', base_name='Yr9')


In [None]:
# Re-open the memory-mapped file and restore a time-first array from it.
Yr, dims, T = cm.load_memmap(fn_new)
# Yr is transposed and reshaped column-major ('F') to (T, *dims).
images_now = Yr.T.reshape((T, *dims), order='F')
print(images_now.shape)

In [None]:
#%% (Re)start the local cluster used for parallel processing.
# If a previous `dview` is still around, shut that server down first.
if 'dview' in locals():
    cm.stop_server(dview=dview)
c, dview, n_processes = cm.cluster.setup_cluster(
    backend='local',
    n_processes=None,
    single_thread=False,
)

### *Initialize CNMF object*

In [None]:

# import bokeh.plotting as bpl
# bpl.output_notebook()

# Parameters for the 2D CNMF run in the next cell.
# NOTE(review): only fr, K, gSig, merge_thresh, p, rf, min_SNR, rval_thr and
# remove_very_bad_comps are passed to cnmf.CNMF in the INIT cell; gnb, stride_cnmf,
# method_init, ssub, tsub, cnn_thr and cnn_lowest are defined here but not used there.
fr = 2  # passed to CNMF as `fr` (presumably the imaging rate in Hz -- TODO confirm)
# K = 20  # number of neurons expected per patch
# gSig = [2, 2]  # expected half size of neurons
merge_thresh = 0.9  # merging threshold, max correlation allowed
p = 2  # order of the autoregressive system

gnb = 2                     # number of global background components
rf = 45                     # half-size of the patches in pixels, e.g., if rf=25, patches are 50x50
stride_cnmf = 10             # amount of overlap between the patches in pixels
K = 4                       # number of components per patch
gSig = [20, 20]               # expected half size of neurons in pixels
method_init = 'greedy_roi'  # initialization method (use 'sparse_nmf' when analyzing dendritic data)
ssub = 1                    # spatial subsampling during initialization
tsub = 1                    # temporal subsampling during initialization

# parameters for component evaluation
min_SNR = 2.0               # signal to noise ratio for accepting a component
rval_thr = 1.0              # space correlation threshold for accepting a component

remove_very_bad_comps = False
cnn_thr = 0.99              # threshold for CNN based classifier
cnn_lowest = 0.1 # neurons with CNN probability lower than this value are rejected


In [None]:
# INIT
# Build the CNMF object from the parameters set in the previous cell and fit it
# to `images_now`; `%time` reports how long the fit takes.
# NOTE(review): gnb, stride_cnmf, method_init, ssub, tsub, cnn_thr and cnn_lowest are
# defined in the parameter cell but are not passed here -- confirm this is intentional.
cnm = cnmf.CNMF(n_processes, fr=fr, k=K, gSig=gSig, merge_thresh=merge_thresh, p=p, rf=rf, dview=dview,\
                min_SNR=min_SNR, rval_thr=rval_thr, remove_very_bad_comps=remove_very_bad_comps)

%time cnm = cnm.fit(images_now)
# Number of components = number of columns of the spatial footprint matrix A.
nComps = cnm.estimates.A.shape[-1]
print(f'{nComps} components')

In [None]:
#%% Overlay the contours of the detected components on a local-correlation image.
# images_now is already time-first, so tell local_correlations not to swap axes.
Cn = cm.local_correlations(images_now, swap_dim=False)
# Replace undefined correlations with 0 so they do not break the colour scaling.
Cn[np.isnan(Cn)] = 0
cnm.estimates.plot_contours(img=Cn, thr=0.8);


In [None]:
# Show one CNMF component: spatial footprint (top) and its temporal trace (bottom).
# `i` is treated as a 1-based component number, so both the footprint column and the
# trace row are taken at index i-1. (Indexing the trace with `i` would pair the
# footprint of one component with the trace of the next.)
i = 23
plt.figure(figsize=(10, 5)); 
plt.subplot(211)
# Columns of A are flattened column-major, hence order='F' when restoring `dims`.
plt.imshow(np.reshape(cnm.estimates.A[:,i-1].toarray(), dims, order='F'))

# NOTE: this rebinds `nmf_time` (also used by the NMF cells) to the CNMF traces C.
nmf_time = cnm.estimates.C
plt.subplot(212)
plt.plot(nmf_time[i-1])

In [None]:
# %%capture
#%% RE-RUN seeded CNMF on accepted patches to refine and perform deconvolution 
# The refit result is kept separately in `cnm2`; `%time` reports the run time.
%time cnm2 = cnm.refit(images_now, dview=dview)
# Number of components after refitting (columns of A).
print(f'{cnm2.estimates.A.shape[-1]} components')

In [None]:
# the components are evaluated in three ways:
#   a) the shape of each component must be correlated with the data
#   b) a minimum peak SNR is required over the length of a transient
#   c) each shape passes a CNN based classifier

# cnm2.estimates.evaluate_components(images_now, cnm2.params, dview=dview)

In [None]:
# Contours of the refit components on top of the local-correlation image `Cn`.
# NOTE(review): the evaluate_components call in the previous cell is commented out,
# so `idx_components` is only populated if evaluation was run some other way -- confirm.
cnm2.estimates.plot_contours(img=Cn, idx=cnm2.estimates.idx_components)

In [None]:
# #%% Extract DF/F values
# cnm2 = cnm2.estimates.detrend_df_f(quantileMin=8, frames_window=250)
# dff = cnm2.F_dff

In [None]:
# Quick look at one slice: take index iSlc along axis 1 and max-project over axis 0.
iSlc = 16
slc = images[:, iSlc]
plt.imshow(slc.max(axis=0))

In [None]:
#%% Standard NMF
# Operates on whatever was last bound to `mov` (the movie built from `imgs_proj`
# when the notebook is run top to bottom).
# Shift in place so the minimum is 0 before factorizing.
mov -= mov.min()
# Factorize into 30 components; returns the spatial and temporal parts.
nmf_space, nmf_time = mov.NonnegativeMatrixFactorization(n_components=30)

In [None]:
# Inspect one component of the standard NMF: spatial map on top, time course below.
iComp = 14
plt.figure(figsize=(10, 5))
plt.subplot(211)
plt.imshow(nmf_space[iComp])

plt.subplot(212)
# Components run along the second axis of nmf_time (the earlier NMF plotting cell
# uses nmf_time.T[iComp] for the same reason), so select the column, not the row.
plt.plot(nmf_time[:, iComp])

## *3D version*

In [None]:
#%% Rearrange dimensions into txyz format
# Move axis 1 to the end, then drop the first entry along that (now last) axis.
images_txyz = np.moveaxis(images, 1, -1)[..., 1:]
images_txyz.shape

In [None]:
#%% Save as memory mapped file
# Writes `images_txyz` with base name 'Yr_3d2' in C order, flagged as 3D data;
# the returned file name is what the next cell passes to cm.load_memmap.
fn_new = cm.save_memmap([images_txyz], order='C', base_name='Yr_3d2', is_3D=True)


In [None]:
# Re-open the 3D memory-mapped file and restore a time-first array from it.
Yr, dims, T = cm.load_memmap(fn_new)
# Yr is transposed and reshaped column-major ('F') to (T, *dims).
Y = Yr.T.reshape((T, *dims), order='F')
print(Y.shape)

In [None]:
# NOTE(review): the line recomputing Cn from `Y` is commented out, so `Cn` here is
# whatever an earlier cell left behind (the 2D correlation image from the contour
# cell if the notebook is run in order). Confirm this is the image meant to be shown.
# Cn = cm.local_correlations(Y)
# Show Cn (max-projected over axis 0 when it is 3D), colour-scaled between its
# 70th and 99.9th percentiles.
plt.imshow(Cn.max(0) if len(Cn.shape) == 3 else Cn, cmap='viridis',
           vmin=np.percentile(Cn, 70), vmax=np.percentile(Cn, 99.9))
plt.show()

## *Single patch approach for small data*

In [None]:
# set parameters (these overwrite the K, gSig, merge_thresh and p used for the 2D run)
K = 20  # number of neurons expected per patch
gSig = [2, 2, 2]  # expected half size of neurons (one value per spatial axis)
merge_thresh = 0.8  # merging threshold, max correlation allowed
p = 2  # order of the autoregressive system

In [None]:
# INIT
# New CNMF object for the single-patch 3D run; no rf/stride is given here.
# This rebinds `cnm`, replacing the object from the 2D section.
cnm = cnmf.CNMF(n_processes, k=K, gSig=gSig, merge_thresh=merge_thresh, p=p, dview=dview)

In [None]:
%%time
# %%capture
# FIT
# Rebuild the time-first array from the memory-mapped data (rebinding `images_now`,
# which previously held the 2D movie), then fit CNMF on it.
images_now = np.reshape(Yr.T, [T] + list(dims), order='F')    # reshape data in Python format (T x X x Y x Z)
cnm = cnm.fit(images_now)

In [None]:
# Interactive 3D component viewer on the mean image; trailing ';' hides the return value.
cnm.estimates.nb_view_components_3d(image_type='mean', dims=dims);

## *Patch approach for larger datasets*

In [None]:
# set parameters (these overwrite the values used for the single-patch run)
rf = 18  # half-size of the patches in pixels, e.g., if rf=25, patches are 50x50
stride = 10  # amount of overlap between the patches in pixels
K = 12  # number of neurons expected per patch
gSig = [8, 8, 2]  # expected half size of neurons (one value per spatial axis)
merge_thresh = 0.8  # merging threshold, max correlation allowed
p = 2  # order of the autoregressive system

In [None]:
# %%capture
#%% RUN ALGORITHM ON PATCHES

cnm = cnmf.CNMF(n_processes, k=K, gSig=gSig, merge_thresh=merge_thresh, p=p, dview=dview,
                rf=rf, stride=stride, only_init_patch=True)

%time cnm = cnm.fit(images)
print(('Number of components:' + str(cnm.estimates.A.shape[-1])))

In [None]:
# cnm.estimates.nb_view_components_3d(image_type='mean', dims=dims);

In [None]:
#%% COMPONENT EVALUATION
# the components are evaluated in two ways:
#   a) the shape of each component must be correlated with the data
#   b) a minimum peak SNR is required over the length of a transient

fr = 2 # approx final rate  (after eventual downsampling )
decay_time = 1.  # length of typical transient in seconds 
use_cnn = False  # CNN classifier is designed for 2d (real) data
min_SNR = 3      # accept components with that peak-SNR or higher
rval_thr = 0.7   # accept components with space correlation threshold or higher
cnm.params.change_params(params_dict={'fr': fr,
                                      'decay_time': decay_time,
                                      'min_SNR': min_SNR,
                                      'rval_thr': rval_thr,
                                      'use_cnn': use_cnn});
%time cnm.estimates.evaluate_components(images, cnm.params, dview=dview)

print(('Keeping ' + str(len(cnm.estimates.idx_components)) +
       ' and discarding  ' + str(len(cnm.estimates.idx_components_bad))))

In [None]:
# %%capture
# Set the temporal parameter 'p' to the AR order chosen above, then refit on
# `images_now`; the result is kept separately in `cnm2`.
# NOTE(review): the earlier 2D refit passes `dview=dview`; no `dview` is passed
# here -- confirm whether the cluster should be used for this refit as well.
cnm.params.set('temporal', {'p': p})
%time cnm2 = cnm.refit(images_now)

In [None]:

# Interactive 3D viewer of the refit components on the correlation image
# (max-projected); trailing ';' hides the return value.
cnm2.estimates.nb_view_components_3d(
    image_type='corr',
    dims=dims,
    Yr=Yr,
    denoised_color='red',
    max_projection=True,
);

In [None]:
# Same viewer as the previous cell, but on the max image instead of the correlation image.
cnm2.estimates.nb_view_components_3d(
    image_type='max',
    dims=dims,
    Yr=Yr,
    denoised_color='red',
    max_projection=True,
);

In [None]:
# cnm.estimates.nb_view_components_3d(image_type='max', dims=dims, Yr=Yr,\
#                                      denoised_color='red', max_projection=True);

In [None]:
# Collapse all spatial footprints into one volume: max over components for every pixel.
m = cnm2.estimates.A.max(1).toarray()
# Rows of A follow the column-major pixel order of the memory-mapped data (the same
# order='F' used when rebuilding `images_now` from Yr and when plotting single
# footprints), so the spatial shape must be restored with order='F' as well;
# a default C-order reshape scrambles the volume.
m = m.reshape(images_now.shape[-3:], order='F')
# Bring the last spatial axis to the front so m[k] is a 2D image.
m = m.transpose(2, 0, 1)
m.shape

In [None]:
# Show the 2D image at index 5 along the first axis of `m`.
plt.imshow(m[5])

In [None]:
# NOTE(review): the result is not assigned, and `m` was already permuted with the same
# (2, 0, 1) order in the cell that builds it, so this only displays a further-permuted
# copy of the whole array. Looks like a leftover -- confirm before relying on it.
m.transpose(2, 0, 1)