In [421]:
#%% Import relevant code
import os, sys, time
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import re
import dask
import dask.dataframe as dd
import h5py
import pandas as pd
pd.options.display.precision=2
from skimage.util import montage
import glob
import joblib
from plotly import graph_objects as go

#--- Import my code
codeDir = r'\\dm11\koyamalab\code\python\code'
sys.path.append(codeDir)
import apCode.FileTools as ft
import apCode.volTools as volt
from apCode.machineLearning import ml as mlearn
from apCode.behavior import gmm as GMM
import apCode.SignalProcessingTools as spt
from apCode.behavior import FreeSwimBehavior as fsb
import apCode.hdf as hdf
from apCode import util
from rsNeuronsProj import util as rsp
import apCode.behavior.headFixed as hf

#--- Setting seed for reproducibility
seed = 143
# Call the seeding function. Assigning to `np.random.seed` would replace the
# function with an int and leave NumPy's global generator unseeded.
np.random.seed(seed)

# Use font type 42 for both the PDF and PostScript backends so that saved
# figures keep their text as text (editable in vector-graphics software).
plt.rcParams.update({'pdf.fonttype': 42, 'ps.fonttype': 42})


#--- Auto-reload modules
# `__IPYTHON__` and `get_ipython` only exist inside an IPython session; when
# this code runs under plain Python the NameError is ignored on purpose.
try:
    if __IPYTHON__:
        _ipy = get_ipython()
        # `run_line_magic` replaces the deprecated `InteractiveShell.magic`.
        _ipy.run_line_magic('load_ext', 'autoreload')
        _ipy.run_line_magic('autoreload', '2')
except NameError:
    pass

# Record when the setup cell was last executed
print(time.ctime())


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Wed May 13 00:12:10 2020


### *Read xls with paths to data*

In [117]:
# Locations of the summary spreadsheet and of the group-level folder
dir_xls = r'\\Koyama-S2\Data3\Avinash\Projects\RS recruitment\GCaMP imaging'
dir_group = r'\\Koyama-S2\Data3\Avinash\Projects\RS recruitment\GCaMP imaging\Group'
file_xls = 'GCaMP volumetric imaging summary_2020-05-09.xlsx'

# Everything produced by this notebook is written below the 'Group' subfolder
saveDir = os.path.join(dir_xls, 'Group')
os.makedirs(saveDir, exist_ok=True)

# One row per imaged fish; the `Path` column points to each fish's data folder
xls = pd.read_excel(
    os.path.join(dir_xls, file_xls),
    sheet_name='Sheet1',
)
print(f'Dataframe dimensions: {xls.shape}')
xls.head()

Dataframe dimensions: (19, 13)


Unnamed: 0,Date,Genotype,Time,FishIdx,FishIdxInSession,Path,RoiSet,AllCaImgs,fullDF,QualityBehavior,nPreStimFrames,reTrackBehav,Comments
0,20191106,xa316,2300,0,0.0,\\Koyama-S2\Data3\Avinash\Head-fixed tail free...,0,0.0,0.0,6,100,0,"Shorter trial length, consider ignoring. Tail ..."
1,20191107,xa316,2200,1,0.0,\\Koyama-S2\Data3\Avinash\Head-fixed tail free...,0,0.0,0.0,5,100,0,
2,20191207,nefma-alx,2400,2,2.0,\\Koyama-S2\Data3\Avinash\Head-fixed tail free...,0,0.0,0.0,?,500,0,
3,20191211,nefma-alx,1800,3,1.0,\\Koyama-S2\Data3\Avinash\Head-fixed tail free...,0,0.0,0.0,3,500,0,
4,20191211,nefma-alx,1930,4,2.0,\\Koyama-S2\Data3\Avinash\Head-fixed tail free...,0,0.0,0.0,?,500,1,


### *Read tail angles from all the HDF files*

In [90]:
%%time 
def changePath(path):
    """Return `path` with every "Y:" replaced by the UNC share it is mapped to."""
    return path.replace(r"Y:", r"\\Koyama-S2\Data3")


pathList = np.array([changePath(p) for p in xls.Path])

# Number of rows of `behav/tailAngles` that make up one trial; the stacked
# array is cut into consecutive blocks of this many rows.
# NOTE(review): presumably the number of points along the tail - confirm.
rowsPerTrl = 50

# Column-wise accumulator; converted to a DataFrame once, after the loop
records = dict(fishIdx=[], path=[], trlIdx=[], tailAngles=[], stimLoc=[], sessionIdx=[])
for iPath, fishDir in enumerate(pathList):
    # glob returns matches in no guaranteed order, so sort before taking the
    # last one, and skip the fish instead of failing with a bare IndexError.
    hFilePaths = sorted(glob.glob(os.path.join(fishDir, 'procData*.h5')))
    if not hFilePaths:
        print(f'{iPath+1}/{len(pathList)}: no procData*.h5 found in {fishDir}, skipping')
        continue
    hfp = hFilePaths[-1]
    with h5py.File(hfp, mode='r') as hFile:
        # Only fish whose HDF file has stimulus info are included
        if 'behav/stimLoc' not in hFile:
            continue
        print(f'{iPath+1}/{len(pathList)}')
        ta = np.array(hFile['behav/tailAngles'])
        nTrls = ta.shape[0]//rowsPerTrl
        if nTrls == 0:
            print(f'{iPath+1}/{len(pathList)}: no complete trial in {hfp}, skipping')
            continue
        ta_trl = np.vsplit(ta, nTrls)
        sl = util.to_utf(np.array(hFile['behav/stimLoc']))
        if len(sl) != nTrls:
            # Every column must grow by the same amount or the DataFrame
            # constructor fails later with a much less specific message.
            raise ValueError(f'{hfp}: {len(sl)} stimLoc entries but {nTrls} trials of tail angles')
        # Each stimLoc entry looks like "<session>_..<stim>": the last
        # character is the stimulus location, the leading number is the
        # 1-based session, stored here 0-based.
        stim = [s[-1] for s in sl]
        session = [int(s.split("_")[0])-1 for s in sl]
        hfp_rep = util.to_ascii(np.repeat(hfp, nTrls))
        records['tailAngles'].extend(ta_trl)
        records['fishIdx'].extend(np.repeat(iPath, nTrls))
        records['path'].extend(hfp_rep)
        records['trlIdx'].extend(np.arange(nTrls))
        records['sessionIdx'].extend(session)
        records['stimLoc'].extend(stim)
df = pd.DataFrame(records)
print(f'Dataframe dimensions: {df.shape}, \ncolumns = {df.columns}') 
            


1/19
2/19
3/19
4/19
5/19
6/19
7/19
8/19
9/19
10/19
12/19
13/19
14/19
15/19
16/19
17/19
18/19
19/19
Dataframe dimensions: (645, 6), 
columns = Index(['fishIdx', 'path', 'trlIdx', 'tailAngles', 'stimLoc', 'sessionIdx'], dtype='object')
Wall time: 9.12 s


### *Save dataframe with behavior info from all fish*

In [104]:
fn = f'dataframe_headFixed_gCamp_behavior_{util.timestamp("day")}.pkl'
%time df.to_pickle(os.path.join(saveDir, fn))


Wall time: 4.4 s


### *Reload dataframe if resuming from here*

In [108]:
path_df = glob.glob(os.path.join(saveDir, 'dataframe_headFixed_gCamp_behavior*.pkl'))[-1]
%time df = pd.read_pickle(path_df)

Wall time: 1.07 s


### *Create a GMM object and fit to data*

In [188]:
pca_percVar=0.99

ta_orig = np.concatenate([np.array(_) for _ in df.tailAngles], axis=1)
%time gmm_model = GMM.SvdGmm(pca_percVar=pca_percVar, pk_thr=5).fit(ta_orig)


Peaks are 10.8% of all samples
Wall time: 4min 36s


### *Save the GMM object for future use; there is no need to save the SVD, PCA or Scaler objects separately because they are now attributes of the GMM object*

In [204]:
n_gmm, n_svd, n_pca = gmm_model.n_gmm_, gmm_model.n_svd_, gmm_model.pca.n_components_
fn = f'gmm_headFixed_[{n_gmm}]_svd_[{n_svd}]_env_pca_[{n_pca}]_{util.timestamp("day")}.pkl'
%time path_gmm = joblib.dump(gmm_model, os.path.join(saveDir, fn))[0];
print(path_gmm)

Wall time: 19 ms
\\Koyama-S2\Data3\Avinash\Projects\RS recruitment\GCaMP imaging\Group\gmm_headFixed_[20]_svd_[3]_env_pca_[16]_20200511.pkl


### *Load the GMM model if resuming from here*

In [205]:
# glob returns matches in no guaranteed order; sort so that the choice of file
# is deterministic, and fail with a clear message when nothing was saved yet.
paths_gmm = sorted(glob.glob(os.path.join(saveDir, 'gmm_headFixed_*.pkl')))
if not paths_gmm:
    raise FileNotFoundError(f'No saved GMM model found in {saveDir}')
# NOTE(review): file names sort by the bracketed component counts before the
# date tag, so the last name is not necessarily the most recently saved model.
path_gmm = paths_gmm[-1]
# NOTE: joblib.load unpickles the file, which can execute arbitrary code -
# only load models written by this notebook.
gmm_model = joblib.load(path_gmm)

### *Use the SVD object from the GMM object to clean tail angles* 

In [206]:
ta_orig = np.concatenate([np.array(_) for _ in df.tailAngles], axis=1)

print('Cleaning...')
%time ta, _, svd = hf.cleanTailAngles(ta_orig, svd=gmm_model.svd)


Cleaning...
Wall time: 1min 46s


### *Make interactive plots of some example trials with GMM labels overlaid and save in specified directory*

In [None]:
# Number of columns contributed by each trial, used to cut the cleaned,
# concatenated array back into per-trial pieces. The split points include the
# total length, which yields a trailing empty piece that is dropped.
trlLens = np.array([trl.shape[1] for trl in df.tailAngles])
ta_trl = np.hsplit(ta, np.cumsum(trlLens))[:-1]

figDir = os.path.join(saveDir, 'Figs', 'Trials with GMM labels')
os.makedirs(figDir, exist_ok=True)

# One interactive HTML figure per trial, named by the trial's global index
for ind, ta_now in enumerate(ta_trl):
    row = df.iloc[ind]
    # Time axis in steps of 1/500 (presumably seconds at 500 frames/s - confirm)
    t = np.arange(ta_now.shape[1])*(1/500)
    path_ = util.to_utf([row.path])[0].replace("\\", "/")
    title = f'session-{row.sessionIdx}_trl-{row.trlIdx}_stim-{row.stimLoc}'
    title += f'_{path_}'

    fig = gmm_model.plot_with_labels_interact(ta_now, x=t, title=title)

    fig.write_html(os.path.join(figDir, f'Trl-{ind}_with GMM labels.html'))
    print(f'Trl {ind} saved at \n{figDir}')

In [662]:
# Folder holding the HTML figures of trials that were manually flagged as noisy.
# Built from saveDir so that it uses the same UNC root as every other path in
# this notebook instead of the machine-specific mapped drive "Y:".
noisyDir = os.path.join(saveDir, 'Figs', 'Trials with GMM labels', 'noisy')
# Matches a run of one or more digits (used to read the trial index from file names)
regex = r'\d{1,}'


def get_behav_img_dir(df, trl):
    r"""
    Return the behavior-image folder that corresponds to one row of `df`.

    Parameters
    ----------
    df: pandas.DataFrame
        Must have the columns `path` (path of the fish's HDF file) and
        `trlIdx` (index of the trial within that fish).
    trl: int
        Positional (iloc) index of the row of interest.

    Returns
    -------
    trlDir: str
        The `trlIdx`-th folder, in os.walk order, below the HDF file's parent
        folder whose path matches `[ht]\behav\Autosave`.

    Raises
    ------
    IndexError
        If fewer than `trlIdx + 1` matching folders are found.
    """
    row = df.iloc[trl]
    fishDir = util.to_utf([os.path.split(row.path)[0]])[0]
    # The pattern deliberately accepts both stimulus locations ('h' and 't'),
    # so it does not depend on the row's stimLoc.
    pattern = r'[ht]\\behav\\Autosave'
    # NOTE(review): os.walk yields folders in directory-listing order; indexing
    # by trlIdx assumes that this order matches the order of trials - confirm.
    trlDirs = [root for root, _, _ in os.walk(fishDir) if re.search(pattern, root)]
    return trlDirs[row.trlIdx]

# The noisy folder contains copies of the per-trial HTML figures; the first
# number in each file name is taken as the trial's global index.
fnames = ft.findAndSortFilesInDir(noisyDir, ext='html')
trls = np.array([int(re.findall(regex, fname)[0]) for fname in fnames])

# Look up, trial by trial, the folder that holds the behavior images
trlDirs = []
for iTrl, trl in enumerate(trls):
    print(f'Trl {iTrl+1}/{len(trls)}')
    trlDir = get_behav_img_dir(df, trl)
    trlDirs.append(trlDir)
    


Trl 1/44
Trl 2/44
Trl 3/44
Trl 4/44
Trl 5/44
Trl 6/44
Trl 7/44
Trl 8/44
Trl 9/44
Trl 10/44
Trl 11/44
Trl 12/44
Trl 13/44
Trl 14/44
Trl 15/44
Trl 16/44
Trl 17/44
Trl 18/44
Trl 19/44
Trl 20/44
Trl 21/44
Trl 22/44
Trl 23/44
Trl 24/44
Trl 25/44
Trl 26/44
Trl 27/44
Trl 28/44
Trl 29/44
Trl 30/44
Trl 31/44
Trl 32/44
Trl 33/44
Trl 34/44
Trl 35/44
Trl 36/44
Trl 37/44
Trl 38/44
Trl 39/44
Trl 40/44
Trl 41/44
Trl 42/44
Trl 43/44
Trl 44/44


In [675]:
# Bundle the global trial indices with their image folders and store them next
# to the figures. np.save writes a dict as a pickled object array, so reading
# it back requires np.load(path_, allow_pickle=True).item().
foo = {'trlIdx_glob': trls, 'trlDir': np.array(trlDirs)}
path_ = os.path.join(figDir, 'noisyTrlPaths.npy')
np.save(path_, foo)

In [540]:
hf.extractAndStoreBehaviorData_singleFish??

[1;31mSignature:[0m
[0mhf[0m[1;33m.[0m[0mextractAndStoreBehaviorData_singleFish[0m[1;33m([0m[1;33m
[0m    [0mfishPath[0m[1;33m,[0m[1;33m
[0m    [0muNet[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mhFilePath[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mregex[0m[1;33m=[0m[1;34m'\\d{1,5}_[ht]'[0m[1;33m,[0m[1;33m
[0m    [0mimgExt[0m[1;33m=[0m[1;34m'bmp'[0m[1;33m,[0m[1;33m
[0m    [1;33m**[0m[0munet_kwargs[0m[1;33m,[0m[1;33m
[0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m
[1;31mSource:[0m   
[1;32mdef[0m [0mextractAndStoreBehaviorData_singleFish[0m[1;33m([0m[0mfishPath[0m[1;33m,[0m [0muNet[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m [0mhFilePath[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m                                           [0mregex[0m[1;33m=[0m[1;34mr'\d{1,5}_[ht]'[0m[1;33m,[0m[1;33m
[0m                                           [0mimgExt[0m[1;33m=[0m[1;34m'bmp'[0m[1;33m,