In [None]:
#%% Import relevant code
import os, sys, time
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import re
import dask
import dask.dataframe as dd
import h5py
import pandas as pd
pd.options.display.precision=2
from skimage.util import montage
import glob
import joblib
from plotly import graph_objects as go

#--- Import my code
codeDir = r'\\dm11\koyamalab\code\python\code'
sys.path.append(codeDir)
import apCode.FileTools as ft
import apCode.volTools as volt
from apCode.machineLearning import ml as mlearn
from apCode.behavior import gmm as GMM
import apCode.SignalProcessingTools as spt
from apCode.behavior import FreeSwimBehavior as fsb
import apCode.hdf as hdf
from apCode import util
from rsNeuronsProj import util as rsp
import apCode.behavior.headFixed as hf

#--- Setting seed for reproducibility
seed = 143
# Call the seeding function. Assigning to ``np.random.seed`` (as was done
# before) replaces the function with an int and leaves the global RNG unseeded.
np.random.seed(seed)

# Use font type 42 (TrueType) for text in PDF and PostScript output
plt.rcParams['pdf.fonttype'] = 42
plt.rcParams['ps.fonttype'] = 42


#--- Auto-reload modules
# ``__IPYTHON__`` is only defined under IPython; in plain Python the lookup
# raises NameError and the autoreload setup is skipped.
try:
    if __IPYTHON__:
        # ``run_line_magic`` is the supported replacement for the deprecated
        # ``InteractiveShell.magic`` method.
        get_ipython().run_line_magic('load_ext', 'autoreload')
        get_ipython().run_line_magic('autoreload', '2')
except NameError:
    pass

# Record when this cell was run
print(time.ctime())


### *Read xls with paths to data*

In [None]:
# Folder with the summary spreadsheet, and the group-level folder
dir_xls = r'\\Koyama-S2\Data3\Avinash\Projects\RS recruitment\GCaMP imaging'
dir_group = r'\\Koyama-S2\Data3\Avinash\Projects\RS recruitment\GCaMP imaging\Group'
file_xls = 'GCaMP volumetric imaging summary_2020-05-09.xlsx'

# Later cells write their outputs under saveDir; create it if needed
saveDir = os.path.join(dir_xls, 'Group')
os.makedirs(saveDir, exist_ok=True)

# Load the first sheet of the summary spreadsheet
path_xls = os.path.join(dir_xls, file_xls)
xls = pd.read_excel(path_xls, sheet_name='Sheet1')
print(f'Dataframe dimensions: {xls.shape}')
xls.head()

### *Read tail angles from all the HDF files*

In [None]:
%%time 
changePath = lambda path: path.replace(r"Y:", r"\\Koyama-S2\Data3")
pathList = np.array(list(map(changePath, xls.Path)))

df = dict(fishIdx=[], path=[], trlIdx=[], tailAngles=[], stimLoc=[], sessionIdx=[])
for iPath, path_ in enumerate(pathList):
    hfp = glob.glob(os.path.join(path_, 'procData*.h5'))[-1]
    with h5py.File(hfp, mode='r') as hFile:
        if 'behav/stimLoc' in hFile:
            print(f'{iPath+1}/{len(pathList)}')
            ta = np.array(hFile['behav/tailAngles'])
            nTrls = ta.shape[0]//50
            ta_trl = np.vsplit(ta, nTrls)            
            sl = util.to_utf(np.array(hFile['behav/stimLoc']))
            stim = [_[-1] for _ in sl]
            session = [int(_.split("_")[0])-1 for _ in sl]
            path_ = util.to_ascii(np.repeat(hfp, nTrls))
            df['tailAngles'].extend(ta_trl)
            df['fishIdx'].extend(np.repeat(iPath, nTrls))
            df['path'].extend(path_)
            df['trlIdx'].extend(np.arange(nTrls))
            df['sessionIdx'].extend(session)
            df['stimLoc'].extend(stim)
df = pd.DataFrame(df)
print(f'Dataframe dimensions: {df.shape}, \ncolumns = {df.columns}') 
            


### *Save dataframe with behavior info from all fish*

In [None]:
fn = f'dataframe_headFixed_gCamp_behavior_{util.timestamp("day")}.pkl'
%time df.to_pickle(os.path.join(saveDir, fn))


### *Reload dataframe if resuming from here*

In [None]:
path_df = glob.glob(os.path.join(saveDir, 'dataframe_headFixed_gCamp_behavior*.pkl'))[-1]
%time df = pd.read_pickle(path_df)

### *Create a GMM object and fit to data*

In [None]:
pca_percVar=0.99

ta_orig = np.concatenate([np.array(_) for _ in df.tailAngles], axis=1)
%time gmm_model = GMM.SvdGmm(pca_percVar=pca_percVar, pk_thr=5).fit(ta_orig)


### *Save the GMM object for future use; no need to save the SVD, PCA or Scaler objects separately because they are now attributes of the GMM object*

In [None]:
n_gmm, n_svd, n_pca = gmm_model.n_gmm_, gmm_model.n_svd_, gmm_model.pca.n_components_
fn = f'gmm_headFixed_[{n_gmm}]_svd_[{n_svd}]_env_pca_[{n_pca}]_{util.timestamp("day")}.pkl'
%time path_gmm = joblib.dump(gmm_model, os.path.join(saveDir, fn))[0];
print(path_gmm)

### *Load the GMM model if resuming from here*

In [None]:
# glob.glob returns matches in arbitrary order, and the file names start with
# the component counts, so neither [-1] nor a name sort finds the newest model.
# Pick the most recently modified file instead.
gmm_paths = glob.glob(os.path.join(saveDir, 'gmm_headFixed_*.pkl'))
if not gmm_paths:
    raise FileNotFoundError(f'No saved GMM model found in {saveDir}')
path_gmm = max(gmm_paths, key=os.path.getmtime)
gmm_model = joblib.load(path_gmm)

### *Use the SVD object from the GMM object to clean tail angles* 

In [None]:
ta_orig = np.concatenate([np.array(_) for _ in df.tailAngles], axis=1)

print('Cleaning...')
%time ta, _, svd = hf.cleanTailAngles(ta_orig, svd=gmm_model.svd)


### *Make interactive plots of some example trials with GMM labels overlaid and save in specified directory*

In [None]:
# Split the concatenated array back into per-trial pieces at the cumulative
# trial lengths; the piece after the last split point is dropped.
trlLens = np.array([trl.shape[1] for trl in df.tailAngles])
ta_trl = np.hsplit(ta, np.cumsum(trlLens))[:-1]

figDir = os.path.join(saveDir, 'Figs', 'Trials with GMM labels')
os.makedirs(figDir, exist_ok=True)


# ind = np.random.choice(range(len(ta_trl)), size=1)[0]
for ind, ta_now in enumerate(ta_trl):
    row = df.iloc[ind]
    # Time axis with a step of 1/500 (presumably a 500 Hz frame rate — TODO confirm)
    t = np.arange(ta_now.shape[1])*(1/500)
    path_ = util.to_utf([row.path])[0].replace("\\", "/")
    title = f'session-{row.sessionIdx}_trl-{row.trlIdx}_stim-{row.stimLoc}' + f'_{path_}'

    fig = gmm_model.plot_with_labels_interact(ta_now, x=t, title=title)

    # One HTML file per trial, named by its row position in df
    fig.write_html(os.path.join(figDir, f'Trl-{ind}_with GMM labels.html'))
    print(f'Trl {ind} saved at \n{figDir}')

In [None]:
# Folder searched below for the HTML plots of noisy trials.
# NOTE(review): this uses the Y: drive letter, whereas the other paths in this
# notebook use the \\Koyama-S2\Data3 UNC share — confirm both resolve to the same place.
noisyDir = r'Y:\Avinash\Projects\RS recruitment\GCaMP imaging\Group\Figs\Trials with GMM labels\noisy'
# One or more consecutive digits; used below to extract the trial index from each file name
regex = r'\d{1,}'


def get_behav_img_dir(df, trl):
    r"""Return the behavior-image directory for one row of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with (at least) ``path`` and ``trlIdx`` columns.
    trl : int
        Positional row index into ``df``.

    Returns
    -------
    trlDir
        The ``trlIdx``-th directory, in ``os.walk`` order, found under the
        folder containing the row's file and matching
        ``[ht]\\behav\\Autosave``.

    Raises
    ------
    IndexError
        If fewer than ``trlIdx + 1`` matching directories are found.
    """
    row = df.iloc[trl]
    path = os.path.split(row.path)[0]
    path = util.to_utf([path])[0]
    trlIdx = row.trlIdx
    # The earlier ``.format(stim)`` call was a no-op because the pattern has no
    # ``{}`` field, so it and the unused stim/session lookups were dropped.
    regex = r'[ht]\\behav\\Autosave'
    # NOTE(review): indexing by trlIdx relies on os.walk visiting the
    # directories in trial order — confirm.
    trlDirs = [root for root, _, _ in os.walk(path) if re.search(regex, root)]
    trlDir = trlDirs[trlIdx]
    return trlDir

# Get trial indices for noisy trials from files in the noisy directory
fnames = ft.findAndSortFilesInDir(noisyDir, ext='html')

# The trial index is the first run of digits in the file NAME (plots are saved
# as 'Trl-<idx>_with GMM labels.html'). basename() ensures digits in any
# directory part of the returned string are never picked up instead.
trls = np.array([int(re.findall(regex, os.path.basename(fn))[0]) for fn in fnames])

# Now find the image containing folder corresponding to trials
trlDirs = []
for iTrl, trl in enumerate(trls):
    print(f'Trl {iTrl+1}/{len(trls)}')
    trlDir = get_behav_img_dir(df, trl)
    trlDirs.append(trlDir)
    


In [None]:
# Bundle the noisy-trial row indices with their image folders and save them
# as a single .npy file in the figure directory
foo = {'trlIdx_glob': trls, 'trlDir': np.array(trlDirs)}
path_ = os.path.join(figDir, 'noisyTrlPaths.npy')
np.save(path_, foo)

In [None]:
# Interactive aid: IPython's `??` displays the source of this function.
# It produces no data and can be removed from a finalized notebook.
hf.extractAndStoreBehaviorData_singleFish??