In [1]:
# Standard libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from ipywidgets import IntProgress
from IPython.display import display
from scipy.stats import mannwhitneyu, wilcoxon

# Append base directory
import os,sys,inspect
rootname = "pub-2020-exploratory-analysis"
# A notebook cell has no real source file: depending on the IPython/ipykernel
# version, inspect.getfile() on the cell frame may yield a pseudo-name such as
# "<ipython-input-1-...>" or a temporary file outside the repository. The
# kernel's working directory is therefore used to locate the repository root.
thispath = os.path.abspath(os.getcwd())
if rootname not in thispath:
    # Same exception type as the bare str.index() failure, but with an actionable message
    raise ValueError("Cannot locate '" + rootname + "' in the working directory " + thispath
                     + "; start the notebook from inside the repository")
rootpath = os.path.join(thispath[:thispath.index(rootname)], rootname)
# Re-running this cell must not keep stacking duplicate entries on sys.path
if rootpath not in sys.path:
    sys.path.append(rootpath)
print("Appended root directory", rootpath)

from mesostat.utils.qt_helper import gui_fnames, gui_fpath
from mesostat.metric.metric import MetricCalculator
from mesostat.utils.hdf5_io import DataStorage

from lib.gallerosalas.data_fc_db_aud_raw import DataFCDatabase
import lib.analysis.bulk_metrics as bulk_metrics

# Reload edited modules automatically before each cell runs, so changes to the
# project libraries imported above are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

Appended root directory /media/aleksejs/DataHDD/work/codes/comp-neuro/analysis-mesoscopic/pub-2020-exploratory-analysis


In [2]:
params = {}
# params['root_path_data']  = gui_fpath("Path to data collection",  './')
# params['root_path_data'] = '/media/alyosha/Data/TE_data/yasirdata_raw/'
# Machine-specific default location of the raw data set. Set the ROOT_PATH_DATA
# environment variable to run the notebook on another machine without editing
# this cell; when it is unset the original path is used unchanged.
params['root_path_data'] = os.environ.get(
    'ROOT_PATH_DATA',
    '/media/aleksejs/DataHDD/work/data/yasir/yasirdata_aud_raw'
)

In [3]:
# Build the database object from the data path configured in params. According to the
# saved output, construction reads the channel label/area files, the Allen brain map,
# the task and session structures, and then searches the path for per-mouse data files.
dataDB = DataFCDatabase(params)

Reading channel label file
Reading channel area file
Reading allen brain map
Reading task structure
Reading session structure
Searching for data files
Found mice ['mou_5', 'mou_6', 'mou_7', 'mou_9']


In [4]:
# Storage object that all metric computations below write to and all plots read from.
# The file name is relative — presumably resolved against the kernel's working
# directory; confirm in mesostat.utils.hdf5_io.
ds = DataStorage('gallerosalas_bulk_metrics.h5')

In [5]:
# Single metric calculator instance shared by every bulk computation in this notebook.
# NOTE(review): serial=True presumably disables parallel execution — confirm in mesostat.
mc = MetricCalculator(serial=True, verbose=False)

In [6]:
# Quick sanity overview of what the database loaded: one "label value" line per property
overviewRows = (
    ('mice', dataDB.mice),
    ('nSessions', len(dataDB.sessions)),
    ('datatypes', dataDB.get_data_types()),
    ('nChannel', dataDB.get_nchannels('mou_5')),
)
for rowLabel, rowValue in overviewRows:
    print(rowLabel, rowValue)

mice {'mou_5', 'mou_6', 'mou_7', 'mou_9'}
nSessions 4
datatypes ['raw', 'bn_session', 'bn_trial']
nChannel 27


In [7]:
# Called for its effect on dataDB; any return value is discarded.
# NOTE(review): presumably precomputes shortest distances between channels/areas
# for later use — confirm in DataFCDatabase.
dataDB.calc_shortest_distances()

# Bulk Metrics

In [8]:
# Names of the trial intervals passed to the per-session computations below
intervNames = [
    "PRE",
    "TEX",
    "DEL",
    "REW",
    "AVG",
]

In [9]:
# Parameter combinations that are excluded from the bulk computations
exclQueryLst = [
    # Baseline normalized
    dict(datatype='bn_trial', intervName='PRE'),
    # No reward for this mouse
    dict(mousename='mou_6', intervName='REW'),
    # Makes no sense to compare: no reward and longer delay
    dict(mousename='mou_6', intervName='AVG'),
]

## 1. Mean
### 1.1. Mean vs time

In [10]:
# Compute the 'mean' metric for every mouse; the results are read back from ds by the
# ('mean', 'time') pair in the plotting cell that follows.
# NOTE(review): the saved output of this cell ends in a ValueError (arrays of size 200
# and 180 along dimension 1 cannot be concatenated), so the results are not produced
# with the current code/data. The cause is not visible in this notebook — possibly
# sessions or mice with different trial lengths. TODO: investigate in lib.analysis.bulk_metrics.
bulk_metrics.metric_mouse_bulk(dataDB, mc, ds, 'mean', 's', 'time', verbose=False, minTrials=10,
                               trialTypeNames='auto', perfNames=None, exclQueryLst=exclQueryLst)

IntProgress(value=0, description='time', max=60)

  mu = np.nanmean(x, axis=axis).reshape(shapeNew)
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,


ValueError: all the input array dimensions for the concatenation axis must match exactly, but along dimension 1, the array at index 0 has size 200 and the array at index 7 has size 180

In [None]:
# Plot the stored ('mean', 'time') results. xFunc receives two arguments that are
# ignored here and supplies dataDB.get_times() — presumably the x-axis values; confirm
# in bulk_metrics.plot_metric_bulk.
bulk_metrics.plot_metric_bulk(ds, 'mean', 'time', verbose=False, xFunc=lambda m, l: dataDB.get_times())

### 1.2. Mean vs session

In [21]:
# Compute the 'mean' metric per session for the intervals in intervNames, passing
# exclQueryLst as the list of combinations to exclude. The saved output consists of
# "Skipping existing ..." lines, i.e. results already present in ds are not recomputed.
bulk_metrics.metric_mouse_bulk_vs_session(dataDB, mc, ds, "mean", 'session', trialTypeNames='auto',
                                          minTrials=10, verbose=False, intervNames=intervNames,
                                          exclQueryLst=exclQueryLst)

IntProgress(value=0, description='session', max=250)

Skipping existing session_rs_raw_PRE_mou_7_mean_(sessions,)
Skipping existing session_rs_raw_TEX_mou_7_mean_(sessions,)
Skipping existing session_rs_raw_DEL_mou_7_mean_(sessions,)
Skipping existing session_rs_raw_REW_mou_7_mean_(sessions,)
Skipping existing session_rs_raw_AVG_mou_7_mean_(sessions,)
Skipping existing session_rs_raw_Hit_PRE_mou_7_mean_(sessions,)
Skipping existing session_rs_raw_Hit_TEX_mou_7_mean_(sessions,)
Skipping existing session_rs_raw_Hit_DEL_mou_7_mean_(sessions,)
Skipping existing session_rs_raw_Hit_REW_mou_7_mean_(sessions,)
Skipping existing session_rs_raw_Hit_AVG_mou_7_mean_(sessions,)
Skipping existing session_rs_raw_Miss_PRE_mou_7_mean_(sessions,)
Skipping existing session_rs_raw_Miss_TEX_mou_7_mean_(sessions,)
Skipping existing session_rs_raw_Miss_DEL_mou_7_mean_(sessions,)
Skipping existing session_rs_raw_Miss_REW_mou_7_mean_(sessions,)
Skipping existing session_rs_raw_Miss_AVG_mou_7_mean_(sessions,)
Skipping existing session_rs_raw_CR_PRE_mou_7_mean_(ses

IntProgress(value=0, description='mou_5', max=21)

IntProgress(value=0, description='mou_5', max=21)

Skipping existing session_rs_raw_Hit_PRE_mou_5_mean_(sessions,)
Skipping existing session_rs_raw_Hit_TEX_mou_5_mean_(sessions,)
Skipping existing session_rs_raw_Hit_DEL_mou_5_mean_(sessions,)


IntProgress(value=0, description='mou_5', max=21)

IntProgress(value=0, description='mou_5', max=21)

Skipping existing session_rs_raw_Miss_PRE_mou_5_mean_(sessions,)
Skipping existing session_rs_raw_Miss_TEX_mou_5_mean_(sessions,)
Skipping existing session_rs_raw_Miss_DEL_mou_5_mean_(sessions,)


  mu = np.nanmean(x, axis=axis).reshape(shapeNew)
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,


IntProgress(value=0, description='mou_5', max=21)



IntProgress(value=0, description='mou_5', max=21)

Skipping existing session_rs_raw_CR_PRE_mou_5_mean_(sessions,)
Skipping existing session_rs_raw_CR_TEX_mou_5_mean_(sessions,)
Skipping existing session_rs_raw_CR_DEL_mou_5_mean_(sessions,)


IntProgress(value=0, description='mou_5', max=21)

IntProgress(value=0, description='mou_5', max=21)

Skipping existing session_rs_raw_FA_PRE_mou_5_mean_(sessions,)
Skipping existing session_rs_raw_FA_TEX_mou_5_mean_(sessions,)
Skipping existing session_rs_raw_FA_DEL_mou_5_mean_(sessions,)


IntProgress(value=0, description='mou_5', max=21)



IntProgress(value=0, description='mou_5', max=21)

Skipping existing session_bn_session_PRE_mou_5_mean_(sessions,)
Skipping existing session_bn_session_TEX_mou_5_mean_(sessions,)
Skipping existing session_bn_session_DEL_mou_5_mean_(sessions,)


IntProgress(value=0, description='mou_5', max=21)

IntProgress(value=0, description='mou_5', max=21)

Skipping existing session_bn_session_Hit_PRE_mou_5_mean_(sessions,)
Skipping existing session_bn_session_Hit_TEX_mou_5_mean_(sessions,)
Skipping existing session_bn_session_Hit_DEL_mou_5_mean_(sessions,)


IntProgress(value=0, description='mou_5', max=21)

IntProgress(value=0, description='mou_5', max=21)

Skipping existing session_bn_session_Miss_PRE_mou_5_mean_(sessions,)
Skipping existing session_bn_session_Miss_TEX_mou_5_mean_(sessions,)
Skipping existing session_bn_session_Miss_DEL_mou_5_mean_(sessions,)


IntProgress(value=0, description='mou_5', max=21)



IntProgress(value=0, description='mou_5', max=21)

Skipping existing session_bn_session_CR_PRE_mou_5_mean_(sessions,)
Skipping existing session_bn_session_CR_TEX_mou_5_mean_(sessions,)
Skipping existing session_bn_session_CR_DEL_mou_5_mean_(sessions,)


IntProgress(value=0, description='mou_5', max=21)

IntProgress(value=0, description='mou_5', max=21)

Skipping existing session_bn_session_FA_PRE_mou_5_mean_(sessions,)
Skipping existing session_bn_session_FA_TEX_mou_5_mean_(sessions,)
Skipping existing session_bn_session_FA_DEL_mou_5_mean_(sessions,)


IntProgress(value=0, description='mou_5', max=21)



IntProgress(value=0, description='mou_5', max=21)

Skipping existing session_bn_trial_TEX_mou_5_mean_(sessions,)
Skipping existing session_bn_trial_DEL_mou_5_mean_(sessions,)


IntProgress(value=0, description='mou_5', max=21)

IntProgress(value=0, description='mou_5', max=21)

Skipping existing session_bn_trial_Hit_TEX_mou_5_mean_(sessions,)
Skipping existing session_bn_trial_Hit_DEL_mou_5_mean_(sessions,)


IntProgress(value=0, description='mou_5', max=21)

IntProgress(value=0, description='mou_5', max=21)

Skipping existing session_bn_trial_Miss_TEX_mou_5_mean_(sessions,)
Skipping existing session_bn_trial_Miss_DEL_mou_5_mean_(sessions,)


IntProgress(value=0, description='mou_5', max=21)



IntProgress(value=0, description='mou_5', max=21)

Skipping existing session_bn_trial_CR_TEX_mou_5_mean_(sessions,)
Skipping existing session_bn_trial_CR_DEL_mou_5_mean_(sessions,)


IntProgress(value=0, description='mou_5', max=21)

IntProgress(value=0, description='mou_5', max=21)

Skipping existing session_bn_trial_FA_TEX_mou_5_mean_(sessions,)
Skipping existing session_bn_trial_FA_DEL_mou_5_mean_(sessions,)


IntProgress(value=0, description='mou_5', max=21)



IntProgress(value=0, description='mou_5', max=21)



In [22]:
# Disabled alternative: same scatter, but against mouse performance (x limited to
# [0, 1]) and with a regression line.
# bulk_metrics.scatter_metric_bulk(ds, 'mean', 'session', verbose=False, xlim=[0, 1],
#                               xFunc=lambda m, l: dataDB.get_performance_mouse(m), haveRegression=True)

# Active variant: scatter of the stored ('mean', 'session') results with no custom
# x values and no regression. The saved output prints, per dataset, its parameter list
# followed by two array shapes.
bulk_metrics.scatter_metric_bulk(ds, 'mean', 'session', verbose=False, xFunc=None, haveRegression=False)

['mean', 'session', 'mou_5', 'None', 'AVG', 'bn_session', 'dset257', 'CR']
(21,)
(21,)
['mean', 'session', 'mou_7', 'None', 'AVG', 'bn_session', 'dset218', 'CR']
(25,)
(25,)
['mean', 'session', 'mou_9', 'None', 'AVG', 'bn_session', 'dset233', 'CR']
(20,)
(20,)
['mean', 'session', 'mou_5', 'None', 'AVG', 'bn_trial', 'dset267', 'CR']
(21,)
(21,)
['mean', 'session', 'mou_7', 'None', 'AVG', 'bn_trial', 'dset223', 'CR']
(25,)
(25,)
['mean', 'session', 'mou_9', 'None', 'AVG', 'bn_trial', 'dset238', 'CR']
(20,)
(20,)
['mean', 'session', 'mou_5', 'None', 'DEL', 'bn_session', 'dset173', 'CR']
(21,)
(21,)
['mean', 'session', 'mou_6', 'None', 'DEL', 'bn_session', 'dset158', 'CR']
(30,)
(30,)
['mean', 'session', 'mou_7', 'None', 'DEL', 'bn_session', 'dset128', 'CR']
(25,)
(25,)
['mean', 'session', 'mou_9', 'None', 'DEL', 'bn_session', 'dset143', 'CR']
(20,)
(20,)
['mean', 'session', 'mou_5', 'None', 'DEL', 'bn_trial', 'dset178', 'CR']
(21,)
(21,)
['mean', 'session', 'mou_6', 'None', 'DEL', 'bn_tri

['mean', 'session', 'mou_5', 'None', 'TEX', 'bn_trial', 'dset116', 'Hit']
(21,)
(21,)
['mean', 'session', 'mou_6', 'None', 'TEX', 'bn_trial', 'dset101', 'Hit']
(30,)
(30,)
['mean', 'session', 'mou_7', 'None', 'TEX', 'bn_trial', 'dset71', 'Hit']
(25,)
(25,)
['mean', 'session', 'mou_9', 'None', 'TEX', 'bn_trial', 'dset86', 'Hit']
(20,)
(20,)
['mean', 'session', 'mou_5', 'None', 'AVG', 'bn_session', 'dset255', 'Miss']
(21,)
(8,)
['mean', 'session', 'mou_7', 'None', 'AVG', 'bn_session', 'dset217', 'Miss']
(25,)
(3,)
['mean', 'session', 'mou_9', 'None', 'AVG', 'bn_session', 'dset232', 'Miss']
(20,)
(14,)
['mean', 'session', 'mou_5', 'None', 'AVG', 'bn_trial', 'dset265', 'Miss']
(21,)
(8,)
['mean', 'session', 'mou_7', 'None', 'AVG', 'bn_trial', 'dset222', 'Miss']
(25,)
(3,)
['mean', 'session', 'mou_9', 'None', 'AVG', 'bn_trial', 'dset237', 'Miss']
(20,)
(14,)
['mean', 'session', 'mou_5', 'None', 'DEL', 'bn_session', 'dset172', 'Miss']
(21,)
(8,)
['mean', 'session', 'mou_6', 'None', 'DEL', 'b

['mean', 'session', 'mou_5', 'rs', 'DEL', 'raw', 'dset169', 'FA']
(21,)
(6,)
['mean', 'session', 'mou_6', 'rs', 'DEL', 'raw', 'dset154', 'FA']
(30,)
(1,)
['mean', 'session', 'mou_7', 'rs', 'DEL', 'raw', 'dset124', 'FA']
(25,)
(21,)
['mean', 'session', 'mou_9', 'rs', 'DEL', 'raw', 'dset139', 'FA']
(20,)
(8,)
['mean', 'session', 'mou_5', 'rs', 'PRE', 'raw', 'dset49', 'FA']
(21,)
(6,)
['mean', 'session', 'mou_6', 'rs', 'PRE', 'raw', 'dset34', 'FA']
(30,)
(1,)
['mean', 'session', 'mou_7', 'rs', 'PRE', 'raw', 'dset4', 'FA']
(25,)
(21,)
['mean', 'session', 'mou_9', 'rs', 'PRE', 'raw', 'dset19', 'FA']
(20,)
(8,)
['mean', 'session', 'mou_5', 'rs', 'REW', 'raw', 'dset248', 'FA']
(21,)
(6,)
['mean', 'session', 'mou_7', 'rs', 'REW', 'raw', 'dset184', 'FA']
(25,)
(21,)
['mean', 'session', 'mou_9', 'rs', 'REW', 'raw', 'dset199', 'FA']
(20,)
(8,)
['mean', 'session', 'mou_5', 'rs', 'TEX', 'raw', 'dset109', 'FA']
(21,)
(6,)
['mean', 'session', 'mou_6', 'rs', 'TEX', 'raw', 'dset94', 'FA']
(30,)
(1,)
['

### 1.3. Conditions

In [28]:
# Bar plot of the stored ('mean', 'session') results restricted to the Hit and CR
# trial types; with verbose=True the saved output lists bn_session, bn_trial and raw.
bulk_metrics.barplot_conditions(ds, 'mean', 'session', verbose=True, trialTypes=['Hit', 'CR'])

bn_session
bn_trial
raw


## 2. Variance

**TODO**
* Plot trial variance relative to temporal variance
* Plot mean with variance together

### Pros/Cons of Baseline Normalization
* DFF-Trial
    - Pos: Removes a dynamic baseline that changes on the timescale of trials.
    - Pos: Under the assumption of a signal-free pre-trial interval, baseline removal enhances the relative change in significant activity during the trial.
    - Neg: If the pre-trial interval and the trial signals are correlated, this procedure destroys information during the trial.

* DFF-Session vs ZScore-Session
    - Both linear transforms
    - Mean is more meaningful for DFF if pre-trial interval is at least somewhat stable
    - Va

In [None]:
# Same call pattern as the "mean vs time" computation, but for the 'varmean' metric
# and with a stricter trial-count threshold (minTrials=50 instead of 10).
bulk_metrics.metric_mouse_bulk(dataDB, mc, ds, 'varmean', 's', 'time', verbose=False, minTrials=50,
                               trialTypeNames='auto', exclQueryLst=exclQueryLst)

In [None]:
# Plot the stored ('varmean', 'time') results; the y axis starts at 0 with an open
# upper limit, and x values come from dataDB.get_times() via xFunc.
bulk_metrics.plot_metric_bulk(ds, 'varmean', 'time',
                              ylim=[0,None], verbose=False, xFunc=lambda m, l: dataDB.get_times())

### Session

In [None]:
# Per-session 'varmean' for each interval in intervNames. Unlike the 'mean' run, the
# trial types are listed explicitly (None, 'Hit', 'CR') and minTrials is 50.
# NOTE(review): None presumably means "all trials pooled" — confirm in bulk_metrics.
bulk_metrics.metric_mouse_bulk_vs_session(dataDB, mc, ds, "varmean", 'session',
                                          trialTypeNames=[None, 'Hit', 'CR'],
                                          minTrials=50, verbose=False, intervNames=intervNames,
                                          exclQueryLst=exclQueryLst)

In [None]:
# Scatter of the stored ('varmean', 'session') results, without custom x values or regression.
bulk_metrics.scatter_metric_bulk(ds, 'varmean', 'session', verbose=False, xFunc=None, haveRegression=False)

### Conditions

In [None]:
# Bar plot of the stored ('varmean', 'session') results for the Hit and CR trial types.
bulk_metrics.barplot_conditions(ds, 'varmean', 'session', verbose=True, trialTypes=['Hit', 'CR'])

### Test variance across channels for interesting interval

* Average signal over texture presentation interval
* Compute variance over trials for each channel
* Compare channels

In [None]:
# 'varmean' stored under the name 'channel', restricted by cropTime to the 'TEX'
# interval with the window (3, 3.5).
# NOTE(review): the window is presumably in seconds and the 'p' selector presumably
# yields one value per channel — confirm both in bulk_metrics / mesostat.
bulk_metrics.metric_mouse_bulk(dataDB, mc, ds, 'varmean', 'p', 'channel',
                               cropTime=('TEX', (3, 3.5)), verbose=False)

In [None]:
# Plot the stored ('varmean', 'channel') results on a logarithmic y axis; the
# 'cropTime' column is passed in dropCols. A fixed ylim is kept as a disabled option.
bulk_metrics.plot_metric_bulk(ds, 'varmean', 'channel', yscale='log', verbose=False, # ylim=[0.005,2],
                              dropCols=['cropTime'])

# Effective Rank

### ByTime

In [None]:
# Same call pattern as the other "vs time" computations, for the 'rank_effective'
# metric with minTrials=50.
bulk_metrics.metric_mouse_bulk(dataDB, mc, ds, "rank_effective", 's', 'time', verbose=False,
                               minTrials=50, trialTypeNames='auto', exclQueryLst=exclQueryLst)

In [None]:
# Plot the stored ('rank_effective', 'time') results; the y axis starts at 1 with an
# open upper limit, and x values come from dataDB.get_times() via xFunc.
bulk_metrics.plot_metric_bulk(ds, 'rank_effective', 'time', ylim=[1, None], verbose=False,
                              xFunc=lambda m, l: dataDB.get_times()) # ylim=[1,48]

### BySession

In [None]:
# Per-session 'rank_effective' for each interval in intervNames, using the same
# explicit trial types (None, 'Hit', 'CR') and minTrials=50 as the 'varmean' run.
bulk_metrics.metric_mouse_bulk_vs_session(dataDB, mc, ds, "rank_effective", 'session', minTrials=50,
                                          trialTypeNames=[None, 'Hit', 'CR'], verbose=False,
                                          intervNames=intervNames, exclQueryLst=exclQueryLst)

In [None]:
# Scatter of the stored ('rank_effective', 'session') results; the y axis starts at 1
# with an open upper limit, without custom x values or regression.
bulk_metrics.scatter_metric_bulk(ds, 'rank_effective', 'session', ylim=[1, None], verbose=False,
                                 xFunc=None, haveRegression=False)

### Conditions

In [None]:
# Bar plot of the stored ('rank_effective', 'session') results for the Hit and CR trial types.
bulk_metrics.barplot_conditions(ds, 'rank_effective', 'session', verbose=True, trialTypes=['Hit', 'CR'])

## Hierarchical

**TODO**:
* Add composite selectors for metric helper
   - trialType = iGO & iNOGO
   - cropTime = TEX & REW

In [None]:
# 'rank_effective' with an empty dimension selector, stored under the name 'flat';
# the cells below select exactly these datasets via name == 'flat'.
# NOTE(review): the empty selector presumably produces a single value per parameter
# combination — confirm in bulk_metrics / mesostat.
bulk_metrics.metric_mouse_bulk(dataDB, mc, ds, "rank_effective", '', 'flat', verbose=False,
                               trialTypeNames='auto',
                               perfNames='auto',
                               intervNames=intervNames, exclQueryLst=exclQueryLst)

In [None]:
# Table of all datasets stored in ds, reduced to the rows saved under the name 'flat'
df = ds.list_dsets_pd()
# .copy() makes the filtered frame independent of the full listing, so the column
# assignment performed later on df does not write into a slice of another frame
# (which triggers pandas' SettingWithCopyWarning).
df = df[df['name'] == 'flat'].copy()

In [None]:
# Load the stored result of every listed dataset, rounded to two decimals
data = [np.round(ds.get_data(dset), 2) for dset in df['dset']]
# assign() returns a new frame with the 'rez' column instead of writing into df,
# which at this point is a boolean-filtered selection of the dataset listing;
# this avoids pandas' chained-assignment (SettingWithCopy) problem.
df = df.assign(rez=data)

In [None]:
# Remove bookkeeping columns so that only the experiment parameters and 'rez' remain.
# Rebinding df (instead of inplace=True) together with errors='ignore' keeps the cell
# re-runnable: executing it a second time no longer raises KeyError for columns that
# were already dropped.
df = df.drop(columns=['datetime', 'zscoreDim', 'dset', 'metric', 'name', 'shape', 'target_dim'],
             errors='ignore')

In [None]:
from mesostat.utils.pandas_helper import pd_category_to_column

In [None]:
# Every column except the mouse name and the result identifies one parameter
# combination. Filtering df.columns directly keeps the original column order; the
# previous list(set(...)) produced an order that can differ between kernel sessions
# (string hashing is randomized), which changed the column layout of dfMouse.
index = [col for col in df.columns if col not in ('mousename', 'rez')]
# One row per parameter combination, one result column per mouse
dfMouse = df.pivot(index=index, columns='mousename', values='rez').reset_index()

In [None]:
# Display only the rows whose datatype is 'bn_trial'
isTrialNormalized = dfMouse['datatype'] == 'bn_trial'
dfMouse.loc[isTrialNormalized]

# Total Correlation

In [None]:
# Disabled alternatives: 'avg_entropy' stored under 'time-channel' and under 'time'
# bulk_metrics.metric_mouse_bulk(dataDB, mc, ds, "avg_entropy", 'sp', 'time-channel', verbose=False)
# bulk_metrics.metric_mouse_bulk(dataDB, mc, ds, "avg_entropy", 's', 'time', verbose=False)

# Active computation: 'avg_TC' (total correlation, per the section title) vs time.
# Unlike the earlier "vs time" cells, no minTrials, trialTypeNames or exclQueryLst
# are passed, so the function's defaults apply.
bulk_metrics.metric_mouse_bulk(dataDB, mc, ds, "avg_TC", 's', 'time', verbose=False)

In [None]:
# Plot the stored ('avg_TC', 'time') results. Every other plot_metric_bulk call in this
# notebook passes the storage object first, followed by the metric and dataset name,
# and provides the x axis of "time" plots through xFunc; the extra leading dataDB
# argument that used to be passed here shifted all positional arguments by one.
bulk_metrics.plot_metric_bulk(ds, 'avg_TC', 'time', verbose=True,
                              xFunc=lambda m, l: dataDB.get_times()) # ylim=[1,48]

In [None]:
# Dedicated total-correlation plot with default axis limits and scale.
# NOTE(review): this helper takes dataDB before ds, unlike the plot_metric_bulk calls
# elsewhere in the notebook; its signature is not visible here — confirm it is current.
bulk_metrics.plot_TC(dataDB, ds, ylim=None, yscale=None, verbose=True)

### Cleaning up

In [None]:
# Full, untruncated listing of every stored dataset, ordered by its 'datetime' column
dsetsByDatetime = ds.list_dsets_pd().sort_values(by='datetime')
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(dsetsByDatetime)

In [None]:
# WARNING: destructive maintenance cell. It asks the storage to delete datasets
# matching metric == "rank_effective", further constrained by timestr.
# NOTE(review): whether timestr selects entries older or newer than the given moment
# is not visible here — confirm in DataStorage.delete_by_query before running.
# With "Run All" this executes after the rank_effective results above were computed.
ds.delete_by_query(queryDict={"metric" : "rank_effective"}, timestr="2020-11-20 18:00:00")