In [12]:
# Standard libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from ipywidgets import IntProgress
from IPython.display import display
from scipy.stats import mannwhitneyu, wilcoxon

# Make the project root importable.
# The root is located by searching for `rootname` inside the current working
# directory, so the notebook must be started from somewhere inside the project tree.
import os,sys,inspect
rootname = "pub-2020-exploratory-analysis"
thispath = os.getcwd()
# thispath = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
if rootname not in thispath:
    # Fail with an actionable message instead of str.index's bare "substring not found"
    raise ValueError("Project root '" + rootname + "' not found in working directory '" + thispath
                     + "'; start the notebook from inside the project tree")
rootpath = os.path.join(thispath[:thispath.index(rootname)], rootname)
# Re-running this cell must not keep stacking duplicate entries onto sys.path
if rootpath not in sys.path:
    sys.path.append(rootpath)
print("Appended root directory", rootpath)

from mesostat.utils.qt_helper import gui_fnames, gui_fpath
from mesostat.metric.metric import MetricCalculator
from mesostat.utils.hdf5_io import DataStorage

from lib.gallerosalas.data_fc_db_raw import DataFCDatabase
from lib.common.metric_helper import calc_metric_mouse, calc_metric_session, calc_metric_mouse_delay
import lib.analysis.bulk_metrics as bulk_metrics


# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel. When the extension is
# already loaded, %load_ext only prints a notice (see the output below).
%load_ext autoreload
%autoreload 2

Appended root directory /media/aleksejs/DataHDD/work/codes/comp-neuro/analysis-mesoscopic/pub-2020-exploratory-analysis
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
# Settings handed to DataFCDatabase. Only the location of the raw data is set.
# Alternatives used on other machines / via the GUI picker:
#   gui_fpath("Path to data collection",  './')
#   '/media/alyosha/Data/TE_data/yasirdata_raw/'
#   '/home/alyosha/data/yasirdata_raw/'
params = dict(
    root_path_data='/media/aleksejs/DataHDD/work/data/yasir/yasirdata_raw'
)

In [3]:
# Build the data accessor from `params`. Construction logs each loading step
# and the list of mice it found (see the output below).
dataDB = DataFCDatabase(params)

Reading channel label file
Reading channel area file
Reading allen brain map
Reading task structure
Reading session structure
Searching for data files
Found mice ['mou_5', 'mou_6', 'mou_7', 'mou_9']


In [4]:
# Results store backed by the file 'gallerosalas_bulk_metrics.h5' (path is relative
# to the working directory). `ds` is passed to the calc/plot calls below.
ds = DataStorage('gallerosalas_bulk_metrics.h5')

In [5]:
# Metric calculator passed to the calc_metric_* calls below; constructed with
# serial=True and verbose=False.
mc = MetricCalculator(serial=True, verbose=False)

In [6]:
# Quick sanity summary of what the database loaded.
# NOTE(review): 'nSessions' prints 4, the same as the number of mice — presumably
# `dataDB.sessions` is keyed per mouse, so len() would count mice rather than sessions; confirm.
print('mice', dataDB.mice)
print('nSessions', len(dataDB.sessions))
print('datatypes', dataDB.get_data_types())
# Channel count is queried for a single mouse ('mou_5') only
print('nChannel', dataDB.get_nchannels('mou_5'))

mice {'mou_5', 'mou_6', 'mou_9', 'mou_7'}
nSessions 4
datatypes ['raw', 'bn_session', 'bn_trial']
nChannel 27


In [7]:
# The return value is discarded, so this call is run for its effect on `dataDB`.
# NOTE(review): presumably caches distances used by later helpers — confirm.
dataDB.calc_shortest_distances()

# Bulk Metrics

In [8]:
# Sweep arguments unpacked into the 'time' metric calls.
# Key order is kept as in the original literals.
argSweepDictTime = dict(
    trialType='auto',
    datatype=['bn_trial', 'bn_session'],
)

# Sweep arguments unpacked into the 'session' metric calls: same as above plus
# the list of interval names.
argSweepDictSession = dict(
    trialType='auto',
    intervName=["PRE", "TEX", "DEL", "REW", "AVG"],
    datatype=['bn_trial', 'bn_session'],
)

In [9]:
# Parameter combinations passed as `exclQueryLst` to the calc_metric_* calls.
exclQueryLst = [
    # Trial-baseline-normalized data in the PRE interval (baseline normalized)
    dict(datatype='bn_trial', intervName='PRE'),
    # mou_6 received no reward
    dict(mousename='mou_6', intervName='REW'),
    # Not comparable for mou_6: no reward and a longer delay
    dict(mousename='mou_6', intervName='AVG'),
]

## 1. Mean
### 1.1. Mean vs time

In [14]:
# 'mean' metric under the 'time' name, swept over argSweepDictTime.
# The log below prints one line per sweep combination.
calc_metric_mouse_delay(
    dataDB, mc, ds, 'mean', 's', 'time',
    verbose=False,
    minTrials=10,
    haveDelay=True,
    skipExisting=False,
    exclQueryLst=exclQueryLst,
    **argSweepDictTime
)

IntProgress(value=0, description='time', max=40)

{'trialType': 'Hit', 'datatype': 'bn_trial'}
-- (1209, 160, 27)
{'trialType': 'Hit', 'datatype': 'bn_session'}
-- (1209, 160, 27)
{'trialType': 'Miss', 'datatype': 'bn_trial'}
-- (239, 160, 27)
{'trialType': 'Miss', 'datatype': 'bn_session'}
-- (239, 160, 27)
{'trialType': 'CR', 'datatype': 'bn_trial'}
-- (1315, 160, 27)
{'trialType': 'CR', 'datatype': 'bn_session'}
-- (1315, 160, 27)
{'trialType': 'FA', 'datatype': 'bn_trial'}
-- (144, 160, 27)
{'trialType': 'FA', 'datatype': 'bn_session'}
-- (144, 160, 27)
{'datatype': 'bn_trial'}
Skipping existing time_bn_trial_mou_5_mean_(timesteps,)
{'datatype': 'bn_session'}
Skipping existing time_bn_session_mou_5_mean_(timesteps,)
{'trialType': 'Hit', 'datatype': 'bn_trial'}
-- (1724, 160, 27)
{'trialType': 'Hit', 'datatype': 'bn_session'}
-- (1724, 160, 27)
{'trialType': 'Miss', 'datatype': 'bn_trial'}
-- (251, 160, 27)
{'trialType': 'Miss', 'datatype': 'bn_session'}
-- (251, 160, 27)
{'trialType': 'CR', 'datatype': 'bn_trial'}
-- (1910, 160, 2

In [19]:
# Plot the stored 'mean' / 'time' results; the x axis comes from dataDB.get_times().
# (An earlier variant of this cell called bulk_metrics.plot_metric_bulk instead.)
bulk_metrics.plot_metric_bulk_1D(
    dataDB, ds, 'mean', 'time',
    verbose=False,
    haveTimeLabels=True,
    xFunc=lambda m, l: dataDB.get_times()
)

['mean', 'time', 'mou_5', 'CR', 'bn_session', 'dset5']
['mean', 'time', 'mou_6', 'CR', 'bn_session', 'dset13']
['mean', 'time', 'mou_7', 'CR', 'bn_session', 'dset21']
['mean', 'time', 'mou_9', 'CR', 'bn_session', 'dset29']
['mean', 'time', 'mou_5', 'FA', 'bn_session', 'dset7']
['mean', 'time', 'mou_6', 'FA', 'bn_session', 'dset15']
['mean', 'time', 'mou_7', 'FA', 'bn_session', 'dset23']
['mean', 'time', 'mou_9', 'FA', 'bn_session', 'dset31']
['mean', 'time', 'mou_5', 'Hit', 'bn_session', 'dset1']
['mean', 'time', 'mou_6', 'Hit', 'bn_session', 'dset9']
['mean', 'time', 'mou_7', 'Hit', 'bn_session', 'dset17']
['mean', 'time', 'mou_9', 'Hit', 'bn_session', 'dset25']
['mean', 'time', 'mou_5', 'Miss', 'bn_session', 'dset3']
['mean', 'time', 'mou_6', 'Miss', 'bn_session', 'dset11']
['mean', 'time', 'mou_7', 'Miss', 'bn_session', 'dset19']
['mean', 'time', 'mou_9', 'Miss', 'bn_session', 'dset27']
['mean', 'time', 'mou_5', 'CR', 'bn_trial', 'dset4']
['mean', 'time', 'mou_6', 'CR', 'bn_trial', 

### 1.2. Mean vs session

In [None]:
# 'mean' metric under the 'session' name, swept over argSweepDictSession.
calc_metric_session(
    dataDB, mc, ds, 'mean', '', 'session',
    verbose=False,
    minTrials=10,
    skipExisting=False,
    exclQueryLst=exclQueryLst,
    **argSweepDictSession
)

In [None]:
# Scatter plot of the stored 'mean' / 'session' results; no x transform (xFunc=None)
# and regression switched off.
bulk_metrics.scatter_metric_bulk(ds, 'mean', 'session', verbose=False, xFunc=None, haveRegression=False)

### 1.3. Conditions

In [None]:
# Bar plot of 'mean' / 'session' results for Hit and CR trials, over every
# interval name reported by the database.
bulk_metrics.barplot_conditions(
    ds, 'mean', 'session',
    verbose=True,
    trialTypes=['Hit', 'CR'],
    intervNames=dataDB.get_interval_names()
)

## 2. Variance

**TODO**
* Plot trial variance relative to temporal variance
* Plot mean with variance together

### Pros/Cons of Baseline Normalization
* DFF-Trial
    - Pos: Removes a dynamic baseline that changes on the order of trials.
    - Pos: Under the assumption of a signal-free pre-trial interval, baseline removal enhances the relative change in significant activity during the trial.
    - Neg: In the presence of correlation between the pre-trial interval and trial signals, this procedure destroys information during the trial.

* DFF-Session vs ZScore-Session
    - Both linear transforms
    - Mean is more meaningful for DFF if pre-trial interval is at least somewhat stable
    - Va

In [26]:
# 'varmean' metric under the 'time' name, swept over argSweepDictTime.
calc_metric_mouse_delay(
    dataDB, mc, ds, 'varmean', 's', 'time',
    verbose=False,
    minTrials=30,
    haveDelay=True,
    skipExisting=False,
    exclQueryLst=exclQueryLst,
    **argSweepDictTime
)

IntProgress(value=0, description='time', max=40)

{'trialType': 'Hit', 'datatype': 'bn_trial'}
Skipping existing time_Hit_bn_trial_mou_5_varmean_(timesteps,)
{'trialType': 'Hit', 'datatype': 'bn_session'}
Skipping existing time_Hit_bn_session_mou_5_varmean_(timesteps,)
{'trialType': 'Miss', 'datatype': 'bn_trial'}
Skipping existing time_Miss_bn_trial_mou_5_varmean_(timesteps,)
{'trialType': 'Miss', 'datatype': 'bn_session'}
Skipping existing time_Miss_bn_session_mou_5_varmean_(timesteps,)
{'trialType': 'CR', 'datatype': 'bn_trial'}
Skipping existing time_CR_bn_trial_mou_5_varmean_(timesteps,)
{'trialType': 'CR', 'datatype': 'bn_session'}
Skipping existing time_CR_bn_session_mou_5_varmean_(timesteps,)
{'trialType': 'FA', 'datatype': 'bn_trial'}
Skipping existing time_FA_bn_trial_mou_5_varmean_(timesteps,)
{'trialType': 'FA', 'datatype': 'bn_session'}
Skipping existing time_FA_bn_session_mou_5_varmean_(timesteps,)
{'datatype': 'bn_trial'}
Skipping existing time_bn_trial_mou_5_varmean_(timesteps,)
{'datatype': 'bn_session'}
Skipping exis

In [27]:
# Plot the stored 'varmean' / 'time' results; y axis starts at 0 with the upper limit left open.
bulk_metrics.plot_metric_bulk_1D(
    dataDB, ds, 'varmean', 'time',
    verbose=False,
    haveTimeLabels=True,
    ylim=[0,None],
    xFunc=lambda m, l: dataDB.get_times()
)

['varmean', 'time', 'mou_5', 'CR', 'bn_session', 'dset37']
['varmean', 'time', 'mou_6', 'CR', 'bn_session', 'dset45']
['varmean', 'time', 'mou_7', 'CR', 'bn_session', 'dset53']
['varmean', 'time', 'mou_9', 'CR', 'bn_session', 'dset61']
['varmean', 'time', 'mou_5', 'FA', 'bn_session', 'dset39']
['varmean', 'time', 'mou_6', 'FA', 'bn_session', 'dset47']
['varmean', 'time', 'mou_7', 'FA', 'bn_session', 'dset55']
['varmean', 'time', 'mou_9', 'FA', 'bn_session', 'dset63']
['varmean', 'time', 'mou_5', 'Hit', 'bn_session', 'dset33']
['varmean', 'time', 'mou_6', 'Hit', 'bn_session', 'dset41']
['varmean', 'time', 'mou_7', 'Hit', 'bn_session', 'dset49']
['varmean', 'time', 'mou_9', 'Hit', 'bn_session', 'dset57']
['varmean', 'time', 'mou_5', 'Miss', 'bn_session', 'dset35']
['varmean', 'time', 'mou_6', 'Miss', 'bn_session', 'dset43']
['varmean', 'time', 'mou_7', 'Miss', 'bn_session', 'dset51']
['varmean', 'time', 'mou_9', 'Miss', 'bn_session', 'dset59']
['varmean', 'time', 'mou_5', 'CR', 'bn_trial

### Session

In [None]:
# 'varmean' metric under the 'session' name, swept over argSweepDictSession.
calc_metric_session(
    dataDB, mc, ds, 'varmean', '', 'session',
    verbose=False,
    minTrials=30,
    skipExisting=False,
    exclQueryLst=exclQueryLst,
    **argSweepDictSession
)

In [None]:
# Scatter plot of the stored 'varmean' / 'session' results; no x transform (xFunc=None)
# and regression switched off.
bulk_metrics.scatter_metric_bulk(ds, 'varmean', 'session', verbose=False, xFunc=None, haveRegression=False)

### Conditions

In [None]:
# Bar plot of 'varmean' / 'session' results for Hit and CR trials, restricted to
# an explicit list of four intervals.
bulk_metrics.barplot_conditions(ds, 'varmean', 'session', verbose=True,
                                trialTypes=['Hit', 'CR'],
                                intervNames=['PRE', 'DEL', 'TEX', 'REW'])

### Test variance across channels for interesting interval

* Average signal over texture presentation interval
* Compute variance over trials for each channel
* Compare channels

In [None]:
# 'varmean' metric under the 'channel' name.
# The original cell unpacked `argSweepDict`, which is never defined in this notebook
# and raises NameError on a fresh kernel. `argSweepDictSession` is used instead because
# it is the sweep that carries `intervName` (including "TEX").
# NOTE(review): confirm this is the intended sweep; it also covers the other intervals.
calc_metric_mouse(dataDB, mc, ds, 'varmean', 'p', 'channel', verbose=False, minTrials=30,
                  skipExisting=False, exclQueryLst=exclQueryLst, **argSweepDictSession)

In [None]:
# Plot the stored 'varmean' / 'channel' results on a logarithmic y axis.
# NOTE(review): another cell in this notebook calls plot_metric_bulk with `dataDB` as the
# first positional argument, while this one starts with `ds` — confirm the current signature.
bulk_metrics.plot_metric_bulk(ds, 'varmean', 'channel', yscale='log', verbose=False) # ylim=[0.005,2], dropCols=['cropTime']

## 3. Effective Rank

### ByTime

In [28]:
# 'rank_effective' metric under the 'time' name, swept over argSweepDictTime.
# The metric is given the setting allowBadData=True.
calc_metric_mouse_delay(
    dataDB, mc, ds, 'rank_effective', 's', 'time',
    verbose=False,
    minTrials=50,
    metricSettings={'allowBadData': True},
    haveDelay=True,
    skipExisting=False,
    exclQueryLst=exclQueryLst,
    **argSweepDictTime
)

IntProgress(value=0, description='time', max=40)

{'trialType': 'Hit', 'datatype': 'bn_trial'}
Skipping existing time_Hit_bn_trial_mou_5_rank_effective_(timesteps,)
{'trialType': 'Hit', 'datatype': 'bn_session'}
Skipping existing time_Hit_bn_session_mou_5_rank_effective_(timesteps,)
{'trialType': 'Miss', 'datatype': 'bn_trial'}
Skipping existing time_Miss_bn_trial_mou_5_rank_effective_(timesteps,)
{'trialType': 'Miss', 'datatype': 'bn_session'}
Skipping existing time_Miss_bn_session_mou_5_rank_effective_(timesteps,)
{'trialType': 'CR', 'datatype': 'bn_trial'}
Skipping existing time_CR_bn_trial_mou_5_rank_effective_(timesteps,)
{'trialType': 'CR', 'datatype': 'bn_session'}
Skipping existing time_CR_bn_session_mou_5_rank_effective_(timesteps,)
{'trialType': 'FA', 'datatype': 'bn_trial'}
Skipping existing time_FA_bn_trial_mou_5_rank_effective_(timesteps,)
{'trialType': 'FA', 'datatype': 'bn_session'}
Skipping existing time_FA_bn_session_mou_5_rank_effective_(timesteps,)
{'datatype': 'bn_trial'}
Skipping existing time_bn_trial_mou_5_rank_

In [29]:
# Plot the stored 'rank_effective' / 'time' results; y axis starts at 1 with the upper limit left open.
bulk_metrics.plot_metric_bulk_1D(
    dataDB, ds, 'rank_effective', 'time',
    verbose=False,
    haveTimeLabels=True,
    ylim=[1,None],
    xFunc=lambda m, l: dataDB.get_times()
)

['rank_effective', 'time', 'mou_5', 'CR', 'bn_session', 'dset69']
['rank_effective', 'time', 'mou_6', 'CR', 'bn_session', 'dset77']
['rank_effective', 'time', 'mou_7', 'CR', 'bn_session', 'dset85']
['rank_effective', 'time', 'mou_9', 'CR', 'bn_session', 'dset93']
['rank_effective', 'time', 'mou_5', 'FA', 'bn_session', 'dset71']
['rank_effective', 'time', 'mou_6', 'FA', 'bn_session', 'dset79']
['rank_effective', 'time', 'mou_7', 'FA', 'bn_session', 'dset87']
['rank_effective', 'time', 'mou_9', 'FA', 'bn_session', 'dset95']
['rank_effective', 'time', 'mou_5', 'Hit', 'bn_session', 'dset65']
['rank_effective', 'time', 'mou_6', 'Hit', 'bn_session', 'dset73']
['rank_effective', 'time', 'mou_7', 'Hit', 'bn_session', 'dset81']
['rank_effective', 'time', 'mou_9', 'Hit', 'bn_session', 'dset89']
['rank_effective', 'time', 'mou_5', 'Miss', 'bn_session', 'dset67']
['rank_effective', 'time', 'mou_6', 'Miss', 'bn_session', 'dset75']
['rank_effective', 'time', 'mou_7', 'Miss', 'bn_session', 'dset83']


### BySession

In [None]:
# 'rank_effective' metric under the 'session' name, swept over argSweepDictSession.
# Disabled option kept from the original cell: trialTypeNames=[None, 'Hit', 'CR']
calc_metric_session(dataDB, mc, ds, 'rank_effective', '', 'session', verbose=False, minTrials=30,
                    skipExisting=False, exclQueryLst=exclQueryLst, **argSweepDictSession)

In [None]:
# Scatter plot of the stored 'rank_effective' / 'session' results; y axis starts at 1,
# no x transform and no regression.
bulk_metrics.scatter_metric_bulk(
    ds, 'rank_effective', 'session',
    ylim=[1, None],
    verbose=False,
    xFunc=None,
    haveRegression=False
)

### Conditions

In [None]:
# Bar plot of 'rank_effective' / 'session' results for Hit and CR trials, over every
# interval name reported by the database.
bulk_metrics.barplot_conditions(
    ds, 'rank_effective', 'session',
    verbose=True,
    trialTypes=['Hit', 'CR'],
    intervNames=dataDB.get_interval_names()
)

## 4. Total Correlation

In [None]:
# Disabled variants computing "avg_entropy" instead of "avg_TC":
# bulk_metrics.metric_mouse_bulk(dataDB, mc, ds, "avg_entropy", 'sp', 'time-channel', verbose=False)
# bulk_metrics.metric_mouse_bulk(dataDB, mc, ds, "avg_entropy", 's', 'time', verbose=False)

# "avg_TC" metric under the 'time' name.
# NOTE(review): this is the only cell that computes through bulk_metrics.metric_mouse_bulk
# rather than the calc_metric_* helpers, and it passes no sweep/exclusion arguments —
# confirm the function still exists with this signature.
bulk_metrics.metric_mouse_bulk(dataDB, mc, ds, "avg_TC", 's', 'time', verbose=False)

In [None]:
# Plot the stored 'avg_TC' / 'time' results.
# NOTE(review): another cell in this notebook calls plot_metric_bulk without `dataDB`
# (starting with `ds`) — confirm the current signature.
bulk_metrics.plot_metric_bulk(dataDB, ds, 'avg_TC', 'time', verbose=True) # ylim=[1,48]

In [None]:
# Dedicated total-correlation plot; axis limits and scale are passed as None.
bulk_metrics.plot_TC(dataDB, ds, ylim=None, yscale=None, verbose=True)

### Cleaning up

In [None]:
# Show the complete dataset listing from `ds`, ordered by its 'datetime' column.
# The pandas row/column display limits are lifted only inside this `with` block.
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(ds.list_dsets_pd().sort_values(by='datetime'))

In [None]:
# WARNING: presumably destructive — removes entries from `ds` whose metric is
# "rank_effective", selected relative to the given timestamp.
# NOTE(review): whether `timestr` selects entries older or newer than that time is not
# visible here; confirm before running, and skip this cell on "Run All".
ds.delete_by_query(queryDict={"metric" : "rank_effective"}, timestr="2020-11-20 18:00:00")