## Main notebook

In [9]:
''' Dependencies '''
from sklearn.decomposition import PCA
from tqdm.auto import tqdm
import warnings
import pandas as pd
import numpy as np
import scipy
import sys
import statsmodels.api as sm
from tqdm import tqdm as pbar

# my scripts
from pyaldata import * 
import preprocess

import TME
import utils
import importlib
importlib.reload(utils)
importlib.reload(TME)
importlib.reload(preprocess)
from utils import *

# Plotting
# from matplotlib.offsetbox import AnchoredText
import matplotlib.pyplot as plt
import seaborn as sns

%config InlineBackend.figure_format = 'retina'

## Load data and parameterize

In [10]:
''' Parameters '''
OTHER_ARRAY_D = 50 # Distance value to indicate other array

TYPE_ANALYSIS = 'pooled' # alternative: 'm1', 'pmd' #TODO

SESSIONS = ['Chewie_CO_VR_2016-09-09', 'Chewie_CO_VR_2016-09-12', 'Chewie_CO_VR_2016-09-14', 'Chewie_CO_VR_2016-10-06', 
            'Chewie_CO_FF_2016-09-15', 'Chewie_CO_FF_2016-09-21', 'Chewie_CO_FF_2016-10-05', 'Chewie_CO_FF_2016-10-07',
            'Mihili_CO_VR_2014-03-04', 'Mihili_CO_VR_2014-03-06', 'Mihili_CO_FF_2014-02-03', 'Mihili_CO_FF_2014-02-17', 
            'Mihili_CO_FF_2014-02-18', 'Mihili_CO_FF_2014-03-07', 'Mihili_CO_VR_2014-03-03']

THRESHOLDS_VARE = np.round(np.arange(0.2, 0.95, 0.025), 3)
INTERVALS = [(0, 2), (0, 4)] # Fixed within intervals

In [11]:
''' Experimental sessions and electrode maps '''
m1_emap  = localize_elecs(read_cmp(file_path='/Users/Daphne/Data/Chewie Left M1 SN 6250-001474.cmp'),  elecs=range(1,97))
pmd_emap = localize_elecs(read_cmp(file_path='/Users/Daphne/Data/Chewie Left PMd SN 6251-001469.cmp'), elecs=range(1,97))

In [12]:
from scipy.io import loadmat

Surr = scipy.io.loadmat('/Users/Daphne/Data/S.mat')['S']

In [13]:
Surr.shape

(2000, 218, 100)

In [None]:
''' Perform preprocessing and store '''

main_dict = {} # Initialize dictionary for empirical data

for s in pbar(range(len(SESSIONS))):
    main_dict[f'{SESSIONS[s][-10:]}'] = {
        'df' : preprocess.preprocess_data(SESSIONS[s], '/Users/Daphne/Data/'),
    }

  7%|▋         | 1/15 [00:06<01:26,  6.17s/it]

In [10]:
main_dict[s].

dict_keys(['2016-09-09', '2016-09-12', '2016-09-14', '2016-10-06', '2016-09-15', '2016-09-21', '2016-10-05', '2016-10-07', '2014-03-04', '2014-03-06', '2014-02-03', '2014-02-17', '2014-02-18', '2014-03-07', '2014-03-03'])

In [11]:
from scipy.io import savemat, loadmat

X = np.concatenate(main_dict['2016-09-09']['df']['both_rates'].values, axis=0) 

savemat('X_concat_2016-09-09.mat', surr_dict) 

NameError: name 'savemat' is not defined

In [42]:
surr_dict = {} # Initialize dictionary for surrogate data
i = 0

for s in pbar(main_dict.keys()):
    
    # Select trial data for session by idx
    td = main_dict[s]['df']
    X = np.concatenate(td['both_rates'].values, axis=0) 
    
    surr_dict[f'{i}'] = X
    
    i+=1

100%|██████████| 15/15 [00:02<00:00,  6.77it/s]


In [44]:
from scipy.io import savemat, loadmat

savemat('surr_dict.mat', surr_dict) 

## 

In [None]:
''' Trial-concatenate and perform dim reduction all sessions '''

for s in pbar(main_dict.keys()):
    
    # Select trial data for session by idx
    td = main_dict[s]['df']
    
    if TYPE_ANALYSIS == 'pooled':
        X = np.concatenate(td['both_rates'].values, axis=0) 
        N = td.both_rates[0].shape[1]
        
#     elif TYPE_ANALYSIS == 'm1':   
#         X = np.concatenate(td['M1_rates'].values, axis=0)
#         N = td.M1_rates[0].shape[1]

#     elif TYPE_ANALYSIS == 'pmd':  
#         X = np.concatenate(td['PMd_rates'].values, axis=0)
#         N = td.PMd_rates[0].shape[1]
        
    # Generate Surrogates
    X_surr = TME.TensorMaximumEntropy(X)
    
    # True data
    model = PCA(n_components=N)
    model.fit(X)
    main_dict[s]['model'] = model
    main_dict[s]['pcs'] = model.components_.T
   
    # Surrogate
    model_surr = PCA(n_components=N)
    model_surr.fit(X_surr)
    surr_dict[s]['model'] = model_surr
    surr_dict[s]['pcs'] = model_surr.components_.T

In [None]:
np.save('surr_dict.npy', surr_dict) 

## Get dataframes with correlations and spatial distances for real data, surrogates

In [None]:
for s in main_dict.keys():
    main_dict[s]['vare'] = {}
    for th in THRESHOLDS_VARE:
        main_dict[s]['vare'][f'{th}'] = { 'distances_df': {} }
        
for s in surr_dict.keys():
    surr_dict[s]['vare'] = {}
    for th in THRESHOLDS_VARE:
        surr_dict[s]['vare'][f'{th}'] = { 'distances_df': {} }

In [None]:
for s in pbar(main_dict.keys()):
    
    # Get trial data
    td = main_dict[s]['df']
    components_range = [np.argmax(main_dict[s]['model'].explained_variance_ratio_.cumsum() > THRESHOLDS_VARE[i]) for i in range(len(THRESHOLDS_VARE))]
    
    for i, r in enumerate(components_range):

        if r < 2: r += 1 # Can't compute correlation between two values

        L = main_dict[s]['pcs'][:, :r] # Get the first r PCs

        # Get correlations and physical distances
        C, PD, A = compute_stat_and_phys_distances(L, td['M1_unit_guide'][0], td['PMd_unit_guide'][0], m1_emap, pmd_emap)
        
        # Convert to dataframe
        df = pd.DataFrame(data={'correlation': C, 'distance': PD, 'on array': A})
        df['category'] = df['distance'].apply(lambda d: 'same electrode' if d == 0 else ('same array' if d < OTHER_ARRAY_D else ('other array')))
        df['within distance'] = pd.cut(df['distance'], bins=[-0.1, 0.001, 2.01, 4.01, OTHER_ARRAY_D], labels=['0', '(0, 2]','(2, 4]', '(4, inf)'])
        df['Type'] = 'Actual'
        
        # Store dataframe in main dictionary
        main_dict[s]['vare'][f'{THRESHOLDS_VARE[i]}']['distances_df'] = df

In [None]:
for s in pbar(surr_dict.keys()):
    
    # Get trial data
    td = main_dict[s]['df']
    components_range = [np.argmax(surr_dict[s]['model'].explained_variance_ratio_.cumsum() > THRESHOLDS_VARE[i]) for i in range(len(THRESHOLDS_VARE))]

    for i, r in enumerate(components_range):

        if r < 2: r += 1 # Can't compute correlation between two values

        L_surr = surr_dict[s]['pcs'][:, :r] # Get the first r PCs

        # Get correlations and physical distances
        C, PD, A = compute_stat_and_phys_distances(L_surr, td['M1_unit_guide'][0], td['PMd_unit_guide'][0], m1_emap, pmd_emap)
        
        # Convert to dataframe
        df = pd.DataFrame(data={'correlation': C, 'distance': PD, 'on array': A})
        df['category'] = df['distance'].apply(lambda d: 'same electrode' if d == 0 else ('same array' if d < OTHER_ARRAY_D else ('other array')))
        df['within distance'] = pd.cut(df['distance'], bins=[-0.1, 0.001, 2.01, 4.01, OTHER_ARRAY_D], labels=['0', '(0, 2]','(2, 4]', '(4, inf)'])
        df['Type'] = 'Surrogate'
        
        # Store dataframe in surrogate dictionary
        surr_dict[s]['vare'][f'{THRESHOLDS_VARE[i]}']['distances_df'] = df

In [None]:
for s in pbar(main_dict.keys()):
    
    for v in main_dict[s]['vare'].keys(): 
        
        # Empirical and surrogate data
        variants = {
          'Actual' : main_dict[s]['vare'][f'{v}']['distances_df'],
          'Surrogate' : surr_dict[s]['vare'][f'{v}']['distances_df'],
        }
        # Dummy functions
        int_dum = lambda df, lb, ub: df['distance'].apply(lambda x: 1 if lb < x <= ub else 0)
        on_arr = lambda df, arr: df['on array'].apply(lambda x: 1 if x == arr else 0)
        same_ele = lambda df: df['distance'].apply(lambda x: 1 if x == 0 else 0)
        same_arr = lambda df: df['distance'].apply(lambda x: 1 if 0 < x < OTHER_ARRAY_D else 0)

        # Analyses methods
        analyses = {
          'single_cat' : {
            'fixed cols' : {
              'OA Constant' : lambda df: [1] * len(df),
              'SE (d = 0)' : same_ele,
              'SA (d exists)' : same_arr,
            },
            'variable cols' : {}
          },
          'within' : {
            'fixed cols' : {
              'M1 Constant' : lambda df: on_arr(df, 'M1'),
              'OA Constant' : lambda df: on_arr(df, 'OA'),
              'PMd Constant' : lambda df: on_arr(df, 'PMd'),
              'SE (d = 0)' : same_ele
            },
            'variable cols' : {
              'd in ' : lambda df, lb, ub: int_dum(df, lb, ub),
            },
          },
          'within_separate' : {
            'fixed cols' : {
              'M1 Constant' : lambda df: on_arr(df, 'M1'),
              'OA Constant' : lambda df: on_arr(df, 'OA'),
              'PMd Constant' : lambda df: on_arr(df, 'PMd'),
              'M1 SE (d = 0)' : lambda df: on_arr(df, 'M1') * same_ele(df),
              'PMd SE (d = 0)' : lambda df: on_arr(df, 'PMd') * same_ele(df),
            },
            'variable cols' : {
              'M1 d in ' : lambda df, lb, ub: int_dum(df, lb, ub) * on_arr(df, 'M1'),
              'PMd d in ' : lambda df, lb, ub: int_dum(df, lb, ub) * on_arr(df, 'PMd'),
            }
          },
        }
        
        for study, df in variants.items():
            
            for analysis, cols_dict in analyses.items():
                df_X = pd.DataFrame()
                for col_name, col_func in cols_dict['fixed cols'].items():
                    df_X[col_name] = col_func(df)
                for (lb, ub) in INTERVALS:
                    for col_name, col_func in cols_dict['variable cols'].items():
                        df_X[col_name + f'({lb:.1f}, {ub:.1f}]'] = col_func(df, lb, ub)
                df_X = df_X.reindex(sorted(df_X.columns), axis=1)
                res = sm.OLS(df.correlation, df_X, hasconst=True).fit()
                title = f'{study}_{analysis}'
                
                if study == 'Actual': main_dict[s]['vare'][f'{v}'][title] = res
                elif study == 'Surrogate': surr_dict[s]['vare'][f'{v}'][title] = res

## Testing

In [None]:
np.save('surr_dict.npy', surr_dict) 

In [None]:
#main_dict[s]['vare']['0.2']['Actual_single_cat'].params.values

In [None]:
# coeffs = []

# for i, v in enumerate(main_dict[s]['vare'].keys()):
    
#     coeffs.append(main_dict[s]['vare'][f'{v}']['Actual_single_cat'].params.values)

In [None]:
#main_dict.keys()