### Configuration

In [1]:
import os
import re
import numpy as np
import pandas as pd
import datetime

import pandas as pd
from scipy.stats import spearmanr
#from dcor import distance_correlation
from pingouin import distance_corr
from statsmodels.stats.multitest import fdrcorrection

import utils__config

In [2]:
# Make the project directory from the local config module the working directory,
# so that the relative 'Cache/...' paths used in later cells resolve against it.
os.chdir(utils__config.working_directory)
# Bare last expression: the cell output shows the directory now in effect.
os.getcwd()

'g:\\My Drive\\Residency\\Research\\Lab - Damisah\\Project - Sleep'

### Parameters

In [3]:
# Input/output locations for the recording being analysed. All three are
# relative paths, so they depend on the working directory set earlier.
#   hypno_epoch_path : CSV read into `hypno`; joined to the spike table on 'epoch'
#   spike_epoch_path : CSV read into `spikes`; joined to the hypnogram on 'epoch'
#   hyp_out_path     : CSV that the per-unit correlation table is written to
# NOTE(review): the '30s' in the file names presumably means 30-second epochs -- confirm.
hypno_epoch_path = 'Cache/Subject01/S01_hypno_epochs_30s.csv' 
spike_epoch_path = 'Cache/Subject01/S01_spike_epochs_30s.csv'
hyp_out_path = 'Cache/Subject01/S01_hyp_correlation.csv'

# Alternate recordings: to analyse one of these instead, comment out the
# assignments above and uncomment exactly one of the groups below.
# hypno_epoch_path = 'Cache/Subject02/Apr26/S02_hypno_epochs_30s.csv'
# spike_epoch_path = 'Cache/Subject02/Apr26/S02_spike_epochs_30s.csv'
# hyp_out_path = 'Cache/Subject02/Apr26/S02_hyp_correlation.csv'

# hypno_epoch_path = 'Cache/Subject02/Apr27/S02_hypno_epochs_30s.csv'
# spike_epoch_path = 'Cache/Subject02/Apr27/S02_spike_epochs_30s.csv'
# hyp_out_path = 'Cache/Subject02/Apr27/S02_hyp_correlation.csv'

### Load Data

In [4]:
# Read the hypnogram table from the path chosen in the parameters cell
hypno = pd.read_csv(hypno_epoch_path)

# Read the spike table, attach the hypnogram columns to every spike row
# (default inner join on 'epoch', so epochs missing from either file are
# dropped), then derive a binary flag: 1 where 'stage' is 2 or 3, else 0.
# NOTE(review): stages 2 and 3 are presumably the slow-wave (N2/N3) codes -- confirm.
spikes = (
    pd.read_csv(spike_epoch_path)
    .merge(hypno, on = 'epoch')
    .assign(sw_stage = lambda merged: np.where(merged['stage'].isin([2, 3]), 1, 0))
)

### Correlation with sleep stage by epoch

In [5]:
# Tunables for this cell.
# NOTE(review): with only 10 resamples the distance-correlation p-value can
# take very few distinct values, so it is too coarse for the FDR step below to
# be meaningful. Keep 10 for quick iteration; use 1000 for a reportable run.
N_BOOT = 10  # or 1000
DCOR_SEED = 42
FDR_ALPHA = 0.05

# Column order of the output table (also used when there are no units at all,
# so the FDR columns can still be added without a KeyError).
CORR_COLUMNS = ['unit_id', 'unit_region', 'unit_laterality',
                'rho', 'rho_p_value', 'dcor', 'dcor_p_value']


def _fdr_adjust(p_values, alpha = FDR_ALPHA):
    """Return FDR-adjusted p-values (statsmodels `fdrcorrection`, method 'indep').

    NaN inputs are excluded from the correction and stay NaN in the output.
    `fdrcorrection` itself does not skip NaN: a single undefined p-value
    (e.g. Spearman on a unit whose firing rate is constant) would otherwise
    count as a test and turn the adjusted p-values of the other units into NaN.

    Parameters
    ----------
    p_values : array-like of float
        Raw p-values, one per unit.
    alpha : float
        Family-wise error rate passed through to `fdrcorrection`.

    Returns
    -------
    numpy.ndarray
        Adjusted p-values, same length and order as `p_values`.
    """
    p_values = np.asarray(p_values, dtype = float)
    adjusted = np.full(p_values.shape, np.nan)
    testable = ~np.isnan(p_values)
    if testable.any():
        adjusted[testable] = fdrcorrection(p_values[testable],
                                           alpha = alpha,
                                           method = 'indep')[1]
    return adjusted


# One record per unit; the DataFrame is built once after the loop instead of
# being grown with pd.concat on every iteration.
records = []

# groupby hands us each unit's rows in a single pass; sort = False keeps the
# units in order of first appearance, as iterating over .unique() did.
for unit, unit_spikes in spikes.groupby('unit_id', sort = False):

    # Laterality and region are taken from the unit's first row
    # (.iloc[0] because the group keeps the original, non-zero-based index)
    unit_laterality = unit_spikes['unit_laterality'].iloc[0]
    unit_region = unit_spikes['unit_region'].iloc[0]

    # Spearman's rho between the binary stage flag and the firing rate
    # (rank-based, monotonic association)
    r, r_p = spearmanr(unit_spikes['sw_stage'], unit_spikes['fr'])

    # Distance correlation with a resampling-based p-value
    # (the dcor package does not return a p-value unless you use
    #  one of its more complicated functions, hence pingouin)
    d, d_p = distance_corr(x = unit_spikes['sw_stage'],
                           y = unit_spikes['fr'],
                           alternative = 'two-sided',
                           n_boot = N_BOOT,
                           seed = DCOR_SEED)

    records.append({'unit_id' : unit,
                    'unit_region' : unit_region,
                    'unit_laterality' : unit_laterality,
                    'rho' : r,
                    'rho_p_value' : r_p,
                    'dcor' : d,
                    'dcor_p_value' : d_p})

# Rows get a 0..n-1 index here (the concat-in-a-loop version left every row
# with index 0, which made the CSV index column meaningless).
corr = pd.DataFrame(records, columns = CORR_COLUMNS)

# Multiple-comparison correction across units, separately for each statistic
corr['fdr_rho_p_value'] = _fdr_adjust(corr['rho_p_value'])
corr['fdr_dcor_p_value'] = _fdr_adjust(corr['dcor_p_value'])

corr.to_csv(hyp_out_path)