# Ripple-associated CG spectral analysis
https://github.com/Eden-Kramer-Lab/spectral_connectivity/blob/master/examples/Intro_tutorial.ipynb

<br>

### Imports

In [1]:
import os
import re
import glob
import pandas as pd
import numpy as np
import seaborn as sns
from tqdm import tqdm
import matplotlib.pyplot as plt
%matplotlib inline
import warnings
from spectral_connectivity import Multitaper, Connectivity

warnings.filterwarnings('ignore')

<br>

### Open datasets

In [2]:
main_path = 'PreProcessedData'
cg_data = pd.read_csv(os.path.join(main_path, 'cg_data.csv'), index_col=False)
ripple_data = pd.read_csv(os.path.join(main_path, 'cg_analysis_ripple_library.csv'), index_col=False)

In [3]:
cg_data.head()

Unnamed: 0,ripple_nr,timestamp,TT1,TT2,TT3,TT4,TT5,TT6,TT7,TT8,TT9,TT10,TT11,TT12,TT13,TT14,start_time,end_time,phase,relative_timestamp
0,0,83.25725,151.905,105.105,158.73,191.685,-38.61,121.485,115.245,146.835,149.76,147.225,8.97,73.905,37.83,70.59,83.75725,83.81275,Sample,-0.5
1,0,83.25775,144.495,125.19,151.515,148.98,-86.775,116.415,108.42,145.47,145.08,130.455,7.02,70.785,44.07,67.275,83.75725,83.81275,Sample,-0.4995
2,0,83.25825,76.44,95.94,108.42,106.47,-139.62,68.25,33.15,111.735,109.59,75.855,-79.755,15.405,12.87,23.01,83.75725,83.81275,Sample,-0.499
3,0,83.25875,0.0,17.16,54.99,18.525,-220.155,-26.715,-52.455,40.365,65.52,-0.78,-129.87,-38.61,-71.955,-40.365,83.75725,83.81275,Sample,-0.4985
4,0,83.25925,-20.28,-10.92,28.665,15.795,-262.47,-54.015,-74.685,16.38,18.915,-39.585,-130.65,-69.615,-82.485,-88.14,83.75725,83.81275,Sample,-0.498


In [4]:
cg_data.groupby(['phase']).ripple_nr.nunique()

phase
Delay                 143
ITI                   104
Sample                200
Test (Past-choice)    177
Test (Pre-choice)       4
Name: ripple_nr, dtype: int64

<br>

## Prepare data for multitaper

"If we have three dimensions, dimension 1 is time, dimension 2 is trials, and dimensions 3 is signals. It is important to know note that dimension 2 now has a different meaning in that it represents trials and not signals now. Dimension 3 is now the signals dimension. We will show an example of this later."

time_series : array, shape (n_time_samples, n_trials, n_signals)

In [5]:
cg_data.loc[(cg_data['phase']=='Test (Pre-choice)')|(cg_data['phase']=='Test (Past-choice)'), 'phase']='Test'

In [6]:
# Split data by phase

### WORK WITh FIRST RIPPLES WITh SAMPLE RATE AT 2K FOR NOW
##test = cg_data[cg_data.ripple_nr.between(0,74)]
sample_df = cg_data.loc[cg_data['phase']=='Sample'].drop(['phase'], axis=1)
delay_df = cg_data.loc[cg_data['phase']=='Delay'].drop(['phase'], axis=1)
test_df = cg_data.loc[cg_data['phase']=='Test'].drop(['phase'], axis=1)
iti_df = cg_data.loc[cg_data['phase']=='ITI'].drop(['phase'], axis=1)

In [7]:
def prepare_for_multitaper(df, n_tts):
    '''
    Rearrange and prepare data for multitaper.
    df, Dataframe. 
    n_tts, int - Number of tetrodes (may vary with dataset)
    '''
    
    # Normalise relative timestamp to correct small jitters
    #df['relative_timestamp']=df.relative_timestamp.round(8)

    # Drop unecessary columns
    to_drop = ['start_time', 'end_time','timestamp']
    df = df.drop(to_drop, axis=1)

    # Create TT list
    tts = ['TT{}'.format(x) for x in range(1,n_tts+1)]
    
    # Re-shape dataset into array
    
    reshaped = df.pivot(
        index="relative_timestamp", 
        columns="ripple_nr", 
        values=tts,
    ).values.reshape(
        (
        df['relative_timestamp'].unique().size,
        df['ripple_nr'].unique().size,
        n_tts
        )
    )
    
    return reshaped

In [8]:
sample_array = prepare_for_multitaper(sample_df, 14)
iti_array = prepare_for_multitaper(iti_df, 14)
test_array = prepare_for_multitaper(test_df, 14)
delay_array = prepare_for_multitaper(delay_df, 14)

In [9]:
print( sample_array.shape, delay_array.shape, test_array.shape, iti_array.shape)

(43531, 200, 14) (36653, 143, 14) (36917, 181, 14) (37045, 104, 14)


<br>

### Create multitaper object

In [27]:
# Number of samples per time unit the signal(s) are recorded at
sampling_frequency = 2001    

# Controls the duration of the segment of time the transformation is computed on (seconds)
time_window_duration = 0.1

# Control how far the time window is slid (overlap).
# Setting the step to smaller than the time window duration will make the time windows overlap
time_window_step = 0.02

# Controls the frequency resolution of the Fourier transformed signal.
# Setting this parameter will define the default number of tapers used in the transform 
# (number of tapers = 2 * time_halfbandwidth_product - 1.).
 # Need to study this one although setting the above automatically changes this one
#time_halfbandwidth_product = 10 

In [28]:
sample_multitaper = Multitaper(
    sample_array, 
    sampling_frequency=sampling_frequency, 
    time_window_duration = time_window_duration,
    time_window_step = time_window_step#,
    #time_halfbandwidth_product= time_halfbandwidth_product
)

sample_multitaper

Multitaper(sampling_frequency=2001, time_halfbandwidth_product=3,
           time_window_duration=0.1, time_window_step=0.02,
           detrend_type='constant', start_time=0, n_tapers=5)

In [29]:
sample_multitaper.n_time_samples_per_window

200

In [30]:
sample_multitaper.frequencies

array([    0.   ,    10.005,    20.01 ,    30.015,    40.02 ,    50.025,
          60.03 ,    70.035,    80.04 ,    90.045,   100.05 ,   110.055,
         120.06 ,   130.065,   140.07 ,   150.075,   160.08 ,   170.085,
         180.09 ,   190.095,   200.1  ,   210.105,   220.11 ,   230.115,
         240.12 ,   250.125,   260.13 ,   270.135,   280.14 ,   290.145,
         300.15 ,   310.155,   320.16 ,   330.165,   340.17 ,   350.175,
         360.18 ,   370.185,   380.19 ,   390.195,   400.2  ,   410.205,
         420.21 ,   430.215,   440.22 ,   450.225,   460.23 ,   470.235,
         480.24 ,   490.245,   500.25 ,   510.255,   520.26 ,   530.265,
         540.27 ,   550.275,   560.28 ,   570.285,   580.29 ,   590.295,
         600.3  ,   610.305,   620.31 ,   630.315,   640.32 ,   650.325,
         660.33 ,   670.335,   680.34 ,   690.345,   700.35 ,   710.355,
         720.36 ,   730.365,   740.37 ,   750.375,   760.38 ,   770.385,
         780.39 ,   790.395,   800.4  ,   810.405, 

<br>

### Create connectivity object
The Connectivity class computes the frequency-domain connectivity measures from the Fourier coeffcients.

In [31]:
# expectation_type defines the dimensions of the cross-spectrum that are averaged over.
# Can be over time, trials, tapers or any combination of two or three. Default is by trials and tapers
expectation_type = "trials_tapers"

# Blocks is used to process metrics by array blocks is RAM is too expensive
blocks=3

In [None]:
conn = Connectivity.from_multitaper(
    sample_multitaper, 
    expectation_type=expectation_type, 
    blocks=blocks
)

In [None]:
power = (conn.power())
power.shape #(n time winodws, frequencies, n_tetrodes)

### Power spectral density plot - Heatmap

In [3]:
# PSD plot - Averaged across tetrodes (14 tetrodes = 1 response)   -- PEDING TO AFTER CHECKING VARIABILITY
# 1 plot per SWR location