# Depression and Anxiety classification with CSP spatial filter

Based on the study by [Cavanagh et al. (2019)](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6515849/)

Import packages

In [1]:
import io
import mne
import copy
import glob
import array
import matplotlib
import numpy as np
import pandas as pd
import sklearn.metrics
import seaborn as sns
import scipy.io as sio
import plotly.express as px
import matplotlib.pyplot as plt


from itertools import chain
from sklearn.svm import SVC
from scipy.io import loadmat
from sklearn import set_config

from sklearn import preprocessing
from sklearn.pipeline import Pipeline
from sklearn.metrics import roc_auc_score, classification_report
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import GridSearchCV, ShuffleSplit
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import permutation_test_score


import numpy as np
import matplotlib.pyplot as plt

from sklearn.pipeline import Pipeline
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import ShuffleSplit, cross_val_score, RepeatedStratifiedKFold
from sklearn.utils import resample

from mne import Epochs, pick_types, events_from_annotations
from mne.channels import make_standard_montage
from mne.io import concatenate_raws, read_raw_edf
from mne.datasets import eegbci
from mne.decoding import CSP

from mne.preprocessing import Xdawn
from mne.decoding import Vectorizer

from mne.decoding import UnsupervisedSpatialFilter

from sklearn.decomposition import PCA, FastICA

from scipy import stats

# parameters for plotting
plt.rcParams["figure.figsize"] = (10,7)

import seaborn as sns
sns.set_theme(style="whitegrid", palette="deep")

from sklearn.metrics import r2_score
from scipy.stats import pearsonr
from scipy.stats import ttest_ind
from scipy.stats import ttest_rel

Constatnts

In [2]:
random_state = 42

## Load questionnarie data

In [11]:
data = pd.read_csv('data/sonata_data/questionnaires.csv', dtype={'Demo_kod': object})

In [12]:
ids_autoreject = np.load('data/sonata_data/ids/gng_fla_common_ids_autoreject.npy')
ids_autoreject.sort()
len(ids_autoreject)

218

In [13]:
ids_sonata = np.load('data/sonata_data/ids/gng_fla_common_ids.npy')
ids_sonata.sort()
len(ids_sonata)

190

In [14]:
data

Unnamed: 0,Demo_kod,BDI,BDI_cognitive,BDI_affective,BDI_affect_cog,BDI_somatic,STAI
0,000,6,0.17,0.67,0.83,0.50,40
1,001,17,0.67,1.00,1.67,0.83,52
2,002,4,0.33,0.33,0.67,0.00,50
3,003,1,0.00,0.00,0.00,0.17,32
4,004,5,0.33,0.33,0.67,0.00,46
...,...,...,...,...,...,...,...
220,220,7,0.33,0.33,0.67,0.33,38
221,221,27,0.67,1.67,2.33,1.83,54
222,222,12,0.17,0.00,0.17,1.50,49
223,223,0,0.00,0.00,0.00,0.00,31


In [72]:
ids = ids_sonata

In [73]:
data_df = data[data['Demo_kod'].isin(ids)]

In [74]:
dep = data_df[(data_df['BDI'] > 13) & (data_df['STAI'] > 41)]
len(dep)

61

In [75]:
ctrl_dep = data_df[(data_df['BDI'] <= 13) & (data_df['STAI'] > 41)]
len(ctrl_dep)

62

In [76]:
anx = data_df[(data_df['BDI'] <= 13) & (data_df['STAI'] > 42)]
len(anx)

57

In [77]:
ctrl_anx = data_df[(data_df['BDI'] <= 13) & (data_df['STAI'] < 41)]
len(ctrl_anx)

59

#### Depression

- TAI

In [78]:
control_depression_tai = ctrl_dep['STAI'].to_numpy()
depression_tai = dep['STAI'].to_numpy()

In [79]:
control_depression_tai

array([50, 46, 47, 50, 45, 46, 43, 55, 45, 51, 49, 48, 51, 45, 43, 45, 48,
       46, 45, 43, 42, 53, 48, 50, 47, 46, 50, 51, 42, 43, 45, 45, 44, 47,
       47, 47, 45, 51, 43, 43, 46, 51, 51, 46, 55, 45, 42, 49, 49, 46, 49,
       53, 49, 53, 52, 45, 53, 44, 42, 42, 48, 45])

In [80]:
depression_tai

array([52, 56, 58, 45, 42, 42, 56, 66, 53, 58, 58, 59, 50, 68, 52, 49, 61,
       47, 55, 56, 62, 53, 49, 51, 45, 57, 58, 58, 50, 49, 43, 70, 56, 43,
       62, 54, 58, 62, 56, 56, 58, 57, 64, 53, 54, 46, 56, 57, 54, 47, 51,
       57, 54, 49, 45, 56, 55, 50, 50, 62, 57])

In [69]:
print(f"Depression group: mean TAI score: {depression_tai.mean()} SD = {depression_tai.std()}")
print(f"Control group: mean TAI score: {control_depression_tai.mean()} SD = {control_depression_tai.std()}")

Depression group: mean TAI score: 54.35616438356164 SD = 6.086972081721993
Control group: mean TAI score: 47.18055555555556 SD = 3.5052594962145767


In [71]:
t_value, p_value = ttest_rel(depression_tai[1:], control_depression_tai)
print(f"t({len(control_depression_tai) -1}) = {t_value}, p = {p_value}")

t(71) = 8.167449780239213, p = 8.180078726635595e-12


In [81]:
print(f"Depression group: mean TAI score: {depression_tai.mean()} SD = {depression_tai.std()}")
print(f"Control group: mean TAI score: {control_depression_tai.mean()} SD = {control_depression_tai.std()}")

Depression group: mean TAI score: 54.21311475409836 SD = 6.196547833463828
Control group: mean TAI score: 47.17741935483871 SD = 3.4572233570121775


In [82]:
t_value, p_value = ttest_rel(depression_tai, control_depression_tai[1:])
print(f"t({len(control_depression_tai) -1}) = {t_value}, p = {p_value}")

t(61) = 8.139698347152219, p = 2.8317060199842808e-11


- BDI

In [83]:
control_depression_bdi = ctrl_dep['BDI'].to_numpy()
depression_bdi = dep['BDI'].to_numpy()

In [84]:
control_depression_bdi

array([ 4,  5,  9,  9,  9, 10,  6,  8, 13, 10, 12,  8, 11, 11,  6,  5,  7,
        1,  8,  2,  5, 11,  4, 12,  3, 11,  7,  6, 11, 10,  3, 11,  0,  9,
        3, 11,  2,  1,  2,  0,  2,  6, 11, 11,  6,  7, 11,  7,  4,  3,  2,
        9,  6,  5,  5,  7,  8, 11,  3, 13, 11,  8])

In [85]:
depression_bdi

array([17, 14, 17, 14, 16, 18, 21, 47, 18, 16, 17, 22, 37, 39, 20, 25, 38,
       15, 16, 19, 21, 19, 22, 28, 14, 25, 34, 17, 14, 21, 16, 48, 16, 20,
       44, 19, 30, 15, 26, 27, 14, 19, 25, 19, 29, 30, 25, 22, 26, 15, 19,
       32, 27, 15, 14, 19, 33, 29, 16, 38, 17])

In [30]:
print(f"Depression group: mean BDI score: {depression_bdi.mean()} SD = {depression_bdi.std()}")
print(f"Control group: mean BDI score: {control_depression_bdi.mean()} SD = {control_depression_bdi.std()}")

Depression group: mean BDI score: 23.71232876712329 SD = 8.78741231107252
Control group: mean BDI score: 7.055555555555555 SD = 3.66245549418878


In [46]:
t_value, p_value = ttest_rel(depression_bdi[1:], control_depression_bdi)
print(f"t({len(depression_bdi[1:]) -1}) = {t_value}, p = {p_value}")

t(71) = 13.939202326958592, p = 5.2141707752269915e-22


In [86]:
print(f"Depression group: mean BDI score: {depression_bdi.mean()} SD = {depression_bdi.std()}")
print(f"Control group: mean BDI score: {control_depression_bdi.mean()} SD = {control_depression_bdi.std()}")

Depression group: mean BDI score: 23.0327868852459 SD = 8.606073366561992
Control group: mean BDI score: 6.967741935483871 SD = 3.578464898553135


In [87]:
t_value, p_value = ttest_rel(depression_bdi, control_depression_bdi[1:])
print(f"t({len(depression_bdi[1:]) -1}) = {t_value}, p = {p_value}")

t(59) = 12.933271201067923, p = 5.28982380150104e-19


#### Anxiety

- TAI

In [88]:
control_anx_tai = ctrl_anx['STAI'].to_numpy()
anx_tai = anx['STAI'].to_numpy()

In [89]:
control_anx_tai

array([40, 32, 40, 40, 40, 38, 34, 39, 28, 40, 39, 33, 26, 32, 34, 35, 30,
       39, 30, 31, 29, 35, 37, 40, 31, 38, 31, 35, 39, 26, 38, 38, 40, 36,
       35, 38, 32, 35, 39, 39, 28, 29, 30, 34, 32, 35, 40, 30, 32, 35, 34,
       38, 37, 35, 37, 40, 37, 38, 31])

In [90]:
anx_tai

array([50, 46, 47, 50, 45, 46, 43, 55, 45, 51, 49, 48, 51, 45, 43, 45, 48,
       46, 45, 43, 53, 48, 50, 47, 46, 50, 51, 43, 45, 45, 44, 47, 47, 47,
       45, 51, 43, 43, 46, 51, 51, 46, 55, 45, 49, 49, 46, 49, 53, 49, 53,
       52, 45, 53, 44, 48, 45])

In [35]:
print(f"Anxiety group: mean TAI score: {anx_tai.mean()} SD = {anx_tai.std()}")
print(f"Control group: mean TAI score: {control_anx_tai.mean()} SD = {control_anx_tai.std()}")

Anxiety group: mean TAI score: 47.738461538461536 SD = 3.2262239088536884
Control group: mean TAI score: 34.78125 SD = 4.192510994320706


In [45]:
t_value, p_value = ttest_rel(anx_tai[1:], control_anx_tai)
print(f"t({len(anx_tai[1:]) -1}) = {t_value}, p = {p_value}")

t(63) = 19.156404243278974, p = 5.7518200739571165e-28


In [91]:
print(f"Anxiety group: mean TAI score: {anx_tai.mean()} SD = {anx_tai.std()}")
print(f"Control group: mean TAI score: {control_anx_tai.mean()} SD = {control_anx_tai.std()}")

Anxiety group: mean TAI score: 47.63157894736842 SD = 3.2315960911597372
Control group: mean TAI score: 34.96610169491525 SD = 4.012548544275399


In [92]:
t_value, p_value = ttest_rel(anx_tai, control_anx_tai[2:])
print(f"t({len(anx_tai[1:]) -1}) = {t_value}, p = {p_value}")

t(55) = 19.29026161025925, p = 2.0932656225986029e-26


- BDI

In [93]:
control_anx_bdi = ctrl_anx['BDI'].to_numpy()
anx_bdi = anx['BDI'].to_numpy()

In [94]:
print(control_anx_bdi)
print(anx_bdi)

[ 6  1 11  0  1  5 10  6  7  6  3  4  1  0  8  3  8  7  1  4  2  5  5  3
  1  5  0  6  8  0 10  9  7  1  2  4  4  8  7 11  0  5  0  2  3  5  5  0
  1  3  5  6 12  6  0 11  4  7  0]
[ 4  5  9  9  9 10  6  8 13 10 12  8 11 11  6  5  7  1  8  2 11  4 12  3
 11  7  6 10  3 11  0  9  3 11  2  1  2  0  2  6 11 11  6  7  7  4  3  2
  9  6  5  5  7  8 11 11  8]


In [39]:
print(f"Anxiety group: mean BDI score: {anx_bdi.mean()} SD = {anx_bdi.std()}")
print(f"Control group: mean BDI score: {control_anx_bdi.mean()} SD = {control_anx_bdi.std()}")

Anxiety group: mean BDI score: 7.0 SD = 3.6584990617212663
Control group: mean BDI score: 4.40625 SD = 3.334098219534032


In [44]:
t_value, p_value = ttest_rel(anx_bdi[1:], control_anx_bdi)
print(f"t({len(anx_bdi[1:]) -1}) = {t_value}, p = {p_value}")

t(63) = 3.7337277013803436, p = 0.0004078572756507282


In [95]:
print(f"Anxiety group: mean BDI score: {anx_bdi.mean()} SD = {anx_bdi.std()}")
print(f"Control group: mean BDI score: {control_anx_bdi.mean()} SD = {control_anx_bdi.std()}")

Anxiety group: mean BDI score: 6.824561403508772 SD = 3.5149944823771615
Control group: mean BDI score: 4.491525423728813 SD = 3.3414341859215426


In [96]:
t_value, p_value = ttest_rel(anx_bdi, control_anx_bdi[2:])
print(f"t({len(anx_bdi[1:]) -1}) = {t_value}, p = {p_value}")

t(55) = 3.179354788277148, p = 0.002405215027405793


---

In [4]:
data['disorder'] = 0
for i in range(len(data)):
    if data['BDI'][i] != '#NULL!' and data['BDI'][i] >= 13:
        data['disorder'][i] = 1
    else:
        data['disorder'][i] = 0

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['disorder'][i] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['disorder'][i] = 1


In [5]:
data

Unnamed: 0,id,SCID,SCID_note,sex,age,BDI,BDI_cog,BDI_aff,BDI_som,TAI,...,loconf_rt,pos_rt,neg_rt,hiconf_pos_rt,loconf_pos_rt,hiconf_neg_rt,loconf_neg_rt,TST_aG,TST_aL,disorder
0,507,99.0,,1,19,0.0,0.00,0.00,0.00,23.0,...,753.41,722.02,828.73,728.22,721.10,905.28,785.73,0.12,0.00,0
1,508,99.0,,1,18,4.0,0.00,0.67,0.17,47.0,...,932.63,772.36,986.75,771.20,896.43,1202.03,968.84,0.28,0.93,0
2,509,99.0,,1,18,7.0,0.17,0.00,0.67,44.0,...,739.20,739.28,830.53,794.16,736.67,830.53,741.74,0.07,0.05,0
3,510,99.0,,1,19,1.0,0.00,0.00,0.17,27.0,...,1239.09,1231.04,1500.54,1113.41,1123.20,1035.47,1354.97,0.37,0.63,0
4,511,99.0,,2,22,1.0,0.00,0.00,0.17,23.0,...,1527.80,1908.43,1305.88,1885.73,1731.81,1297.05,1323.80,0.95,0.00,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
116,624,1.0,,1,20,23.0,1.33,1.00,0.67,60.0,...,696.15,685.56,799.69,764.35,645.86,843.43,746.44,0.65,0.81,1
117,625,2.0,subsyndromal current,1,19,16.0,0.67,1.00,0.83,60.0,...,1149.39,1130.02,1153.62,1159.48,1116.37,1139.42,1182.42,0.69,1.00,1
118,626,1.0,,1,18,14.0,0.33,1.00,0.83,41.0,...,1805.77,1878.83,1732.71,1071.52,1878.83,1780.84,1732.71,0.70,0.38,1
119,627,2.0,,2,19,30.0,1.00,1.33,2.17,47.0,...,2131.00,2065.05,2270.20,2167.89,1941.75,2261.39,2320.25,0.10,0.61,1


In [6]:
ids = data['id'][:].astype(int)

## Load EEG data

In [18]:
ctrl_dep = [data.iloc[part] for part in range(len(data)) if data['BDI'][part] <= 13 and data['TAI'][part] > 28]
ctrl_dep = pd.DataFrame(ctrl_dep)
len(ctrl_dep)

47

In [19]:
ctrl_dep = ctrl_dep[(ctrl_dep['id'] != 599) & (ctrl_dep['id'] != 600)]
len(ctrl_dep)

45

In [20]:
dep = [data.iloc[part] for part in range(len(data)) if data['BDI'][part] > 13.0 and data['TAI'][part] > 28]
dep = pd.DataFrame(dep)
len(dep)

45

In [21]:
anx = [data.iloc[part] for part in range(len(data)) if (data['TAI'][part] > 31) and (data['BDI'][part] < 7)]
anx = pd.DataFrame(anx)
len(anx)

31

In [22]:
anx = anx[(anx['id'] != 599) & (anx['id'] != 600)]
len(anx)

29

In [23]:
ctrl_anx = [data.iloc[part] for part in range(len(data)) if (data['TAI'][part] < 29) and (data['BDI'][part] < 7)]
ctrl_anx = pd.DataFrame(ctrl_anx)
len(ctrl_anx)

29

In [17]:
# ctrl_dep = [data.iloc[part] for part in range(len(data)) if data['BDI'][part] <= 7 and data['TAI'][part] > 28]
# ctrl_dep = pd.DataFrame(ctrl_dep)
# len(ctrl_dep)

# dep = [data.iloc[part] for part in range(len(data)) if data['BDI'][part] > 13 and data['TAI'][part] < 60][:29]
# dep = pd.DataFrame(dep)
# len(dep)

# anx = [data.iloc[part] for part in range(len(data)) if (data['TAI'][part] > 31) and (data['BDI'][part] < 7)]
# anx = pd.DataFrame(anx)
# len(anx)

# anx = anx[(anx['id'] != 599) & (anx['id'] != 600)]
# len(anx)

# ctrl_anx = [data.iloc[part] for part in range(len(data)) if (data['TAI'][part] < 29) and (data['BDI'][part] < 7)]
# ctrl_anx = pd.DataFrame(ctrl_anx)
# len(ctrl_anx)

### Test statistical differences between gropus

#### Depression

- TAI

In [24]:
control_depression_tai = ctrl_dep['TAI'].to_numpy()
depression_tai = dep['TAI'].to_numpy()

In [25]:
control_depression_tai

array([47., 44., 37., 36., 36., 40., 30., 30., 38., 30., 30., 31., 33.,
       34., 33., 42., 37., 36., 37., 37., 32., 38., 29., 36., 30., 34.,
       29., 29., 38., 33., 34., 38., 29., 35., 33., 38., 30., 31., 31.,
       36., 34., 36., 30., 35., 57.])

In [26]:
depression_tai

array([51., 69., 54., 62., 59., 48., 56., 50., 38., 52., 59., 60., 57.,
       65., 60., 61., 50., 60., 65., 52., 57., 52., 47., 63., 57., 58.,
       58., 58., 63., 58., 58., 68., 48., 57., 53., 50., 64., 44., 41.,
       62., 60., 60., 41., 47., 56.])

In [27]:
print(f"Depression group: mean TAI score: {depression_tai.mean()} SD = {depression_tai.std()}")
print(f"Control group: mean TAI score: {control_depression_tai.mean()} SD = {control_depression_tai.std()}")

Depression group: mean TAI score: 55.733333333333334 SD = 7.078606265831337
Control group: mean TAI score: 34.955555555555556 SD = 5.261835571380667


In [28]:
t_value, p_value = ttest_rel(depression_tai, control_depression_tai)
print(f"t({len(control_depression_tai) -1}) = {t_value}, p = {p_value}")

t(44) = 16.908778593341868, p = 7.223361449183723e-21


- BDI

In [29]:
control_depression_bdi = ctrl_dep['BDI'].to_numpy()
depression_bdi = dep['BDI'].to_numpy()

In [30]:
control_depression_bdi

array([ 4.,  7.,  5.,  5.,  1.,  6.,  0.,  1.,  3.,  2.,  0.,  1.,  1.,
        1.,  1.,  1.,  5.,  2.,  2.,  2.,  0.,  1.,  0.,  0.,  1.,  0.,
        5.,  2.,  3.,  2.,  4.,  1.,  1.,  2.,  4.,  5.,  0.,  1.,  4.,
        3.,  3.,  3.,  0.,  1., 13.])

In [31]:
depression_bdi

array([29., 25., 27., 27., 24., 18., 24., 28., 22., 18., 22., 29., 22.,
       30., 27., 19., 20., 19., 27., 21., 22., 20., 20., 28., 23., 26.,
       15., 15., 25., 28., 19., 30., 16., 18., 24., 23., 27., 17., 14.,
       19., 23., 16., 14., 30., 19.])

In [32]:
print(f"Depression group: mean BDI score: {depression_bdi.mean()} SD = {depression_bdi.std()}")
print(f"Control group: mean BDI score: {control_depression_bdi.mean()} SD = {control_depression_bdi.std()}")

Depression group: mean BDI score: 22.42222222222222 SD = 4.697464511787841
Control group: mean BDI score: 2.422222222222222 SD = 2.4266015457277685


In [33]:
t_value, p_value = ttest_rel(depression_bdi, control_depression_bdi)
print(f"t({len(depression_bdi) -1}) = {t_value}, p = {p_value}")

t(44) = 23.50109051150873, p = 1.5465183832089604e-26


#### Anxiety

- TAI

In [34]:
control_anx_tai = ctrl_anx['TAI'].to_numpy()
anx_tai = anx['TAI'].to_numpy()

In [35]:
control_anx_tai

array([23., 27., 23., 26., 22., 28., 23., 28., 24., 27., 24., 27., 26.,
       27., 26., 27., 22., 25., 27., 27., 24., 24., 26., 28., 28., 27.,
       26., 25., 27.])

In [36]:
anx_tai

array([47., 37., 36., 36., 40., 38., 33., 34., 33., 42., 37., 36., 37.,
       37., 32., 38., 36., 34., 38., 33., 34., 38., 35., 33., 38., 36.,
       34., 36., 35.])

In [37]:
print(f"Anxiety group: mean TAI score: {anx_tai.mean()} SD = {anx_tai.std()}")
print(f"Control group: mean TAI score: {control_anx_tai.mean()} SD = {control_anx_tai.std()}")

Anxiety group: mean TAI score: 36.310344827586206 SD = 3.0183741715888415
Control group: mean TAI score: 25.655172413793103 SD = 1.8434574351205044


In [38]:
t_value, p_value = ttest_rel(anx_tai, control_anx_tai)
print(f"t({len(anx_tai) -1}) = {t_value}, p = {p_value}")

t(28) = 14.948557727567003, p = 7.11188718218075e-15


- BDI

In [39]:
control_anx_bdi = ctrl_anx['BDI'].to_numpy()
anx_bdi = anx['BDI'].to_numpy()

In [41]:
print(control_anx_bdi)
print(anx_bdi)

[0. 1. 1. 1. 0. 0. 0. 3. 2. 0. 2. 0. 1. 2. 2. 3. 0. 0. 0. 1. 1. 2. 2. 1.
 0. 2. 3. 0. 2.]
[4. 5. 5. 1. 6. 3. 1. 1. 1. 1. 5. 2. 2. 2. 0. 1. 0. 0. 3. 2. 4. 1. 2. 4.
 5. 3. 3. 3. 1.]


In [42]:
print(f"Anxiety group: mean BDI score: {anx_bdi.mean()} SD = {anx_bdi.std()}")
print(f"Control group: mean BDI score: {control_anx_bdi.mean()} SD = {control_anx_bdi.std()}")

Anxiety group: mean BDI score: 2.4482758620689653 SD = 1.6935212631460241
Control group: mean BDI score: 1.103448275862069 SD = 1.028719578632848


In [45]:
t_value, p_value = ttest_rel(anx_bdi, control_anx_bdi)
print(f"t({len(anx_bdi) -1}) = {t_value}, p = {p_value}")

t(28) = 3.3036953355784657, p = 0.0026156254959064104
