In [27]:
import pandas
import numpy as np
from utils import *
import mne
import matplotlib.pyplot as plt
from os import walk
from sklearn.naive_bayes import GaussianNB

def get_psd(raw):
    raw_copy = raw.copy()
    raw_copy.notch_filter([50,100], verbose=False)
    # raw_copy.filter(8,12, verbose=False)
    psd, freq = mne.time_frequency.psd_welch(raw_copy,n_fft = 1024, verbose=False)
    # print(psd.shape)
    psd = 10 * np.log10(psd)
    # print(psd.shape)
    mean = psd.mean(0)
    std = psd.std(0)
    # events = mne.find_events(raw_copy, stim_channel='Marker', initial_event=True,)
    # epochs = mne.Epochs(raw_copy, events, tmin=1, tmax=2, baseline=(3,3))
    # frequencies = np.arange(7,30,3)
    # psd = mne.time_frequency.tfr_morlet(epochs,freqs=frequencies, n_cycles=2, return_itc=False,)
    # freq = None
    # mean = None
    # std = None
    psd = psd.sum(axis=1)
    return psd, freq, mean, std

def plot_psd(raw):
    psd, freq, mean, std = get_psd(raw)
    fig, ax = plt.subplots(figsize=(10,5))
    for i in range(8):
        ax.plot(freq,psd[i] ,label=raw.info['ch_names'][i], lw=1, alpha=0.6)
    ax.fill_between(250//2, mean - std, mean + std, color='k', alpha=.5)
    ax.set_xlabel('Frequency (Hz)')
    ax.set_ylabel('Amplitube (dBV)')
    ax.set_title('EEG of ')
    ax.legend()
    plt.show()

In [25]:
psd,_,_,_ = get_psd(raw)

In [26]:
psd

array([-85924.63690357, -85299.85840538, -85739.90628144, -84955.12971245,
       -87675.88408826, -85603.50331312, -86703.58610764, -85611.18734765])

## Groud Truth

In [4]:
plt.style.use('seaborn-whitegrid')

df = pandas.read_csv('./HEXACO.csv')
# Honesty-Humility	Emotionality	eXtraversion	Agreeableness	Conscientiousness	Openness to Experience
gt = df[['id','Honesty-Humility','Emotionality','eXtraversion','Agreeableness','Conscientiousness','Openness to Experience']].rename(columns={'Honesty-Humility':'h',
                                  'Emotionality':'e',
                                  'eXtraversion':'x',
                                 'Agreeableness':'a',
                             'Conscientiousness':'c',
                        'Openness to Experience':'o'}).set_index('id')
labels = []
for i in gt.index:
    # print(gt.loc[i], np.argmax(gt[['h','e','x','a','c','o']].loc[i]))
    labels.append(np.argmax(gt[['h','e','x','a','c','o']].loc[i]))
gt['label'] = labels
gt

Unnamed: 0_level_0,h,e,x,a,c,o,label
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2,3.63,3.19,2.94,2.38,3.38,2.38,0
3,3.38,3.44,3.5,3.5,4.5,3.81,4
4,3.19,3.75,3.69,3.19,2.63,2.63,1
5,3.25,3.63,2.13,3.56,3.63,2.31,1
6,3.25,2.75,3.5,2.5,3.75,5.0,5
7,4.06,3.06,3.38,2.88,2.5,4.19,5
8,3.94,2.94,3.19,3.75,3.38,3.81,0
9,4.44,4.0,3.38,3.75,3.69,3.31,0
10,3.63,3.25,3.5,3.31,3.88,2.69,4
11,3.31,4.0,2.25,3.19,2.81,3.19,1


## EEG Data

In [5]:
path, folders, filenames = next(walk('./data'))
print(filenames)

['29-audio.csv', '5-audio.csv', '10-audio.csv', '17-audio.csv', '10-image.csv', '14-audio.csv', '33-audio.csv', '31-audio.csv', '9-audio.csv', '20-audio.csv', '6-audio.csv', '13-image.csv', '25-image.csv', '5-image.csv', '14-image.csv', '22-image.csv', '7-image.csv', '23-audio.csv', '28-image.csv', '15-audio.csv', '25-audio.csv', '32-audio.csv', '21-image.csv', '23-image.csv', '2-audio.csv', '36-audio.csv', '26-audio.csv', '12-audio.csv', '3-image.csv', '2-image.csv', '35-image.csv', '12-image.csv', '30-image.csv', '36-image.csv', '27-audio.csv', '15-image.csv', '32-image.csv', '26-image.csv', '8-audio.csv', '34-image.csv', '8-image.csv', '20-image.csv', '9-image.csv', '3-audio.csv', '35-audio.csv', '16-audio.csv', '22-audio.csv', '17-image.csv', '7-audio.csv', '11-audio.csv', '18-image.csv', '27-image.csv', '29-image.csv', '13-audio.csv', '6-image.csv', '30-audio.csv', '21-audio.csv', '31-image.csv', '33-image.csv', '34-audio.csv', '28-audio.csv', '11-image.csv', '16-image.csv', '18-a

In [6]:
# import pandas
# data = pandas.read_csv(f'./data/5-audio.csv', dtype={'Marker': str})#.rename(columns=columns).drop(columns='timestamps')
# # data[9]

In [7]:
gt.drop([4,36],inplace=True)

filenames.remove('36-audio.csv')
filenames.remove('36-image.csv')

In [8]:
path = './data'
columns = {'Unnamed: 1':'Fp1',
        'Unnamed: 2':'Fp2',
        'Unnamed: 3':'F3',
        'Unnamed: 4':'F4',
        'Unnamed: 5':'F7',
        'Unnamed: 6':'F8',
        'Unnamed: 7':'P7',
        'Unnamed: 8':'P8'}

EEG_audio, EEG_image = dict(), dict()
from itertools import product
categories = [1,2,3,4,5]
blocks = [1,2]
for filename in filenames:
    participant_id, stimuli = filename.split('-')
    stimuli = stimuli.rstrip('.csv')
    data = pandas.read_csv(f'{path}/{filename}', dtype={'Marker': str}).rename(columns=columns).drop(columns='timestamps')
    print(participant_id, stimuli)
    experiment = dict()
    for (category, block) in product(categories,blocks):
        # print("   ", category, block)
        section = get_section_from_catblock(data, category=category,block=block)
        experiment[f"{category}_{block}"] = section
    if(stimuli == 'audio'):
        EEG_audio[int(participant_id)] = experiment
    elif(stimuli == 'image'):
        EEG_image[int(participant_id)] = experiment
    else:
        raise ValueError(f"Stimuli:{stimuli} is unexpected.")

29 audio
5 audio
10 audio
17 audio
10 image
14 audio
33 audio
31 audio
9 audio
20 audio
6 audio
13 image
25 image
5 image
14 image
22 image
7 image
23 audio
28 image
15 audio
25 audio
32 audio
21 image
23 image
2 audio
26 audio
12 audio
3 image
2 image
35 image
12 image
30 image
27 audio
15 image
32 image
26 image
8 audio
34 image
8 image
20 image
9 image
3 audio
35 audio
16 audio
22 audio
17 image
7 audio
11 audio
18 image
27 image
29 image
13 audio
6 image
30 audio
21 audio
31 image
33 image
34 audio
28 audio
11 image
16 image
18 audio


In [9]:
# X = np.array()
X = []
y = []
features = []
# ids = [33,2,3,14,6]
ids = gt.index.tolist()
for id in ids:
    print('\n',"="*20,id)
    for i in ['1_1','1_2','2_1','2_2','3_1','3_2','4_1','4_2','5_1','5_2']:
        raw = dataframe_to_raw(EEG_image[id][i], sfreq=250)
        psd, _,_,_ = get_psd(raw)
        # raw.notch_filter([50,100])
        # # psd, freq = mne.time_frequency.psd_welch(raw)
        # psd, freq = mne.time_frequency.psd_welch(raw,n_fft = 2048*2  )
        # psd = 10 * np.log10(psd)
        features.append(psd.data)
    X.append(features)
    y.append(gt.loc[id]['label'])
X = np.array(X).reshape(len(ids),-1)
y = np.array(y)

s=9, n_times=7606
    Range : 0 ... 7605 =      0.000 ...    30.420 secs
Ready.
Creating RawArray with float64 data, n_channels=9, n_times=7592
    Range : 0 ... 7591 =      0.000 ...    30.364 secs
Ready.
Creating RawArray with float64 data, n_channels=9, n_times=7650
    Range : 0 ... 7649 =      0.000 ...    30.596 secs
Ready.
Creating RawArray with float64 data, n_channels=9, n_times=7590
    Range : 0 ... 7589 =      0.000 ...    30.356 secs
Ready.
Creating RawArray with float64 data, n_channels=9, n_times=6367
    Range : 0 ... 6366 =      0.000 ...    25.464 secs
Ready.
Creating RawArray with float64 data, n_channels=9, n_times=7650
    Range : 0 ... 7649 =      0.000 ...    30.596 secs
Ready.

Creating RawArray with float64 data, n_channels=9, n_times=7577
    Range : 0 ... 7576 =      0.000 ...    30.304 secs
Ready.
Creating RawArray with float64 data, n_channels=9, n_times=7591
    Range : 0 ... 7590 =      0.000 ...    30.360 secs
Ready.
Creating RawArray with float64 data, 

In [10]:
print(X.shape)
print(y.shape)
print(y)

(31, 2480)
(31,)
[0. 4. 1. 5. 5. 0. 0. 4. 1. 5. 5. 0. 4. 1. 0. 1. 2. 0. 4. 1. 4. 0. 1. 1.
 5. 0. 0. 2. 0. 4. 5.]


In [11]:
from sklearn.preprocessing import normalize
X_norm = normalize(X)
gnb = GaussianNB()
# y_pred = gnb.fit(X_train, y_train).predict(X_test)
gnb.fit(X_norm, y)
print(gnb.predict(X_norm))
print(gnb.predict_proba(X_norm))

[2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.
 2. 2. 2. 2. 2. 2. 2.]
[[0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0.]]


In [12]:
# from sklearn.linear_model import LinearRegression
# reg = LinearRegression().fit(X, y)
# print(reg.coef_, reg.intercept_)
# reg.score(X, y)


In [13]:
# print(X.shape)
# X