In [1]:
import pandas
import numpy as np
from utils import *
import mne
import matplotlib.pyplot as plt
from os import walk
from sklearn.naive_bayes import GaussianNB

def get_psd(raw, filter=True):
    """Estimate the Welch power spectral density of ``raw`` in decibels.

    The input is copied first, so any filtering done here never touches the
    caller's object.

    Parameters
    ----------
    raw : MNE object accepted by ``mne.time_frequency.psd_welch``
        The recording to analyse.
    filter : bool
        When true, ``filter(8, 12)`` is applied to the copy before the PSD
        is estimated.

    Returns
    -------
    tuple
        ``(psd, freq, mean, std)`` where ``psd`` is ``10 * log10`` of the
        Welch estimate, ``freq`` is the frequency axis returned by MNE, and
        ``mean`` / ``std`` are taken along axis 0 of ``psd`` (presumably the
        channel axis -- confirm against the MNE version in use).
    """
    signal = raw.copy()
    if filter:
        # raw_copy.notch_filter([50,100], verbose=False)
        signal.filter(8, 12, verbose=False)
    power, freq = mne.time_frequency.psd_welch(signal, n_fft=1024, verbose=False)
    # Convert power to a decibel scale.
    power_db = 10 * np.log10(power)
    return power_db, freq, power_db.mean(0), power_db.std(0)

def get_epoch(raw, verbose=False):
    """Cut ``raw`` into epochs around the events on the ``'Marker'`` channel.

    Exactly five events, and five resulting epochs, are required.

    Parameters
    ----------
    raw : MNE raw object
        Recording that contains a ``'Marker'`` stim channel.
    verbose : bool
        Forwarded to ``mne.find_events`` and ``mne.Epochs``.

    Returns
    -------
    mne.Epochs
        Epochs spanning 0.3 s to 5.5 s relative to each event.

    Raises
    ------
    ValueError
        If the number of events found, or the number of epochs actually
        produced, is not 5.
    """
    expected = 5
    working = raw.copy()
    events = mne.find_events(working, stim_channel='Marker', initial_event=True, verbose=verbose)

    n_events = len(events)
    if n_events != expected:
        raise ValueError(f"The event is missing. Number of event not equal to 5 ({n_events}).\n {events}")

    epochs = mne.Epochs(working, events, tmin=0.3, tmax=5.5, baseline=(0.3,0.3), verbose=verbose)

    # The epoch count can differ from the event count, so check it separately.
    data_shape = epochs.get_data().shape
    if data_shape[0] != expected:
        raise ValueError(f"There might be a bad data. epochs.get_data().shape = {data_shape}")
    return epochs

def plot_psd(raw):
    """Plot the per-channel PSD of ``raw`` with a mean +/- std band.

    The PSD comes from ``get_psd(raw)`` (filtering enabled by default).
    One line is drawn per PSD row, and a shaded band marks one standard
    deviation around the mean returned by ``get_psd``.

    Parameters
    ----------
    raw : MNE raw object
        Recording to plot; ``raw.info['ch_names']`` supplies the legend labels.
    """
    psd, freq, mean, std = get_psd(raw)
    fig, ax = plt.subplots(figsize=(10,5))
    # Label each PSD row with the channel name at the same position.
    # NOTE(review): assumes the data channels come first in ch_names and in
    # the same order as the PSD rows -- confirm.
    for channel_psd, channel_name in zip(psd, raw.info['ch_names']):
        ax.plot(freq, channel_psd, label=channel_name, lw=1, alpha=0.6)
    # The band must share the frequency axis of the curves; a scalar x would
    # collapse it to a single vertical sliver.
    ax.fill_between(freq, mean - std, mean + std, color='k', alpha=.5)
    ax.set_xlabel('Frequency (Hz)')
    ax.set_ylabel('Amplitude (dBV)')
    ax.set_title('EEG of ')
    ax.legend()
    plt.show()

## Ground Truth

In [2]:
plt.style.use('seaborn-whitegrid')

df = pandas.read_csv('./HEXACO.csv')
# Honesty-Humility	Emotionality	eXtraversion	Agreeableness	Conscientiousness	Openness to Experience
# Full questionnaire column name -> one-letter trait code.
trait_codes = {
    'Honesty-Humility': 'h',
    'Emotionality': 'e',
    'eXtraversion': 'x',
    'Agreeableness': 'a',
    'Conscientiousness': 'c',
    'Openness to Experience': 'o',
}
gt = df[['id', *trait_codes]].rename(columns=trait_codes).set_index('id')

# The label of a participant is the position (0-5, in h/e/x/a/c/o order)
# of their highest trait score.
scores = gt[['h','e','x','a','c','o']]
labels = [np.argmax(scores.loc[i]) for i in gt.index]
gt['label'] = labels
gt

Unnamed: 0_level_0,h,e,x,a,c,o,label
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2,3.63,3.19,2.94,2.38,3.38,2.38,0
3,3.38,3.44,3.5,3.5,4.5,3.81,4
4,3.19,3.75,3.69,3.19,2.63,2.63,1
5,3.25,3.63,2.13,3.56,3.63,2.31,1
6,3.25,2.75,3.5,2.5,3.75,5.0,5
7,4.06,3.06,3.38,2.88,2.5,4.19,5
8,3.94,2.94,3.19,3.75,3.38,3.81,0
9,4.44,4.0,3.38,3.75,3.69,3.31,0
10,3.63,3.25,3.5,3.31,3.88,2.69,4
11,3.31,4.0,2.25,3.19,2.81,3.19,1


## EEG Data

In [3]:
# Take only the first (dirpath, dirnames, filenames) triple from walk(),
# i.e. the top level of ./data; sub-directories are not descended into.
(path, folders, filenames) = next(walk("./data"))
print(filenames)

['29-audio.csv', '5-audio.csv', '10-audio.csv', '17-audio.csv', '10-image.csv', '14-audio.csv', '33-audio.csv', '31-audio.csv', '9-audio.csv', '20-audio.csv', '6-audio.csv', '13-image.csv', '25-image.csv', '5-image.csv', '14-image.csv', '22-image.csv', '7-image.csv', '23-audio.csv', '28-image.csv', '15-audio.csv', '25-audio.csv', '32-audio.csv', '21-image.csv', '23-image.csv', '2-audio.csv', '36-audio.csv', '26-audio.csv', '12-audio.csv', '3-image.csv', '2-image.csv', '35-image.csv', '12-image.csv', '30-image.csv', '36-image.csv', '27-audio.csv', '15-image.csv', '32-image.csv', '26-image.csv', '8-audio.csv', '34-image.csv', '8-image.csv', '20-image.csv', '9-image.csv', '3-audio.csv', '35-audio.csv', '16-audio.csv', '22-audio.csv', '17-image.csv', '7-audio.csv', '11-audio.csv', '18-image.csv', '27-image.csv', '29-image.csv', '13-audio.csv', '6-image.csv', '30-audio.csv', '21-audio.csv', '31-image.csv', '33-image.csv', '34-audio.csv', '28-audio.csv', '11-image.csv', '16-image.csv', '18-a

In [4]:
# import pandas
# data = pandas.read_csv(f'./data/5-audio.csv', dtype={'Marker': str})#.rename(columns=columns).drop(columns='timestamps')
# # data[9]

In [5]:
# Participants left out of the analysis.
# NOTE(review): id 4 is dropped from the ground truth only; no recording of
# participant 4 is removed from `filenames` -- presumably none exists, confirm.
excluded_ids = [11, 4, 36]
excluded_files = {
    '11-audio.csv', '11-image.csv',
    '36-audio.csv', '36-image.csv',
}

# Build new objects instead of mutating in place so that re-running this
# cell is harmless (in-place drop/remove raises once the entries are gone).
gt = gt.drop(index=excluded_ids, errors='ignore')
filenames = [name for name in filenames if name not in excluded_files]

In [6]:
path = './data'
# Positional column names found in the CSV recordings -> channel names
# (presumably 10-20 electrode labels -- confirm against the recording setup).
columns = {'Unnamed: 1':'Fp1',
        'Unnamed: 2':'Fp2',
        'Unnamed: 3':'F3',
        'Unnamed: 4':'F4',
        'Unnamed: 5':'F7',
        'Unnamed: 6':'F8',
        'Unnamed: 7':'P7',
        'Unnamed: 8':'P8'}

# Recordings keyed by integer participant id, one dict per stimulus type.
EEG_audio, EEG_image = dict(), dict()
from itertools import product
categories = [1,2,3,4,5]
blocks = [1,2]
for filename in filenames:
    # File names follow the pattern "<participant id>-<stimulus>.csv".
    participant_id, stimuli = filename.split('-')
    # Remove the extension as a suffix. str.rstrip('.csv') would instead strip
    # any trailing run of the characters '.', 'c', 's', 'v', which corrupts
    # stimulus names that happen to end in one of those letters.
    if stimuli.endswith('.csv'):
        stimuli = stimuli[:-len('.csv')]
    # 'Marker' is read as text; the 'timestamps' column is not needed downstream.
    data = pandas.read_csv(f'{path}/{filename}', dtype={'Marker': str}).rename(columns=columns).drop(columns='timestamps')
    print(participant_id, stimuli)
    # experiment = dict()
    # for (category, block) in product(categories,blocks):
        # print("   ", category, block)
        # section = get_section_from_catblock(data, category=category,block=block)
        # experiment[f"{category}_{block}"] = section
    if(stimuli == 'audio'):
        EEG_audio[int(participant_id)] = data
    elif(stimuli == 'image'):
        EEG_image[int(participant_id)] = data
    else:
        raise ValueError(f"Stimuli:{stimuli} is unexpected.")

29 audio
5 audio
10 audio
17 audio
10 image
14 audio
33 audio
31 audio
9 audio
20 audio
6 audio
13 image
25 image
5 image
14 image
22 image
7 image
23 audio
28 image
15 audio
25 audio
32 audio
21 image
23 image
2 audio
26 audio
12 audio
3 image
2 image
35 image
12 image
30 image
27 audio
15 image
32 image
26 image
8 audio
34 image
8 image
20 image
9 image
3 audio
35 audio
16 audio
22 audio
17 image
7 audio
18 image
27 image
29 image
13 audio
6 image
30 audio
21 audio
31 image
33 image
34 audio
28 audio
16 image
18 audio


In [7]:
# Build the feature matrix X (one row per image epoch) and the label vector y.
# X = np.array()
X = []
y = []
features = []
# ids = [33,2,10,12,16]
ids = gt.index.tolist()
for subject_id in ids:  # named subject_id so the builtin `id` is not shadowed
    print('\n',"="*20,subject_id)
    raw = dataframe_to_raw(EEG_image[subject_id], sfreq=250)
    events = mne.find_events(raw, stim_channel='Marker', initial_event=True, verbose=False)
    # Discard every event whose code (third column) is 1.
    events = np.delete(events,np.argwhere(events[:,2] == 1), axis=0)
    # Each image recording must provide exactly 50 remaining events ...
    if(events.shape[0] != 50):
        raise ValueError(f"Event missing: {events[:,2]}. len(events.shape[0])={events.shape[0]}")
    epochs = mne.Epochs(raw, events, tmin=0.3, tmax=5.8, baseline=(0.3,0.3), verbose=False)
    # ... and exactly 50 epochs once MNE has built them.
    if(epochs.get_data().shape[0] != 50):
        raise ValueError(f"There might be a bad data. epochs.get_data().shape = {epochs.get_data().shape}")
    # The label is constant for a participant, so look it up once.
    # Row-wise access is kept on purpose: it yields the label with the dtype
    # of the whole row, which is what the downstream cells were run with.
    label = gt.loc[subject_id]['label']
    for epoch in epochs.iter_evoked():
        psd,_,_,_ = get_psd(epoch, filter=False)
        # Collapse axis 1 of the PSD, leaving one value per row
        # (presumably mean power per channel -- confirm).
        psd = psd.mean(axis=1)
        if(np.any(np.isnan(psd))):
            raise ValueError(f"value NaN: {epoch}{psd}")
        # Validate the features of THIS epoch before storing them (e.g. a
        # zero-power bin gives log10(0) = -inf). Checking the accumulated
        # list instead would report the problem one epoch late and would
        # never catch it on the last epoch.
        if(not np.all(np.isfinite(psd))):
            raise ValueError(f"value inifinit: {epoch}{psd}")
        # print(psd.shape)
        # break

        # features.append(psd.data)
        X.append(psd)
        y.append(label)
    # break
X = np.array(X)#.reshape(len(ids),-1)
y = np.array(y)


Creating RawArray with float64 data, n_channels=9, n_times=116580
    Range : 0 ... 116579 =      0.000 ...   466.316 secs
Ready.

  events = mne.find_events(raw, stim_channel='Marker', initial_event=True, verbose=False)
Creating RawArray with float64 data, n_channels=9, n_times=117048
    Range : 0 ... 117047 =      0.000 ...   468.188 secs
Ready.

  events = mne.find_events(raw, stim_channel='Marker', initial_event=True, verbose=False)
Creating RawArray with float64 data, n_channels=9, n_times=116580
    Range : 0 ... 116579 =      0.000 ...   466.316 secs
Ready.

  events = mne.find_events(raw, stim_channel='Marker', initial_event=True, verbose=False)
Creating RawArray with float64 data, n_channels=9, n_times=115068
    Range : 0 ... 115067 =      0.000 ...   460.268 secs
Ready.

  events = mne.find_events(raw, stim_channel='Marker', initial_event=True, verbose=False)
Creating RawArray with float64 data, n_channels=9, n_times=120036
    Range : 0 ... 120035 =      0.000 ...   480.1

In [8]:
# Sanity check: feature matrix shape first, then label vector shape.
print(X.shape, y.shape, sep='\n')
# print(y)

(1500, 8)
(1500,)


In [9]:
from sklearn.preprocessing import normalize

# Scale every sample (row) with sklearn's default normalisation.
X_norm = normalize(X)

gnb = GaussianNB()
# y_pred = gnb.fit(X_train, y_train).predict(X_test)
gnb.fit(X_norm, y)

# NOTE(review): the model is evaluated on the same samples it was fitted on,
# so these predictions (and any accuracy derived from them) are in-sample.
pred = gnb.predict(X_norm)
print(pred)
print(gnb.predict_proba(X_norm))

[0. 0. 0. ... 5. 5. 5.]
[[5.46206098e-01 1.59890856e-01 3.12784455e-07 2.37227129e-01
  5.66756041e-02]
 [4.66322404e-01 1.96323597e-01 1.47075324e-06 2.73342903e-01
  6.40096248e-02]
 [6.02922416e-01 1.22129916e-01 3.82760777e-06 1.84977161e-01
  8.99666799e-02]
 ...
 [1.77375221e-01 2.77527167e-02 1.54352264e-05 2.40970528e-01
  5.53886100e-01]
 [3.06525676e-01 3.84505085e-02 4.81781457e-06 2.78847458e-01
  3.76171540e-01]
 [2.88170716e-01 5.76104458e-02 3.62855209e-05 2.53463715e-01
  4.00718838e-01]]


In [10]:
# Fraction of samples whose predicted label equals the true label.
acc = np.mean(y == pred)
acc

0.536