# In this project We have used 22 features from EEG Signals to predict the six Emotion classes.
1. We have used Random forest classifier with 10 Fold Cross Validation
2. Best training split is giving mean classwise validation accuracy as 0.2555 

In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, classification_report

In [3]:
train_360_array=np.load('TrainRawDataArray/Train_RawEEG360_Data.npy')
val_180_array=np.load('ValidationRawDataArray/Validation_RawEEG180_Data.npy')
train_labels=np.load('TrainRawDataArray/final_labels_360_encoded.npy')
val_labels=np.load('ValidationRawDataArray//val_labels_encoded.npy')

train_360_array.shape, val_180_array.shape,train_labels.shape,val_labels.shape

((360, 4500, 24), (180, 4500, 24), (360,), (180,))

In [4]:
from sklearn.metrics import accuracy_score, classification_report
import numpy as np

def classwise_accuracy(y_true, y_pred):
    # Calculate overall accuracy
    overall_accuracy = accuracy_score(y_true, y_pred)
    
    # Calculate confusion matrix
    cm = confusion_matrix(y_true, y_pred)
    
    # Calculate class-wise accuracy
    classwise_accuracy = np.diag(cm) / cm.sum(axis=1)
    
    return overall_accuracy, classwise_accuracy

In [5]:
import numpy as np
import mne
from mne.io import read_raw_edf
from scipy.signal import welch
from scipy.stats import skew, kurtosis
import pywt
from scipy.signal import find_peaks, peak_widths 

def calculate_eeg_features(eeg_data, sampling_freq):
    # Welch's method for calculating power spectral density
    f_welch, psd_welch = welch(eeg_data, fs=sampling_freq, nperseg=256)
    
    # Handle the case where welch returns only a single array
    if len(psd_welch.shape) == 1:  
        psd_welch = psd_welch[np.newaxis, :]
    
    # Total power using Welch's method
    total_power_welch = np.sum(psd_welch)
    
    # Skewness and kurtosis of EEG signal
    skewness = skew(eeg_data)
    kurt = kurtosis(eeg_data)
    
    # Mean, median, variance, and standard deviation of EEG signal
    mean_val = np.mean(eeg_data)
    median_val = np.median(eeg_data)
    variance_val = np.var(eeg_data)
    std_deviation = np.std(eeg_data)
    
    # Wavelet entropy of EEG signal
    coeffs = pywt.wavedec(eeg_data, 'db4', level=6)
    wave_ent = np.sum([np.sum(np.square(c)) for c in coeffs])
    
    # Calculate power in different frequency bands
    freqs = f_welch
    delta_power = np.sum(psd_welch[:, (freqs >= 0.5) & (freqs < 4)], axis=1).item()
    theta_power = np.sum(psd_welch[:, (freqs >= 4) & (freqs < 8)], axis=1).item()
    alpha_power = np.sum(psd_welch[:, (freqs >= 8) & (freqs < 13)], axis=1).item()
    beta_power = np.sum(psd_welch[:, (freqs >= 13) & (freqs < 30)], axis=1).item()
    gamma_power = np.sum(psd_welch[:, (freqs >= 30) & (freqs < 45)], axis=1).item()
    sigma_power = np.sum(psd_welch[:, (freqs >= 12) & (freqs < 16)], axis=1).item()
    
    # Additional frequency band features
    theta_alpha_ratio = np.divide(theta_power, alpha_power, out=np.zeros_like(theta_power), where=(alpha_power != 0))
    beta_alpha_ratio = np.divide(beta_power, alpha_power, out=np.zeros_like(beta_power), where=(alpha_power != 0))
    theta_alpha_beta_sum = np.divide((theta_power + alpha_power), beta_power, out=np.zeros_like(beta_power), where=(beta_power != 0))
    

    # Zero-crossing rate
    zero_crossings = np.sum(np.diff(np.sign(eeg_data)) != 0) / len(eeg_data)
    
    # Number of waves
    peaks, _ = find_peaks(eeg_data)
    num_waves = len(peaks)
    
    # Wave duration
    peak_widths_arr = peak_widths(eeg_data, peaks, rel_height=0.5)
    wave_duration = np.mean(peak_widths_arr[0] / sampling_freq)
    
    # Peak amplitude
    peak_amplitude = np.max(eeg_data)
    
    # Energy
    energy = np.sum(np.square(eeg_data))
    
    return np.array([total_power_welch, skewness, kurt, mean_val, median_val, variance_val, std_deviation, wave_ent, delta_power, theta_power, alpha_power, beta_power, gamma_power, sigma_power, theta_alpha_ratio, beta_alpha_ratio, theta_alpha_beta_sum,zero_crossings,num_waves,wave_duration,peak_amplitude,energy])

In [6]:
test_array_180x4500x24=np.load('TestRawDataArray/test_array_180x4500x24.npy')
test_array_180x4500x24.shape

(180, 4500, 24)

In [7]:
train_360_eeg_transpose=np.transpose(train_360_array, (0, 2, 1))
val_180_eeg_transpose=np.transpose(val_180_array, (0, 2, 1))
test_360_eeg_transpose=np.transpose(test_array_180x4500x24, (0, 2, 1))
print("Train Transposed Shape: ",train_360_eeg_transpose.shape)
print("Validation Transposed Shape: ",val_180_eeg_transpose.shape)
print("Test Data Transposed Shape: ",test_360_eeg_transpose.shape)

train_eegData360_reshaped=train_360_eeg_transpose.reshape(-1,4500)
val_eegData180_reshaped=val_180_eeg_transpose.reshape(-1,4500)
test_eegData180_reshaped=test_360_eeg_transpose.reshape(-1,4500)

print("Train Reshaped: ",train_eegData360_reshaped.shape)
print("Validation Reshaped: ",val_eegData180_reshaped.shape)
print("Test Data Reshaped: ",test_eegData180_reshaped.shape)

Train Transposed Shape:  (360, 24, 4500)
Validation Transposed Shape:  (180, 24, 4500)
Test Data Transposed Shape:  (180, 24, 4500)
Train Reshaped:  (8640, 4500)
Validation Reshaped:  (4320, 4500)
Test Data Reshaped:  (4320, 4500)


In [8]:
train_features_22=[]
val_features_22=[]
test_features_22=[]

sampling_freq=300

for i in train_eegData360_reshaped:
    train_features_array= calculate_eeg_features(i, sampling_freq)
    train_features_22.append(train_features_array)
    
for j in val_eegData180_reshaped:
    val_features_array=calculate_eeg_features(j, sampling_freq)
    val_features_22.append(val_features_array)

for k in test_eegData180_reshaped:
    test_features_array=calculate_eeg_features(k, sampling_freq)
    test_features_22.append(test_features_array)
    
len(train_features_22), len(val_features_22), len(test_features_22)

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


(8640, 4320, 4320)

In [9]:
train_22Features=np.array(train_features_22)
val_22Features=np.array(val_features_22)
test_22Features=np.array(test_features_22)
print("Train Features Shape: ", train_22Features.shape)
print("Validation Features Shape: ", val_22Features.shape)
print("Test Features Shape: ", test_22Features.shape)

Train Features Shape:  (8640, 22)
Validation Features Shape:  (4320, 22)
Test Features Shape:  (4320, 22)


In [33]:
## Checking NaN values and Replacing in the Train Features data  {There are 2 NaN values in the Train Data}

print(f"It's np.isnan  : {np.isnan(train_22Features).sum()}")
train_22features_withoutNan=np.nan_to_num(train_22Features, np.nanmean(train_features_22))
print(f"It's np.isnan  : {np.isnan(train_22features_withoutNan).sum()}")

It's np.isnan  : 2
It's np.isnan  : 0


In [34]:
## Checking NaN values and Replacing in the Validation features data  {There are 5 NaN values in the Validation data}

print(f"It's np.isnan  : {np.isnan(val_22Features).sum()}")

val_22features_withoutNan=np.nan_to_num(val_22Features, np.nanmean(val_features_22)) 

print(f"It's np.isnan  : {np.isnan(val_22features_withoutNan).sum()}")

It's np.isnan  : 5
It's np.isnan  : 0


In [35]:
#   Checking NaN in the test features {There is No NaN value in test data}

print(f"It's np.isnan  : {np.isnan(test_22Features).sum()}")

It's np.isnan  : 0


In [36]:
# Reshaping Train, Validation and Test data into the original shape {i.e. Train: 360x24x22, Val:180x24x22, Test:180x24x22 } 

train_22features_reshaped = train_22features_withoutNan.reshape(360,24, 22)  # will have a shape of (380*24, 22)
val_22features_reshaped = val_22features_withoutNan.reshape(180,24, 22)
test_22features_reshaped = test_22Features.reshape(180,24, 22)

print("Train Reshaped 22 Features: ", train_22features_reshaped.shape)
print("Validation Reshaped 22 Features: ", val_22features_reshaped.shape)
print("Test Reshaped 22 Features: ", test_22features_reshaped.shape)

Train Reshaped 22 Features:  (360, 24, 22)
Validation Reshaped 22 Features:  (180, 24, 22)
Test Reshaped 22 Features:  (180, 24, 22)


In [37]:
# Loading the Indexes of Best Split of Train Data Saved at the time 10 fold cross validation
# Split-0 was giving the best accuracy on Validation Data so We have used aplit-0 indexes for training the model

train_idx=np.load('IndexesFrom10Fold/train_0.npy')
test_idx=np.load('IndexesFrom10Fold/test_0.npy')

print("Train Best indexes Shape: ", train_idx.shape)
print("Test Best indexes shape: ", test_idx.shape)

Train Best indexes Shape:  (324,)
Test Best indexes shape:  (36,)


In [38]:
# Creating the Best Train Data from the Split indexes obtained from the 10 fold cross validation
x_train, x_test, y_train, y_test = train_22features_reshaped[train_idx], train_22features_reshaped[test_idx], train_labels[train_idx], train_labels[test_idx]
x_train_324x24=x_train.reshape(-1,22)
x_test_36x24=x_test.reshape(-1,22)
print("Train Data Best Split Shape: ",x_train_324x24.shape)

Train Data Best Split Shape:  (7776, 22)


In [39]:
import random
from sklearn.model_selection import KFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score,classification_report, confusion_matrix
    
x_train, x_test, y_train, y_test = train_22features_reshaped[train_idx], train_22features_reshaped[test_idx], train_labels[train_idx], train_labels[test_idx]
x_train_324x24=x_train.reshape(-1,22)
x_test_36x24=x_test.reshape(-1,22)
    
# Creating the Train labels according to the Train Data Shape
n = 24
train_labels_7776 = [item for item in y_train for iii in range(n)]
#val_labels_864=[item for item in y_test for jjj in range(n)]

# Trainig the Model on the best split Train Data
RF_classifier = RandomForestClassifier(random_state=2298, bootstrap= True,max_depth=8,max_features=1.0,max_samples=1.0,min_samples_leaf=2,min_samples_split=7,n_estimators=200)

RF_classifier.fit(x_train_324x24, train_labels_7776)

# Predicting Using The Validation Data
RF_prediction=RF_classifier.predict(val_22features_withoutNan)

# Creating the predited labels accoring to the Validation data shape 
l=0
r=24
RF_final_pred=[]
for ii in range(180):
    test_list=list(RF_prediction[l:r])
    l=r
    r=r+24
    res = max(set(test_list), key = test_list.count)
    RF_final_pred.append(res)
#len(RF_final_pred)

#print("Fold {0}: {1}".format(i,accuracy_score(val_labels,RF_final_pred)))

overall_acc, classwise_acc = classwise_accuracy(val_labels, RF_final_pred)
#overall_acc, classwise_acc = classwise_accuracy(y_true, y_pred)

print("Overall Accuracy:", overall_acc)
print("Class-wise Accuracy:", classwise_acc)
print("Mean Class-wise Accuracy:", np.mean(np.array(classwise_acc)))

Overall Accuracy: 0.25555555555555554
Class-wise Accuracy: [0.26666667 0.13333333 0.23333333 0.33333333 0.06666667 0.5       ]
Mean Class-wise Accuracy: 0.25555555555555554


In [40]:
## Predictions for the Test data (22 Features Data)
RF_test_prediction=RF_classifier.predict(test_22Features)

l=0
r=24
RF_test_pred=[]
for ii in range(180):
    #test_pred_list=list(RF_prediction[l:r])    #### Here it was wrong
    test_pred_list=list(RF_test_prediction[l:r])
    l=r
    r=r+24
    res = max(set(test_pred_list), key = test_pred_list.count)
    RF_test_pred.append(res)

In [41]:
#  Mapping the predicted labels to the actual classes
import pandas as pd
encoding_reverse = {0:'fear', 1:'joy', 2:'anger', 3:'sadness', 4:'disgust',5:'surprise'}

test_pred_labels_encoded= [encoding_reverse.get(emotion_pred) for emotion_pred in RF_test_pred]
test_pred_labels_encoded

['surprise',
 'fear',
 'joy',
 'fear',
 'fear',
 'fear',
 'surprise',
 'anger',
 'disgust',
 'disgust',
 'fear',
 'surprise',
 'fear',
 'joy',
 'disgust',
 'joy',
 'fear',
 'anger',
 'surprise',
 'anger',
 'anger',
 'anger',
 'anger',
 'anger',
 'disgust',
 'anger',
 'joy',
 'disgust',
 'disgust',
 'joy',
 'disgust',
 'anger',
 'anger',
 'anger',
 'surprise',
 'disgust',
 'sadness',
 'sadness',
 'anger',
 'surprise',
 'sadness',
 'sadness',
 'sadness',
 'sadness',
 'surprise',
 'sadness',
 'sadness',
 'sadness',
 'surprise',
 'surprise',
 'disgust',
 'sadness',
 'sadness',
 'joy',
 'fear',
 'fear',
 'joy',
 'joy',
 'joy',
 'anger',
 'fear',
 'anger',
 'anger',
 'joy',
 'anger',
 'joy',
 'fear',
 'joy',
 'joy',
 'anger',
 'joy',
 'surprise',
 'disgust',
 'surprise',
 'disgust',
 'disgust',
 'disgust',
 'surprise',
 'disgust',
 'disgust',
 'surprise',
 'disgust',
 'fear',
 'disgust',
 'disgust',
 'disgust',
 'disgust',
 'surprise',
 'disgust',
 'disgust',
 'sadness',
 'anger',
 'sadness'

In [42]:
# Reading the validation template and create the final Test prediction csv file to upload on the codabench portal

validation_filenames=pd.read_csv('fg24_submission_template_test_phase/predictions.csv')['filename']

#  Creating DataFrame with two lists
csv_to_upload = pd.DataFrame({'filename' : validation_filenames,
                                'class' : test_pred_labels_encoded},
                                columns=['filename','class'])

csv_to_upload.to_csv('Test_Predictions.csv', index=False)

In [43]:
csv_to_upload

Unnamed: 0,filename,class
0,user03_raw_01.csv,surprise
1,user03_raw_02.csv,fear
2,user03_raw_03.csv,joy
3,user03_raw_04.csv,fear
4,user03_raw_05.csv,fear
...,...,...
175,user32_raw_14.csv,sadness
176,user32_raw_15.csv,anger
177,user32_raw_16.csv,sadness
178,user32_raw_17.csv,sadness
