##### Dataset Information

In [None]:
# This data set was provided by Fraunhofer-FIRST, Intelligent Data Analysis Group (Klaus-Robert Müller), and Freie Universität Berlin, Department of Neurology, Neurophysics Group (Gabriel Curio), with correspondence to Benjamin Blankertz, for the BCI II competition (Data set IV). This dataset was recorded from a normal subject during a no-feedback session. The subject sat in a normal chair, relaxed arms resting on the table, fingers in the standard typing position at the computer keyboard. The task was to press the corresponding keys with the index and little fingers in a self-chosen order and timing ('self-paced key typing'). There are two classes: 0 for upcoming left hand movements and 1 for upcoming right hand movements. The experiment consisted of 3 sessions of 6 minutes each. All sessions were conducted on the same day with a break of some minutes in between. Typing was done at an average speed of 1 key per second. There are 316 train cases and 100 test cases. Each case is a recording of 28 EEG channels of 500 ms length, each ending 130 ms before a keypress. The data is downsampled to 100 Hz (as recommended), so each channel consists of 50 observations. Channels are in the following order: (F3, F1, Fz, F2, F4, FC5, FC3, FC1, FCz, FC2, FC4, FC6, C5, C3, C1, Cz, C2, C4, C6, CP5, CP3, CP1, CPz, CP2, CP4, CP6, O1, O2). The recording was made using a NeuroScan amplifier and an Ag/AgCl electrode cap from ECI. 28 EEG channels were measured at positions of the international 10/20-system (F, FC, C, and CP rows and O1, O2). Signals were recorded at 1000 Hz with a band-pass filter between 0.05 and 200 Hz. The winning entry achieved an error rate of 16% using 3 features from a combination of common subspace decomposition and Fisher discriminant, and classification with a perceptron neural network.

##### The code below converts ARFF files to CSV (run only once at the beginning)

In [None]:
# #Convert arff to csv
# #Run only once

# #########################################
# # Project   : ARFF to CSV converter     #
# # Created   : 10/01/17 11:08:06         #
# # Author    : haloboy777                #
# # Licence   : MIT                       #
# #########################################

# # Importing library
# import os

# # Getting all the arff files from the current directory
# files = [arff for arff in os.listdir('.') if arff.endswith(".arff")]

# # Function for converting arff list to csv list
# def toCsv(text):
#     data = False
#     header = ""
#     new_content = []
#     for line in text:
#         if not data:
#             if "@ATTRIBUTE" in line or "@attribute" in line:
#                 attributes = line.split()
#                 if("@attribute" in line):
#                     attri_case = "@attribute"
#                 else:
#                     attri_case = "@ATTRIBUTE"
#                 column_name = attributes[attributes.index(attri_case) + 1]
#                 header = header + column_name + ","
#             elif "@DATA" in line or "@data" in line:
#                 data = True
#                 header = header[:-1]
#                 header += '\n'
#                 new_content.append(header)
#         else:
#             new_content.append(line)
#     return new_content


# # Main loop for reading and writing files
# for file in files:
#     with open(file, "r") as inFile:
#         content = inFile.readlines()
#         name, ext = os.path.splitext(inFile.name)
#         new = toCsv(content)
#         with open(name + ".csv", "w") as outFile:
#             outFile.writelines(new)

In [None]:
import numpy as np
import pandas as pd
import warnings
from time import time
warnings.filterwarnings('ignore')
from scipy import signal
from scipy.signal import butter, lfilter
import matplotlib.pyplot as plt
import math
import pickle
import statistics
from sklearn.metrics.pairwise import cosine_similarity
from math import log
from sklearn import preprocessing
from sklearn import metrics
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Scale factors used for coarse-graining: fractional ones (0.5 ... 4.5) and
# integer ones (1 ... 5), interleaved in ascending order.
scale_factor_list_1 = list(np.linspace(start=0.5, stop=4.5, num=5))
scale_factor_list_2 = list(range(1,6))
result = [
    scale
    for pair in zip(scale_factor_list_1, scale_factor_list_2)
    for scale in pair
]
# The last entry names the label column of the feature tables built later.
result.append('class')
result

In [None]:
# Butterworth low-pass filter design for the upscaled time series
def butterworth_lowpass(fc,fs,order):
    """Return the `(b, a)` coefficients from `scipy.signal.butter`.

    fc    : cut-off frequency, in the same units as `fs`.
    fs    : sampling frequency; half of it (the Nyquist frequency) is used
            to normalise `fc` before it is handed to `butter`.
    order : filter order forwarded to `butter`.
    """
    nyquist = 0.5 * fs
    return butter(order, fc/nyquist)


# Notebook-level coefficients used by the `lfilter` calls in later cells.
b, a = butterworth_lowpass(0.49, 1, 4)
# print(b,a)

In [None]:
def correlation_entropy(baseline_initial):
    """Entropy of the normalised eigenvalue magnitudes of a similarity matrix.

    The absolute cosine similarity is computed on the transpose of
    `baseline_initial`. The magnitudes of that matrix's eigenvalues are
    divided by their sum, and ``-p * log(p)`` (natural logarithm) is added up
    over every non-zero share ``p``.

    Returns the summed value, which is 0.0 when no share contributes.
    """
    similarity = abs(cosine_similarity(baseline_initial.T))

    # Only the eigenvalues are needed; values that are complex with a
    # negligible imaginary part are converted to real before taking magnitudes.
    eigenvalues, _ = np.linalg.eig(similarity)
    magnitudes = abs(np.real_if_close(eigenvalues, tol=1))
    total = np.sum(magnitudes)

    terms = []
    if total != 0:
        for magnitude in magnitudes:
            share = magnitude/total if magnitude != 0 else 0
            if share == 0:
                # Zero (or underflowed) shares contribute nothing to the sum.
                continue
            terms.append(-share*(log(share)))
    return np.sum(terms)

In [None]:
def ee_plot(df,ss):
    """Entropy values over growing prefixes of a min-max scaled frame.

    df : DataFrame containing a 'groups' column, which is dropped, plus the
         columns to analyse. The remaining columns are rescaled with
         `MinMaxScaler` before any entropy is computed.
    ss : positive integer step. Entropies are computed on the first `ss`,
         `2*ss`, ... rows, for as long as the prefix is shorter than the frame.

    Returns a list of `correlation_entropy` values: one per prefix, followed
    by one for the complete frame. The list therefore always has at least one
    entry, also when `ss` is not smaller than the number of rows (the
    original code failed with an unbound local variable in that case).

    Raises ValueError when `ss` is not positive (the previous loop never
    terminated for such a step).
    """
    if ss <= 0:
        raise ValueError(f"ss must be a positive step, got {ss!r}")

    scaler = preprocessing.MinMaxScaler()
    modified_df = df.drop(columns=['groups'])
    modified_df_scaled = pd.DataFrame(scaler.fit_transform(modified_df), columns=modified_df.columns)
    length = len(modified_df_scaled)

    # Strict prefixes: ss, 2*ss, ... rows, each shorter than the whole frame.
    EE = [
        correlation_entropy(modified_df_scaled.iloc[:stop,:])
        for stop in range(ss, length, ss)
    ]
    # Every prefix above is shorter than the frame, so the full-frame value
    # is always the closing entry.
    EE.append(correlation_entropy(modified_df_scaled))
    return EE

In [None]:
# Parameters
alpha = 0.5


# Input Horizontal DataFrame (final_df_transposed) to the Upscaling Function
def upscaling(df,alpha,row_no):
    """Spread one row of `df` over a longer, zero-initialised single-row frame.

    df     : DataFrame holding one series per row.
    alpha  : length ratio; the output has ``int(df.shape[1] / alpha)`` columns.
    row_no : positional index of the row to expand.

    Returns a 1-row DataFrame of zeros in which selected positions carry
    values copied from the chosen row. With ``alpha == 0.5`` input sample
    ``k`` lands at output position ``2 * k`` and every odd position stays 0.
    NOTE(review): for other values of `alpha` the target/source index
    arithmetic below does not reduce to that simple pattern - confirm before
    using a different ratio.
    """
    n_in = df.shape[1]
    n_out = int(n_in/alpha)
    y_df = pd.DataFrame(np.zeros((1, n_out)))
    # Loop positions (one past the written column) that receive an input sample.
    hits = [(k/alpha)-1 for k in range(1, n_in+1)]
    for pos in range(n_out):
        if pos == 0:
            y_df.iloc[0,0] = df.iloc[row_no,0]
            continue
        if pos not in hits:
            continue
        src = int((pos-1)/(1/alpha))
        y_df.iloc[0,pos-1] = df.iloc[row_no,src]
    return y_df

In [None]:
def read(path):
    """Load a CSV with `pandas.read_csv` and relabel its columns.

    The columns are renamed to the integers 1..n, after which the last one
    (label n) is renamed to 'activity'. Returns the relabelled DataFrame.
    """
    frame = pd.read_csv(path)
    last = frame.shape[1]
    frame.columns = range(1, last+1)
    return frame.rename(columns={last: 'activity'})

In [None]:
# User Inputs
# num_dim        : number of per-dimension CSV files read for each split below
# test_data_size : size of the test split (not referenced in the cells shown here)
num_dim, test_data_size = 28, 100

In [None]:
# Build training dataframe
# One CSV per dimension is read and tagged with its dimension number in 'Dim'.
# The frames are gathered in a list and concatenated once: DataFrame.append
# was removed in pandas 2.0 and re-copied the accumulated data on every call.
train_frames = []
for i in range(1,num_dim+1):
    path_train = f'/FingerMovements/FingerMovementsDimension{i}_TRAIN.csv'
    df = read(path_train)
    df = df.assign(Dim=i)
    train_frames.append(df)
# Like the former append chain, the original row index of each file is kept.
df_tr = pd.concat(train_frames)
display(df_tr.head())
# Two columns ('activity' and 'Dim') are not samples; the remainder is divided by 50.
sample_size = int(((df_tr.shape[1])-2)/50)

In [None]:
# Build testing dataframe
# One CSV per dimension is read and tagged with its dimension number in 'Dim'.
# The frames are gathered in a list and concatenated once: DataFrame.append
# was removed in pandas 2.0 and re-copied the accumulated data on every call.
test_frames = []
for i in range(1,num_dim+1):
    path_test = f'/FingerMovements/FingerMovementsDimension{i}_TEST.csv'
    df = read(path_test)
    df = df.assign(Dim=i)
    test_frames.append(df)
# Like the former append chain, the original row index of each file is kept.
df_te = pd.concat(test_frames)
display(df_te.head())

In [None]:
# 'Sub' numbers the rows inside every (activity, Dim) group, starting at 1,
# so equal 'Sub' values can be matched across dimensions in later cells.
df_tr['Sub'] = df_tr.groupby(['activity','Dim']).cumcount().add(1)
df_te['Sub'] = df_te.groupby(['activity','Dim']).cumcount().add(1)

In [None]:
# Distinct 'activity' labels of the training frame, in order of first appearance
class_list = list(df_tr['activity'].unique())
class_list

# EE_Trend_All_Values and slopes

### Build train dataset

In [None]:
# Build three training feature tables, one row per (class, case):
#   EE_values_train       : one entropy trend per scale factor, then the class label
#   EE_values_train_mod   : every entropy value, the class label, NaN padding
#   EE_values_train_slope : every entropy slope, the class label, NaN padding
# Rows are gathered in plain lists and turned into DataFrames once at the end;
# DataFrame.append was removed in pandas 2.0 and re-copied the table on every call.
cols = list(range(1, 1001))
basic_rows, value_rows, slope_rows = [], [], []

for c in class_list:
    df_tr_act = df_tr[df_tr['activity'] == c]
    number = df_tr_act.Sub.nunique()

    for i in range(1, number+1):
        # Case `i` of class `c`: after the transpose every selected row is a column.
        df_tr_new = df_tr_act[df_tr_act['Sub'] == i]
        df_tr_new = df_tr_new.drop(columns=['activity','Sub','Dim']).T
        df_tr_new = df_tr_new.reset_index().drop(columns='index')
        df_tr_new.columns = range(1, num_dim+1)
        df_tr_new = df_tr_new.astype(float)

        # Upscale every series by `alpha` and run it through the (b, a) filter.
        df_tr_new_transposed = df_tr_new.T
        filtered_rows = [
            lfilter(b, a, upscaling(df_tr_new_transposed, alpha, j))
            for j in range(df_tr_new_transposed.shape[0])
        ]
        # One column per series again; reset_index exposes the sample position
        # as an 'index' column, which the grouping below relies on.
        df_upfil = pd.DataFrame(np.vstack(filtered_rows).T, columns=range(1, num_dim+1)).reset_index()
        df_tr_new = df_tr_new.reset_index()

        EE_scaled_basic = []
        EE_scaled = []
        EE_scaled_slope = []
        for scale_factor in result[:-1]:
            # Integer scales coarse-grain the raw series directly; fractional
            # scales coarse-grain the upscaled series with width scale/alpha.
            if type(scale_factor) == int:
                source, width = df_tr_new, scale_factor
            else:
                source, width = df_upfil, int(scale_factor/alpha)
            source['groups'] = source['index'].apply(lambda x: round(x/(width)-0.36,0)).astype(int)
            coarse = source.groupby('groups').mean().reset_index().drop(columns=['index'])

            EE_1 = ee_plot(coarse, sample_size)

            # Calculate trend
            EE_scaled_basic.append((EE_1[-1]-EE_1[0]) / len(EE_1))

            # Include all entropy values
            EE_scaled.extend(EE_1)

            # Calculate slopes relative to the first value. The index must not
            # be named `a`: that name holds the filter coefficients given to
            # lfilter above, and overwriting it corrupted every later case.
            slope_len = len(EE_1)
            EE_scaled_slope.extend((EE_1[k]-EE_1[0])/(k+1) for k in range(1, slope_len))

            print(f'class {c}; subject {i}; scale_factor {scale_factor}; slop_len{slope_len}')

        EE_scaled_basic.append(c)
        basic_rows.append(EE_scaled_basic)

        # Label goes right after the values; the rest of the row is NaN padding.
        for values, rows in ((EE_scaled, value_rows), (EE_scaled_slope, slope_rows)):
            values.append(c)
            if len(values) > len(cols):
                raise ValueError(f'feature row has {len(values)} entries, more than the {len(cols)} available columns')
            values.extend([np.nan] * (len(cols) - len(values)))
            rows.append(values)

EE_values_train = pd.DataFrame(basic_rows, columns=result)
EE_values_train_mod = pd.DataFrame(value_rows, columns=cols)
EE_values_train_slope = pd.DataFrame(slope_rows, columns=cols)

In [None]:
# Drop every column that contains a NaN in at least one row.
EE_values_train_mod = EE_values_train_mod.dropna(axis='columns')
EE_values_train_mod.head(3)

In [None]:
# Drop every column that contains a NaN in at least one row.
EE_values_train_slope = EE_values_train_slope.dropna(axis='columns')
EE_values_train_slope.head(3)

### Build test dataset

In [None]:
# Build three testing feature tables, one row per (class, case):
#   EE_values_test       : one entropy trend per scale factor, then the class label
#   EE_values_test_mod   : every entropy value, the class label, NaN padding
#   EE_values_test_slope : every entropy slope, the class label, NaN padding
# Rows are gathered in plain lists and turned into DataFrames once at the end;
# DataFrame.append was removed in pandas 2.0 and re-copied the table on every call.
cols = list(range(1, 1001))
basic_rows, value_rows, slope_rows = [], [], []

for c in class_list:
    df_te_act = df_te[df_te['activity'] == c]
    number = df_te_act.Sub.nunique()

    for i in range(1, number+1):
        # Case `i` of class `c`: after the transpose every selected row is a column.
        df_te_new = df_te_act[df_te_act['Sub'] == i]
        df_te_new = df_te_new.drop(columns=['activity','Sub','Dim']).T
        df_te_new = df_te_new.reset_index().drop(columns='index')
        df_te_new.columns = range(1, num_dim+1)
        df_te_new = df_te_new.astype(float)

        # Upscale every series by `alpha` and run it through the (b, a) filter.
        df_te_new_transposed = df_te_new.T
        filtered_rows = [
            lfilter(b, a, upscaling(df_te_new_transposed, alpha, j))
            for j in range(df_te_new_transposed.shape[0])
        ]
        # One column per series again; reset_index exposes the sample position
        # as an 'index' column, which the grouping below relies on.
        df_upfil = pd.DataFrame(np.vstack(filtered_rows).T, columns=range(1, num_dim+1)).reset_index()
        df_te_new = df_te_new.reset_index()

        EE_scaled_basic = []
        EE_scaled = []
        EE_scaled_slope = []
        for scale_factor in result[:-1]:
            # Integer scales coarse-grain the raw series directly; fractional
            # scales coarse-grain the upscaled series with width scale/alpha.
            if type(scale_factor) == int:
                source, width = df_te_new, scale_factor
            else:
                source, width = df_upfil, int(scale_factor/alpha)
            source['groups'] = source['index'].apply(lambda x: round(x/(width)-0.36,0)).astype(int)
            coarse = source.groupby('groups').mean().reset_index().drop(columns=['index'])

            EE_1 = ee_plot(coarse, sample_size)

            # Calculate trend
            EE_scaled_basic.append((EE_1[-1]-EE_1[0]) / len(EE_1))

            # Include all entropy values
            EE_scaled.extend(EE_1)

            # Calculate slopes relative to the first value. The index must not
            # be named `a`: that name holds the filter coefficients given to
            # lfilter above, and overwriting it corrupted every later case.
            slope_len = len(EE_1)
            EE_scaled_slope.extend((EE_1[k]-EE_1[0])/(k+1) for k in range(1, slope_len))

            print(f'class {c}; subject {i}; scale_factor {scale_factor}; slope_len{slope_len}')

        EE_scaled_basic.append(c)
        basic_rows.append(EE_scaled_basic)

        # Label goes right after the values; the rest of the row is NaN padding.
        for values, rows in ((EE_scaled, value_rows), (EE_scaled_slope, slope_rows)):
            values.append(c)
            if len(values) > len(cols):
                raise ValueError(f'feature row has {len(values)} entries, more than the {len(cols)} available columns')
            values.extend([np.nan] * (len(cols) - len(values)))
            rows.append(values)

EE_values_test = pd.DataFrame(basic_rows, columns=result)
EE_values_test_mod = pd.DataFrame(value_rows, columns=cols)
EE_values_test_slope = pd.DataFrame(slope_rows, columns=cols)

In [None]:
# Write the slope feature tables to CSV files in the working directory.
# NOTE(review): the train table had its NaN-containing columns dropped in an
# earlier cell; no matching drop for the test table is visible here - confirm
# that the differing column sets are intended.
EE_values_train_slope.to_csv(path_or_buf='EE_values_train_slope.csv')
EE_values_test_slope.to_csv(path_or_buf='EE_values_test_slope.csv')