In [1]:
# Classification
import math
import h5py
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

from keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import GlobalAveragePooling1D, Conv1D, GlobalMaxPooling1D
from keras.layers import Dense
from keras.regularizers import L1L2
from tensorflow.keras.models import Model
from tensorflow.keras import layers, losses, regularizers
#from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score, KFold
from sklearn.pipeline import Pipeline
from sklearn.metrics import roc_curve, confusion_matrix, auc
from sklearn.metrics import roc_auc_score, precision_recall_curve
import pandas as pd
import os

ModuleNotFoundError: No module named 'tensorflow'

In [None]:
# Best performing architecture

# CM: [39 2 4 29]
# Accuracy: 0.919
# Sensitivity: 0.879
# Specificity: 0.951
# MCC: 0.836

# def create_model():
#     # Create model
#     model = Sequential()
#     model.add(Conv1D(filters=32, kernel_size=14, strides=1, activation='relu', input_shape=[None,1], name = 'L1'))
#     model.add(tf.keras.layers.Dropout(0.2)),  # dropout rate of 0.2
#     model.add(Conv1D(filters=32, kernel_size=4, strides=1, activation='relu', name = 'L2'))  # New Conv1D layer
#     #model.add(Conv1D(filters=16, kernel_size=3, strides=1, activation='relu', name = 'L3'))  # New Conv1D layer
#     model.add(GlobalAveragePooling1D())
#     model.add(Dense(32, activation='relu'))
#     model.add(Dense(1, activation='sigmoid', name='classification'))
#     # Compile model
#     adam = tf.keras.optimizers.Adam(learning_rate=1.e-04)
#     model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
#     #model.summary()
#     return model

In [None]:
# Number of cross-validation folds; read as a global by evaluate_classifier().
folds = 5


def create_model():
    """Build and compile the 1-D CNN used for binary classification.

    Architecture: three Conv1D layers (64, 64 and 32 filters) with dropout
    after the first one, global average pooling, dropout, a 32-unit dense
    layer and a single sigmoid output unit.

    The input shape is ``[None, 1]``: sequences of arbitrary length with one
    channel per step.

    Returns
    -------
    A compiled Keras ``Sequential`` model (binary cross-entropy loss, Adam
    optimizer with learning rate 1e-4, accuracy metric).
    """
    model = Sequential()
    model.add(Conv1D(filters=64, kernel_size=8, strides=1, activation='relu',
                     padding='same', input_shape=[None, 1], name='L1'))
    model.add(tf.keras.layers.Dropout(0.4))  # dropout rate of 0.4
    model.add(Conv1D(filters=64, kernel_size=6, strides=1, activation='relu',
                     padding='same', name='L2'))
    # No padding here, so this layer shortens the sequence by one step.
    model.add(Conv1D(filters=32, kernel_size=2, strides=1, activation='relu', name='L3'))
    model.add(GlobalAveragePooling1D())
    model.add(tf.keras.layers.Dropout(0.4))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(1, activation='sigmoid', name='classification'))

    # Pass the optimizer *instance* to compile(). The previous code passed the
    # string 'adam', which makes Keras build a default Adam optimizer and
    # silently ignores the learning rate configured here.
    # NOTE(review): results recorded with the old code were therefore obtained
    # with Keras's default learning rate, not 1e-4 -- re-validate them.
    adam = tf.keras.optimizers.Adam(learning_rate=1.e-04)
    model.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy'])
    return model

In [None]:
# copy of Rahul's model
# def create_model():
#     # Create model
#     model = Sequential()
#     model.add(Conv1D(filters=14, kernel_size=10, strides=1, activation='relu', input_shape=[None,1]))
#     model.add(GlobalAveragePooling1D())
#     model.add(Dense(1, activation='sigmoid', name='classification'))
#     # Compile model
#     adam = tf.keras.optimizers.Adam(lr=1.e-04)
#     model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
#     #model.summary()
#     return model

In [None]:
# Fix random seeds for reproducibility.
seed = 128
np.random.seed(seed)
# Seeding NumPy alone does not make training repeatable: Keras draws initial
# weights, dropout masks and batch shuffling from its own generators.
# set_random_seed() seeds Python, NumPy and TensorFlow together; fall back to
# the TensorFlow-only call on releases that do not provide it.
if hasattr(tf.keras.utils, 'set_random_seed'):
    tf.keras.utils.set_random_seed(seed)
else:
    tf.random.set_seed(seed)

# Load the CSV and split it into input (X) and output (y) variables.
# NOTE(review): absolute, machine-specific path -- the notebook only runs on
# the original author's machine; consider a path relative to a data directory.
# The name `dir` shadows the Python built-in; it is kept unchanged in case
# other cells refer to it.
dir = r'C:\Users\_Kamat_\Desktop\RPI\ResearchWork\Papers_\Effective_Connectivity\EEG_fNIRS_paper_Brain_informatics\channelEEG_codes_results_alphaBand\Results\Connectivities_LSTMED'
csvfilename = 'exp_novST1.csv'
# csvfilename = 'EEG_GC_Exp_Nov_T123_svmSelectedConn.csv'
data = pd.read_csv(os.path.join(dir,csvfilename)).values
m,n = data.shape

# Every column except the last is a feature; the last column is the label.
X0 = data[:,0:n-1]
print(X0.shape)
# Add a trailing channel axis: (samples, features) -> (samples, features, 1),
# matching the [None, 1] input shape of the Conv1D model.
X = X0.reshape(X0.shape[0],X0.shape[1],1)
print(X.shape)

# Map labels to {0, 1}: -1 becomes 0, every other value becomes 1.
y = data[:,n-1]
y = np.where(y == -1, 0, 1)
print(y)

In [None]:
# Build one model instance and print its layer-by-layer summary.
# evaluate_classifier() builds its own model per fold, so this instance is not
# the one trained in the visible cells.
classifier = create_model() #KerasClassifier(build_fn=create_model, epochs=2000, batch_size=5, verbose=0)
classifier.summary()

In [None]:
def model_performance(y_true, y_pred):
    """Score binary predictions at the threshold that maximises the F-score.

    The decision threshold is taken from the precision-recall curve of
    ``y_pred`` itself; predictions ``>=`` that threshold count as positive.

    NOTE(review): the threshold is tuned on the same predictions it is then
    scored on, so the reported metrics are optimistic.

    Parameters
    ----------
    y_true : array-like
        Ground-truth binary labels.
    y_pred : array-like
        Predicted scores / probabilities for the positive class.

    Returns
    -------
    tuple
        ``(accuracy, sensitivity, specificity, MCC, tn, fp, fn, tp, y_true, y_pred)``
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    # (The ROC / g-mean computation that used to precede this was never used:
    # its results were overwritten by the precision-recall values below.)
    precision, recall, thresholds = precision_recall_curve(y_true, y_pred)

    # F-score per threshold. Where precision + recall == 0 the ratio is 0/0;
    # define it as 0 there, otherwise np.argmax would select the NaN entry.
    pr_sum = precision + recall
    fscore = np.divide(2 * precision * recall, pr_sum,
                       out=np.zeros_like(pr_sum, dtype=float), where=pr_sum > 0)
    # precision/recall carry one more entry than thresholds (the final
    # recall == 0 point has no threshold), so search only indices that are
    # valid for `thresholds`.
    ix = np.argmax(fscore[:-1])

    # Confusion matrix at the selected threshold: [tn, fp, fn, tp]
    tn, fp, fn, tp = confusion_matrix(y_true, (y_pred >= thresholds[ix])).ravel()
    N = tn+fp+fn+tp
    S = (tp+fn)/N  # fraction of actual positives
    P = (tp+fp)/N  # fraction of predicted positives
    # MCC is undefined (0/0) when either class is absent from the labels or
    # from the predictions; report 0 in that case.
    mcc_denominator = np.sqrt(P*S*(1.-S)*(1.-P))
    MCC = ((tp/N)-S*P)/mcc_denominator if mcc_denominator > 0 else 0.0
    accuracy = (tp+tn)/N
    sensitivity = (tp)/(tp+fn)
    specificity = (tn)/(tn+fp)

    return accuracy, sensitivity, specificity, MCC, tn, fp, fn, tp, y_true, y_pred

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

def evaluate_classifier(XX, yy):
    """Cross-validate a freshly built model and score the pooled predictions.

    For each of the ``folds`` stratified (unshuffled) splits, a new model from
    ``create_model()`` is trained on the training part and used to predict the
    held-out part. Predictions from all folds are concatenated and passed to
    ``model_performance``.

    NOTE(review): the held-out fold is also passed as ``validation_data``, so
    early stopping is steered by the data the model is scored on. Consider
    carving the validation set out of the training part instead.

    Parameters
    ----------
    XX : array
        Input samples, indexed along axis 0.
    yy : array
        Binary labels, one per sample.

    Returns
    -------
    tuple
        The metrics tuple returned by ``model_performance``.
    """
    y_true = []
    y_pred = []
    kf = StratifiedKFold(n_splits=folds, shuffle=False)
    #kf = KFold(n_splits=folds, shuffle=True, random_state=seed)
    for train, test in kf.split(XX,yy):
        x_train, x_test, y_train, y_test = XX[train], XX[test], yy[train], yy[test]

        # This function is called once per ablated feature, each time building
        # `folds` models. Release the global Keras state left by earlier
        # models so memory does not grow over the run.
        tf.keras.backend.clear_session()
        classifier = create_model() #KerasClassifier(build_fn=create_model, epochs=2000, batch_size=5, verbose=0)
        es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=200)
        classifier.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=5000, batch_size=8, shuffle=True, verbose=0, callbacks=[es])
        # Predict the held-out fold; verbose=0 keeps the per-fold progress bar
        # out of the notebook output.
        k_pred = classifier.predict(x_test, verbose=0)
        # np.append flattens, so the (n, 1) prediction column becomes 1-D.
        y_true = np.append(y_true, y_test)
        y_pred = np.append(y_pred, k_pred)

    return model_performance(y_true, y_pred)
# Leave-one-feature-out ablation: drop each feature (axis-1 slot of X) in
# turn, re-run the cross-validated evaluation and record its accuracy.
feature_indices = list(range(X.shape[1]))

accuracies = []
for i in feature_indices:
    X_reduced = np.delete(X, i, axis=1)  # X without feature i
    metrics = evaluate_classifier(X_reduced, y)
    reduced_accuracy = metrics[0]  # first entry of the metrics tuple is the accuracy
    accuracies.append(reduced_accuracy)

accuracies = np.asarray(accuracies).squeeze()
# Ascending order: features whose removal leaves the LOWEST accuracy come first.
sorted_indices = accuracies.argsort()
print(accuracies)
print(sorted_indices)

feature_ranking = [feature_indices[k] for k in sorted_indices]
accuracies_ranked = list(accuracies[sorted_indices])
print(feature_ranking[:10]) # the ten highest-ranked features

# # Print the ranked features and their corresponding accuracies
# for rank, (feature_index, accuracy) in enumerate(zip(feature_ranking, accuracies_ranked), 1):
#     print(f"Rank {rank}: Feature {feature_index}, Accuracy: {accuracy}")

In [None]:
# Inspect the ablation results: the per-feature accuracies, the same values
# in ranked (ascending) order, and the feature indices in that order.
print(accuracies)
print(accuracies_ranked)
print(feature_ranking)
# Shape, then contents, of X restricted along axis 1 to the first ten ranked features.
print(X[:,feature_ranking[:10],:].shape)
print(X[:,feature_ranking[:10],:])

In [None]:
# Baseline: evaluate with all features.
# Result layout: (accuracy, sensitivity, specificity, MCC, tn, fp, fn, tp, y_true, y_pred)
initial_model_metrics = evaluate_classifier(X, y)
# (A stray `accuracies = []` used to sit here; it wiped the ablation results,
# so re-running the inspection cell afterwards printed an empty list.)
# Final model: evaluate with only the ten highest-ranked features.
X_final = X[:,feature_ranking[:10],:]
final_model_metrics = evaluate_classifier(X_final, y)

In [None]:
# ROC and AUC
def plot_model_performace(metrics, type):
    """Plot ROC and precision-recall curves and the confusion matrix.

    Prints the g-mean-optimal and F-score-optimal thresholds, then the
    confusion matrix, accuracy, sensitivity, specificity and MCC obtained at
    the F-score-optimal threshold.

    Parameters
    ----------
    metrics : sequence
        Result tuple whose last two entries are the true labels and the
        predicted scores (the layout returned by ``model_performance``).
        Only those two entries are read.
    type : str
        Label appended to every figure title. The name shadows the built-in
        ``type`` but is kept because callers pass it by keyword.

    Returns
    -------
    None
    """
    # Local import: ConfusionMatrixDisplay is not among the notebook's
    # top-level imports.
    from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

    y_true, y_pred = np.asarray(metrics[-2]), np.asarray(metrics[-1])

    # ---- ROC curve -------------------------------------------------------
    fpr, tpr, thresholds = roc_curve(y_true, y_pred)
    area = auc(fpr, tpr)

    # g-mean for each threshold, and the threshold where it peaks
    gmeans = np.sqrt(tpr * (1-fpr))
    ix = np.argmax(gmeans)
    print('Best Threshold=%f, G-Mean=%.3f' % (thresholds[ix], gmeans[ix]))

    plt.figure()
    plt.plot([0, 1], [0, 1], 'k--')
    plt.plot(fpr, tpr, label='AUC = {:.3f}'.format(area))
    plt.xlabel('False positive rate')
    plt.ylabel('True positive rate')
    plt.title('ROC curve '+str(type))
    plt.legend(loc='best')
    plt.show()
    plt.close()

    # ---- Precision-recall curve -------------------------------------------
    precision, recall, thresholds = precision_recall_curve(y_true, y_pred)
    area = auc(recall, precision)
    # F-score per threshold. Where precision + recall == 0 the ratio is 0/0;
    # define it as 0 there, otherwise np.argmax would select the NaN entry.
    pr_sum = precision + recall
    fscore = np.divide(2 * precision * recall, pr_sum,
                       out=np.zeros_like(pr_sum, dtype=float), where=pr_sum > 0)
    # precision/recall carry one more entry than thresholds, so search only
    # indices that are valid for `thresholds`.
    ix = np.argmax(fscore[:-1])
    print('Best Threshold=%f, F-Score=%.3f' % (thresholds[ix], fscore[ix]))

    plt.figure()
    # Baseline precision: the fraction of positive samples.
    no_skill = len(y_true[y_true==1]) / len(y_true)
    plt.plot([0,1], [no_skill,no_skill], linestyle='--')
    plt.plot(recall, precision, marker='.', label='AUC = {:.3f}'.format(area))
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.legend()
    plt.title('PR curve '+str(type))
    plt.show()
    plt.close()

    # ---- Confusion matrix at the F-score-optimal threshold ----------------
    # Computed once and reused for both the printed metrics and the plot.
    conf_matrix = confusion_matrix(y_true, (y_pred >= thresholds[ix]))
    tn, fp, fn, tp = conf_matrix.ravel()
    N = tn+fp+fn+tp
    S = (tp+fn)/N  # fraction of actual positives
    P = (tp+fp)/N  # fraction of predicted positives
    # MCC is undefined (0/0) when either class is absent from the labels or
    # from the predictions; report 0 in that case.
    mcc_denominator = np.sqrt(P*S*(1.-S)*(1.-P))
    MCC = ((tp/N)-S*P)/mcc_denominator if mcc_denominator > 0 else 0.0
    accuracy = (tp+tn)/N
    sensitivity = (tp)/(tp+fn)
    specificity = (tn)/(tn+fp)

    print("CM: [%d %d %d %d]" %(tn, fp, fn, tp))
    print("Accuracy: %0.3f" %(accuracy))
    print("Sensitivity: %0.3f" %(sensitivity))
    print("Specificity: %0.3f" %(specificity))
    print("MCC: %0.3f" %(MCC))

    # Global matplotlib setting: applies to figures created from here on,
    # including those of later calls.
    plt.rcParams['font.size'] = 14
    labels = ['Novice', 'Expert']

    disp = ConfusionMatrixDisplay(confusion_matrix=conf_matrix,display_labels=labels)
    disp.plot()
    plt.title('CM '+str(type))
    plt.show()
    plt.close()

    return None

In [None]:
# Curves and confusion matrix for the model evaluated on all features.
plot_model_performace(initial_model_metrics, type= 'All connectivity')


In [None]:
# Curves and confusion matrix for the model evaluated on the ten selected features.
plot_model_performace(final_model_metrics, type= 'Selected connectivity')