# In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import os

# Mount Google Drive so the dataset folders are reachable from the Colab runtime
from google.colab import drive
drive.mount('/gdrive')

# NOTE(review): this seeds TensorFlow only. The np.random.shuffle calls used
# further down draw from NumPy's global RNG, and no np.random.seed call is
# visible in this file, so the data split is not reproducible between runs.
tf.random.set_seed(89)

# Dataset root folder and one sub-folder per subset (F, N, O, S, Z)
ruta = '/gdrive/MyDrive/TEG-EEG/dataset_bonn/'
ruta_F = f'{ruta}F/'
ruta_N = f'{ruta}N/'
ruta_O = f'{ruta}O/'
ruta_S = f'{ruta}S/'
ruta_Z = f'{ruta}Z/'

# Class labels assigned to the subsets
# 0: normal subjects (sets Z and O), 200 in total
# 1: abnormal subjects, interictal epileptic signals (sets N and F), 200 in total
# 2: abnormal subjects, ictal epileptic signals (set S), 100 in total
LABEL_ZO, LABEL_NF, LABEL_S = 0, 1, 2

def _read_subset(directory, label):
    """Load every file in ``directory`` and pair each signal with ``label``.

    Args:
        directory: Folder whose entries are all readable by ``np.loadtxt``.
        label: Integer class label attached to every record of the folder.

    Returns:
        A list of ``[signal, np.array([label])]`` pairs, one per file.
    """
    records = []
    # os.listdir returns entries in arbitrary order; sorting keeps the record
    # order identical from run to run.
    for filename in sorted(os.listdir(directory)):
        datum = np.loadtxt(os.path.join(directory, filename))
        # Drop the last sample (per the original comment: keep the first
        # 4,096 data points and exclude the 4,097th).
        datum = datum[:-1]
        records.append([datum, np.array([label])])
    return records


def read_data():
    """Read the five dataset subsets and group their records by class label.

    The folders are taken from the module-level ``ruta_F``, ``ruta_N``,
    ``ruta_Z``, ``ruta_O`` and ``ruta_S`` paths and are read in that order.

    Returns:
        A tuple ``(data_0, data_1, data_2)`` of lists holding the records
        labelled ``LABEL_ZO`` (subsets Z and O), ``LABEL_NF`` (subsets F and N)
        and ``LABEL_S`` (subset S). Each record is a
        ``[signal, np.array([label])]`` pair.
    """
    # (subset name, folder, label) in reading order
    subsets = (
        ('F', ruta_F, LABEL_NF),
        ('N', ruta_N, LABEL_NF),
        ('Z', ruta_Z, LABEL_ZO),
        ('O', ruta_O, LABEL_ZO),
        ('S', ruta_S, LABEL_S),
    )

    data_by_label = {LABEL_ZO: [], LABEL_NF: [], LABEL_S: []}
    for name, directory, label in subsets:
        print(f'Reading subset {name}... (category {label})')
        data_by_label[label].extend(_read_subset(directory, label))

    N = sum(len(records) for records in data_by_label.values())  # Number of files
    print(f'Reading completed. Total {N} records')

    return data_by_label[LABEL_ZO], data_by_label[LABEL_NF], data_by_label[LABEL_S]

def list_to_array(lst, nx=4096, ny=1):
    """Pack a list of ``(signal, label)`` records into one 2-D float32 array.

    Args:
        lst: Sequence of two-item records ``(x, y)`` where ``x`` fills ``nx``
            columns and ``y`` fills ``ny`` columns.
        nx: Number of signal columns per row (default 4096).
        ny: Number of label columns per row (default 1).

    Returns:
        Array of shape ``(len(lst), nx + ny)`` and dtype float32 whose first
        ``nx`` columns hold the signal and whose remaining columns hold the
        label. An empty ``lst`` yields an array with zero rows.
    """
    array = np.zeros((len(lst), nx + ny), dtype='float32')
    for i, (x, y) in enumerate(lst):
        array[i, :nx] = x
        array[i, nx:] = y

    return array

def partition_data(train_proportion, val_proportion, test_proportion, data_0, data_1, data_2):
    """Split each class into train/validation/test rows and merge the classes.

    Every class list is converted with ``list_to_array`` and shuffled in place,
    then cut at ``int(train_proportion * n)`` and
    ``int((train_proportion + val_proportion) * n)``. The rows after the second
    cut form the test split, so ``test_proportion`` itself is never read.

    Args:
        train_proportion: Fraction of each class used for training.
        val_proportion: Fraction of each class used for validation.
        test_proportion: Accepted for symmetry; the test split is the remainder.
        data_0, data_1, data_2: Record lists, one per class.

    Returns:
        ``(train_data, val_data, test_data)`` arrays, each shuffled row-wise.
    """
    per_class = [list_to_array(records) for records in (data_0, data_1, data_2)]
    for block in per_class:
        np.random.shuffle(block)

    train_val_proportion = train_proportion + val_proportion
    train_chunks, val_chunks, test_chunks = [], [], []
    for block in per_class:
        n_rows = block.shape[0]
        train_stop = int(train_proportion * n_rows)
        val_stop = int(train_val_proportion * n_rows)
        train_chunks.append(block[:train_stop])
        val_chunks.append(block[train_stop:val_stop])
        test_chunks.append(block[val_stop:])

    splits = [np.concatenate(chunks, axis=0)
              for chunks in (train_chunks, val_chunks, test_chunks)]

    # Mix the classes inside every split
    for split in splits:
        np.random.shuffle(split)

    train_data, val_data, test_data = splits
    return train_data, val_data, test_data

def _conv_bn_relu(x, filters):
    """Apply Conv1D (kernel 10, ReLU, same padding), batch normalization and ReLU."""
    x = tf.keras.layers.Conv1D(filters, 10, activation='relu', padding='same')(x)
    x = tf.keras.layers.BatchNormalization()(x)
    return tf.keras.layers.Activation('relu')(x)


def create_autoencoder_model(input_dim, encoding_dim):
    """Build a 1-D convolutional autoencoder and return it with its encoder.

    The encoder reshapes the flat input to ``(input_dim, 1)`` and applies three
    convolution stages (16, 8 and 8 filters), each followed by max pooling of
    size 2, so its output has shape ``(input_dim / 8, 8)``. The decoder mirrors
    this with three up-samplings of factor 2, a single-filter sigmoid
    convolution, dropout and a final dense layer of ``input_dim`` units.

    Args:
        input_dim: Length of each flat input window. Must be a multiple of 8 so
            that three poolings followed by three up-samplings restore it.
        encoding_dim: Not used by this convolutional architecture; kept so that
            existing callers do not break.

    Returns:
        ``(autoencoder_model, encoder)``: the autoencoder compiled with the Adam
        optimizer and mean squared error loss, and the encoder sub-model.

    Raises:
        ValueError: If ``input_dim`` is not a multiple of 8.
    """
    if input_dim % 8 != 0:
        raise ValueError(
            f'input_dim must be a multiple of 8 (three pooling stages), got {input_dim}'
        )

    inputs = tf.keras.Input(shape=(input_dim,))

    ## Encoder: three conv stages, each halving the temporal length
    x = tf.keras.layers.Reshape((input_dim, 1))(inputs)
    x = _conv_bn_relu(x, 16)
    x = tf.keras.layers.MaxPooling1D(2, padding='same')(x)

    x = _conv_bn_relu(x, 8)
    x = tf.keras.layers.MaxPooling1D(2, padding='same')(x)

    x = _conv_bn_relu(x, 8)
    encoded = tf.keras.layers.MaxPooling1D(2, padding='same')(x)

    # The previous version also built Dense/ActivityRegularization/Dropout
    # layers here but never connected them to any model; they were removed.
    encoder = tf.keras.Model(inputs, encoded, name='encoder')

    ## Decoder: three conv stages with three up-samplings in total
    y = _conv_bn_relu(encoded, 8)
    y = tf.keras.layers.UpSampling1D(2)(y)

    y = tf.keras.layers.UpSampling1D(2)(y)
    y = _conv_bn_relu(y, 8)

    y = tf.keras.layers.UpSampling1D(2)(y)
    y = _conv_bn_relu(y, 16)

    # Reconstruction layer, flattened back to the input layout
    y = tf.keras.layers.Conv1D(1, 10, activation='sigmoid', padding='same')(y)
    y = tf.keras.layers.Reshape((input_dim,))(y)
    y = tf.keras.layers.Dropout(0.05)(y)

    # Output layer sized from input_dim so the reconstruction always matches
    # the target. The layer name 'bottleneck' is kept from the original code
    # even though this is the output layer.
    # NOTE(review): the ReLU output cannot be negative, so inputs standardized
    # to zero mean cannot be reconstructed exactly - confirm this is intended.
    decoded = tf.keras.layers.Dense(input_dim, activation='relu',
                                    name='bottleneck')(y)

    decoder = tf.keras.Model(encoded, decoded, name='decoder')

    ## Autoencoder model
    autoencoder_output = decoder(encoder(inputs))
    autoencoder_model = tf.keras.Model(inputs, autoencoder_output, name='autoencoder')
    autoencoder_model.compile(optimizer='adam', loss='mean_squared_error')
    return autoencoder_model, encoder

# Windowing
def ventanear(X, Y, wsize=128):
    """Cut every record into non-overlapping windows and shuffle them.

    Each record ``x`` is reshaped to ``(len(x) / wsize, wsize)`` and its label
    is repeated once per window. Windows and labels are then shuffled with one
    shared permutation so that every window keeps its own label.

    Args:
        X: Iterable of 1-D records whose length is a multiple of ``wsize``.
        Y: Iterable of labels, one per record in ``X``.
        wsize: Number of samples per window (default 128, i.e. 32 windows for
            a 4096-sample record).

    Returns:
        ``(X_v, Y_v)`` where ``X_v`` has shape ``(n_windows_total, wsize)`` and
        ``Y_v`` is a 1-D array with the label of each row of ``X_v``.

    Raises:
        ValueError: If a record length is not a multiple of ``wsize`` (raised by
            ``np.reshape``) or if ``X`` is empty (raised by ``np.vstack``).
    """
    X_v = []
    Y_v = []

    for x, y in zip(X, Y):
        # Window the record "x"
        x_v = np.reshape(x, (-1, wsize))
        n_windows = x_v.shape[0]
        y_v = np.repeat(y, n_windows).reshape(n_windows, 1)

        X_v.append(x_v)
        Y_v.append(y_v)

    # Convert the lists X_v, Y_v to NumPy arrays
    X_v = np.vstack(X_v)
    Y_v = np.vstack(Y_v)

    # Use a single permutation for both arrays; shuffling them separately
    # would break the window/label correspondence.
    order = np.random.permutation(X_v.shape[0])

    # flatten turns the label column into a 1-D array
    return X_v[order], Y_v[order].flatten()


def train_autoencoder(autoencoder_model, x_train, x_val, epochs, batch_size):
    """Fit the autoencoder to reproduce its own input.

    ``x_train`` is passed as both input and target, and ``(x_val, x_val)`` is
    used as validation data. Returns whatever ``autoencoder_model.fit`` returns.
    """
    fit_options = dict(
        validation_data=(x_val, x_val),
        epochs=epochs,
        batch_size=batch_size,
        shuffle=True,
        verbose=1,
    )
    return autoencoder_model.fit(x_train, x_train, **fit_options)

def create_encoder_model(input_shape, encoder):
    """Wrap ``encoder`` in a new Keras model named "encoder_model".

    The first entry of ``input_shape`` is skipped; the remaining entries define
    the shape of the new model's input.
    """
    model_input = tf.keras.Input(shape=input_shape[1:])
    return tf.keras.Model(model_input, encoder(model_input), name="encoder_model")

def create_classifier_model(encoder, num_classes):
    """Build a dense softmax classifier that consumes the encoder's output.

    The classifier input takes the full per-sample shape of ``encoder.output``
    (every dimension after the batch axis) and flattens it, so it accepts both
    flat encodings and multi-dimensional ones such as ``(steps, channels)``.

    Args:
        encoder: Keras model whose ``output`` shape defines the feature shape.
        num_classes: Number of output classes.

    Returns:
        A model named 'classifier', compiled with the Adam optimizer,
        categorical cross-entropy loss (one-hot targets) and accuracy metric.
    """
    # Use every non-batch dimension; taking only shape[1] drops the channel
    # axis of a convolutional encoder and mismatches encoder.predict() output.
    feature_shape = tuple(encoder.output.shape[1:])
    inputs = tf.keras.Input(shape=feature_shape)
    features = tf.keras.layers.Flatten()(inputs)
    classifier_layer1 = tf.keras.layers.Dense(128, activation='relu')(features)
    classifier_output = tf.keras.layers.Dense(num_classes, activation='softmax')(classifier_layer1)
    model = tf.keras.Model(inputs, classifier_output, name='classifier')
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

def train_classifier(classifier_model, x_train, y_train, x_val, y_val, epochs, batch_size):
    """Fit the classifier on the training pair and validate on the validation pair.

    Returns whatever ``classifier_model.fit`` returns.
    """
    fit_options = dict(
        validation_data=(x_val, y_val),
        epochs=epochs,
        batch_size=batch_size,
        shuffle=True,
        verbose=1,
    )
    return classifier_model.fit(x_train, y_train, **fit_options)

def calculate_f1_score(y_true, y_pred):
    """Return the binary F1 score with label 1 as positive and 0 as negative.

    Only entries equal to 0 or 1 are counted; any other label value does not
    contribute to the counts.

    Args:
        y_true: Array-like of true labels (lists are accepted).
        y_pred: Array-like of predicted labels, same length as ``y_true``.

    Returns:
        The F1 score, or 0 when precision + recall is 0.
    """
    # Coerce to arrays: comparing a plain list with == yields a single bool,
    # which would silently produce zero counts.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    # True positives (TP), false positives (FP) and false negatives (FN)
    tp = np.sum((y_true == 1) & (y_pred == 1))
    fp = np.sum((y_true == 0) & (y_pred == 1))
    fn = np.sum((y_true == 1) & (y_pred == 0))

    # Precision and recall, guarded against empty denominators
    precision = tp / (tp + fp) if tp + fp > 0 else 0.0
    recall = tp / (tp + fn) if tp + fn > 0 else 0.0

    if precision + recall > 0:
        return 2 * (precision * recall) / (precision + recall)
    return 0.0

def calculate_sensitivity_specificity(y_true, y_pred):
    """Return binary sensitivity and specificity (label 1 positive, 0 negative).

    Only entries equal to 0 or 1 are counted; any other label value does not
    contribute to the counts.

    Args:
        y_true: Array-like of true labels (lists are accepted).
        y_pred: Array-like of predicted labels, same length as ``y_true``.

    Returns:
        ``(sensitivity, specificity)``; each is 0 when its denominator is 0.
    """
    # Coerce to arrays: comparing a plain list with == yields a single bool,
    # which would silently produce zero counts.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    # True positives (TP), false positives (FP), false negatives (FN), true negatives (TN)
    tp = np.sum((y_true == 1) & (y_pred == 1))
    fp = np.sum((y_true == 0) & (y_pred == 1))
    fn = np.sum((y_true == 1) & (y_pred == 0))
    tn = np.sum((y_true == 0) & (y_pred == 0))

    sensitivity = tp / (tp + fn) if tp + fn > 0 else 0.0
    specificity = tn / (tn + fp) if tn + fp > 0 else 0.0
    return sensitivity, specificity

def normalization(X, mu=None, sigma=None):
    """Standardize ``X`` as ``(X - mu) / sigma``.

    Args:
        X: Array to normalize.
        mu: Mean to subtract. When ``mu`` or ``sigma`` is None, both statistics
            are computed from ``X`` (over all elements).
        sigma: Standard deviation to divide by.

    Returns:
        ``(X_s, mu, sigma)``: the normalized array and the statistics used.
    """
    # Test against None explicitly: a supplied mean of exactly 0.0 is falsy
    # and must not trigger a recomputation from X.
    if mu is None or sigma is None:
        mu = np.mean(X)
        sigma = np.std(X)

    X_s = (X - mu) / sigma

    return X_s, mu, sigma



# In [None]:
def main():
    """Run the pipeline: load, split, normalize, window, train the autoencoder.

    The classification and evaluation stage is currently disabled and kept
    below as commented-out code.
    """
    # Google Drive setup
    from google.colab import drive
    drive.mount('/gdrive')

    # Read data. read_data() takes its folders from the module-level ruta_*
    # paths; the local path variables previously defined here were never used
    # (and pointed to a different folder), so they were removed.
    data_0, data_1, data_2 = read_data()

    # Partition data into train, validation, and test sets
    train_proportion = 0.8
    val_proportion = 0.1
    test_proportion = 0.1
    train_data, val_data, test_data = partition_data(train_proportion, val_proportion, test_proportion, data_0, data_1, data_2)

    # Convert data into the required format (x_data and y_data):
    # the last column is the label, the rest is the signal
    x_train = train_data[:, :-1]
    y_train = train_data[:, -1].astype(int)
    x_val = val_data[:, :-1]
    y_val = val_data[:, -1].astype(int)
    x_test = test_data[:, :-1]
    y_test = test_data[:, -1].astype(int)

    # Normalize data with the statistics of the training set only
    x_train, mu, sigma = normalization(x_train)
    x_val, _, _ = normalization(x_val, mu, sigma)
    x_test, _, _ = normalization(x_test, mu, sigma)

    # Windowing
    x_train, y_train = ventanear(x_train, y_train)
    x_val, y_val = ventanear(x_val, y_val)
    x_test, y_test = ventanear(x_test, y_test)

    # Define the input and encoding dimensions for the autoencoder
    input_dim = x_train.shape[1]
    encoding_dim = 32  # You can adjust this dimension as needed

    # Create and train the Autoencoder model; the window length is taken from
    # the data instead of a hard-coded 128
    autoencoder_model, encoder = create_autoencoder_model(input_dim, encoding_dim)
    autoencoder_model.summary()
    train_autoencoder(autoencoder_model, x_train, x_val, epochs=100, batch_size=128)

    # # Encode the data using the trained autoencoder
    # x_train_encoded = encoder.predict(x_train)
    # x_val_encoded = encoder.predict(x_val)
    # x_test_encoded = encoder.predict(x_test)

    # # Define the number of classes for the classifier
    # num_classes = len(np.unique(y_train))

    # # Create one-hot encoded labels for the classifier
    # y_train_onehot = tf.keras.utils.to_categorical(y_train, num_classes)
    # y_val_onehot = tf.keras.utils.to_categorical(y_val, num_classes)
    # y_test_onehot = tf.keras.utils.to_categorical(y_test, num_classes)

    # # Create and train the Classifier model
    # classifier_model = create_classifier_model(encoder, num_classes)
    # classifier_model.summary()
    # train_classifier(classifier_model, x_train_encoded, y_train_onehot, x_val_encoded, y_val_onehot, epochs=100, batch_size=64)

    # # Evaluate the models on test data and calculate metrics
    # y_pred = classifier_model.predict(x_test_encoded)
    # y_pred_labels = np.argmax(y_pred, axis=1)
    # y_test_labels = np.argmax(y_test_onehot, axis=1)

    # f1score = calculate_f1_score(y_test_labels, y_pred_labels)
    # sensitivity, specificity = calculate_sensitivity_specificity(y_test_labels, y_pred_labels)


    # num_thresholds = 100
    # thresholds = np.linspace(0, 1, num_thresholds)
    # tpr_values = []
    # fpr_values = []

    # for threshold in thresholds:
    #      y_pred_thresholded = (y_pred[:, 1] >= threshold).astype(int)
    #      tpr = np.sum((y_test_labels == 1) & (y_pred_thresholded == 1)) / np.sum(y_test_labels == 1)
    #      fpr = np.sum((y_test_labels == 0) & (y_pred_thresholded == 1)) / np.sum(y_test_labels == 0)
    #      tpr_values.append(tpr)
    #      fpr_values.append(fpr)

    #       # Plot the ROC curve
    # plt.figure()
    # plt.plot(fpr_values, tpr_values, color='darkorange', lw=2, label='ROC curve')
    # plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    # plt.xlim([0.0, 1.0])
    # plt.ylim([0.0, 1.05])
    # plt.xlabel('False Positive Rate')
    # plt.ylabel('True Positive Rate')
    # plt.title('Receiver Operating Characteristic')
    # plt.legend(loc="lower right")
    # plt.show()
    # # Display the calculated metrics in the command prompt
    # print("Sensitivity: {:.4f}".format(sensitivity))
    # print("Specificity: {:.4f}".format(specificity))
    # print("F1 Score: {:.4f}".format(f1score))

if __name__ == "__main__":
    main()