# Installation of packages

In [None]:
pip install gdown -q

# Download dataset from Google Drive

In [None]:
# Fetch the file with this Google Drive ID using the gdown command-line tool.
# NOTE(review): presumably this saves "eeg-data.csv", which is read further down — confirm.
!gdown 1V5B7Bt6aJm0UHbR7cRKBEK8jx7lYPVuX

# Importing packages

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import json
import random
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split


In [None]:
import tensorflow as tf

# `tf.data.Dataset` is a class, not a module, so it cannot appear on the left
# of a `from ... import`. Bind its constructor to the name the later cells use.
from_tensor_slices = tf.data.Dataset.from_tensor_slices

In [None]:
from tensorflow import keras
from tensorflow.keras import Model
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.layers import (Input, Conv1D, BatchNormalization, Dropout, Flatten, Dense)
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import TopKCategoricalAccuracy, AUC, Precision, Recall
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import L2
from tensorflow.keras.utils import to_categorical

# Reading Data

In [None]:
# Load the CSV into a DataFrame; the path is relative, so the file must be in
# the notebook's working directory.
eeg = pd.read_csv("eeg-data.csv")

# Data Cleaning

In [None]:
# Keep the unlabeled rows aside before they are filtered out of `eeg`.
unlabeled_eeg = eeg[eeg["label"] == "unlabeled"]

# Drop the rows labelled "unlabeled" / "everyone paired", remove the columns
# that are not used afterwards, and re-number the remaining rows from 0.
eeg = (
    eeg.loc[~eeg["label"].isin(["unlabeled", "everyone paired"])]
    .drop(
        columns=[
            "indra_time",
            "Unnamed: 0",
            "browser_latency",
            "reading_time",
            "attention_esense",
            "meditation_esense",
            "updatedAt",
            "createdAt",
        ]
    )
    .reset_index(drop=True)
)
eeg.head()

In [None]:
def convert_string_data_to_values(value_string):
    """Decode a JSON-encoded string and return the resulting Python value."""
    return json.loads(value_string)


# Decode every "raw_values" cell from its JSON string form.
eeg["raw_values"] = eeg["raw_values"].apply(convert_string_data_to_values)

# Only rows whose "signal_quality" is strictly below this value are kept.
QUALITY_THRESHOLD = 128

# Re-number the rows after filtering: later cells look rows up by integer
# label (`eeg.loc[7, ...]`, `eeg.loc[start:end, ...]` with bounds derived from
# `len(eeg)`), which is only reliable on a gap-free 0..n-1 index.
eeg = eeg.loc[eeg["signal_quality"] < QUALITY_THRESHOLD].reset_index(drop=True)
eeg.head()

In [None]:
# Show the distinct labels (and how many there are) before merging variants.
print("Before replacing labels")
print(eeg["label"].unique(), "\n")
print(len(eeg["label"].unique()), "\n")


# Collapse numbered / versioned label variants (e.g. "blink1".."blink5") onto
# one label each, editing the "label" column of `eeg` in place.
# NOTE(review): "colorRound1".."colorRound5" are both replacement targets of
# the "colorRoundN-M" entries and keys mapped to "colorRound". Whether a
# "colorRoundN-M" row ends up as "colorRoundN" or "colorRound" depends on how
# pandas applies a dict replacement — check the labels printed below.
eeg.replace(
    {
        "label": {
            "blink1": "blink",
            "blink2": "blink",
            "blink3": "blink",
            "blink4": "blink",
            "blink5": "blink",
            "math1": "math",
            "math2": "math",
            "math3": "math",
            "math4": "math",
            "math5": "math",
            "math6": "math",
            "math7": "math",
            "math8": "math",
            "math9": "math",
            "math10": "math",
            "math11": "math",
            "math12": "math",
            "thinkOfItems-ver1": "thinkOfItems",
            "thinkOfItems-ver2": "thinkOfItems",
            "video-ver1": "video",
            "video-ver2": "video",
            "thinkOfItemsInstruction-ver1": "thinkOfItemsInstruction",
            "thinkOfItemsInstruction-ver2": "thinkOfItemsInstruction",
            "colorRound1-1": "colorRound1",
            "colorRound1-2": "colorRound1",
            "colorRound1-3": "colorRound1",
            "colorRound1-4": "colorRound1",
            "colorRound1-5": "colorRound1",
            "colorRound1-6": "colorRound1",
            "colorRound2-1": "colorRound2",
            "colorRound2-2": "colorRound2",
            "colorRound2-3": "colorRound2",
            "colorRound2-4": "colorRound2",
            "colorRound2-5": "colorRound2",
            "colorRound2-6": "colorRound2",
            "colorRound3-1": "colorRound3",
            "colorRound3-2": "colorRound3",
            "colorRound3-3": "colorRound3",
            "colorRound3-4": "colorRound3",
            "colorRound3-5": "colorRound3",
            "colorRound3-6": "colorRound3",
            "colorRound4-1": "colorRound4",
            "colorRound4-2": "colorRound4",
            "colorRound4-3": "colorRound4",
            "colorRound4-4": "colorRound4",
            "colorRound4-5": "colorRound4",
            "colorRound4-6": "colorRound4",
            "colorRound5-1": "colorRound5",
            "colorRound5-2": "colorRound5",
            "colorRound5-3": "colorRound5",
            "colorRound5-4": "colorRound5",
            "colorRound5-5": "colorRound5",
            "colorRound5-6": "colorRound5",
            "colorInstruction1": "colorInstruction",
            "colorInstruction2": "colorInstruction",
            "readyRound1": "readyRound",
            "readyRound2": "readyRound",
            "readyRound3": "readyRound",
            "readyRound4": "readyRound",
            "readyRound5": "readyRound",
            "colorRound1": "colorRound",
            "colorRound2": "colorRound",
            "colorRound3": "colorRound",
            "colorRound4": "colorRound",
            "colorRound5": "colorRound",
        }
    },
    inplace=True,
)

# Show the distinct labels again so the effect of the merge can be checked.
print("After replacing labels")
print(eeg["label"].unique())
print(len(eeg["label"].unique()))



In [None]:
# Number of distinct values in the "label" column (missing values, if any,
# count as one); this sizes the model's softmax output layer.
num_classes = eeg["label"].nunique(dropna=False)
print(num_classes)

# Visualization of data

In [None]:
def view_eeg_plot(idx):
    """Plot the "raw_values" series of the `eeg` row whose index label is `idx`."""
    signal = eeg.loc[idx, "raw_values"]
    plt.plot(signal)
    plt.title("Sample random plot")
    plt.show()


view_eeg_plot(7)

In [None]:
# Count samples per label once so bar heights and tick labels are guaranteed
# to line up (and to have the same length).
label_counts = eeg["label"].value_counts()
positions = list(range(len(label_counts)))

fig, ax = plt.subplots()
ax.bar(positions, label_counts.to_numpy())
# Name each bar after its class; bare 0..n-1 positions do not tell the reader
# which class a bar belongs to.
ax.set_xticks(positions)
ax.set_xticklabels(label_counts.index, rotation=90)
ax.set_xlabel("Class")
ax.set_ylabel("Number of samples")
ax.set_title("Number of samples per class")
plt.show()

# Data Preprocessing

In [None]:
# Map each string label to an integer class id. `LabelEncoder` is imported by
# name at the top of the notebook (there is no `preprocessing` module in
# scope). The fitted encoder is kept in `le` so class ids can be translated
# back to label names later.
le = LabelEncoder()  # Generates a look-up table
eeg["label"] = le.fit_transform(eeg["label"])

In [None]:
# Min-max rescale every recording independently: the scaler is re-fitted on
# each series (reshaped to a single column), so after this cell `scaler` only
# holds the fit of the last one. `MinMaxScaler` is imported by name at the top
# of the notebook (there is no `preprocessing` module in scope).
scaler = MinMaxScaler()
series_list = [
    scaler.fit_transform(np.asarray(series).reshape(-1, 1))
    for series in eeg["raw_values"]
]

# Encoded class id of every recording, in the same order as `series_list`.
labels_list = list(eeg["label"])

In [None]:
# Hold out 15% of the recordings for testing; the fixed seed keeps the split
# reproducible. `train_test_split` is imported by name at the top of the
# notebook (there is no `model_selection` module in scope).
x_train, x_test, y_train, y_test = train_test_split(
    series_list, labels_list, test_size=0.15, random_state=42, shuffle=True
)

print(
    f"Length of x_train : {len(x_train)}\nLength of x_test : {len(x_test)}\nLength of y_train : {len(y_train)}\nLength of y_test : {len(y_test)}"
)

# Shape the inputs as (samples, 512, 1) for the Conv1D input layer.
x_train = np.asarray(x_train).astype(np.float32).reshape(-1, 512, 1)
y_train = np.asarray(y_train).astype(np.float32).reshape(-1, 1)
# Pass `num_classes` explicitly: otherwise the one-hot width is inferred from
# the largest id present in each split, so a split that happens to miss the
# highest class would get a narrower target matrix than the model's output.
y_train = to_categorical(y_train, num_classes=num_classes)

x_test = np.asarray(x_test).astype(np.float32).reshape(-1, 512, 1)
y_test = np.asarray(y_test).astype(np.float32).reshape(-1, 1)
y_test = to_categorical(y_test, num_classes=num_classes)

In [None]:
# Number of samples per batch for both the training and the test pipeline.
BATCH_SIZE = 64
# Size of the buffer used when shuffling the training dataset.
SHUFFLE_BUFFER_SIZE = BATCH_SIZE * 2

In [None]:
# Training pipeline: slice the arrays per sample, shuffle, then batch.
train_dataset = (
    from_tensor_slices((x_train, y_train))
    .shuffle(SHUFFLE_BUFFER_SIZE)
    .batch(BATCH_SIZE)
)

# Test pipeline: same slicing and batching, but kept in its original order.
test_dataset = from_tensor_slices((x_test, y_test)).batch(BATCH_SIZE)

In [None]:
# Count how many samples carry each (encoded) label.
vals_dict = {}
for label in eeg["label"]:
    vals_dict[label] = vals_dict.get(label, 0) + 1
total = sum(vals_dict.values())

# Naive class weighting:
#   weight = 1 - (samples in the class / total samples)
# so the more samples a class has, the lower its weight.
weight_dict = {label: 1 - (count / total) for label, count in vals_dict.items()}
print(weight_dict)

# Custom Functions

In [None]:
def plot_history_metrics(history: keras.callbacks.History):
    """Plot every series recorded in a training history, one subplot each.

    Args:
        history: Object whose ``history`` attribute maps a metric name to the
            sequence of values recorded for it (e.g. the object returned by
            ``model.fit``).

    Returns:
        None. The figure is shown with ``plt.show()``; nothing is drawn when
        the history is empty.
    """
    metrics = history.history
    total_plots = len(metrics)
    if total_plots == 0:
        # Nothing was recorded, so there is nothing to draw.
        return

    # Aim for two rows. `max` keeps `cols` at 1 for a single metric, where
    # `total_plots // 2` would be 0 and the row computation would divide by 0.
    cols = max(1, total_plots // 2)
    rows = -(-total_plots // cols)  # ceiling division

    plt.figure(figsize=(15, 10))
    for position, (name, values) in enumerate(metrics.items(), start=1):
        plt.subplot(rows, cols, position)
        plt.plot(range(len(values)), values)
        plt.title(str(name))
    plt.show()

# Define Model

In [None]:
def SignalModel():
    """Build the (uncompiled) 1D-convolutional classifier.

    The network takes inputs of shape (512, 1), applies six stride-2 Conv1D +
    BatchNormalization stages stacked one after another, then a dense head
    with dropout that ends in a softmax over `num_classes` outputs.

    Returns:
        A Keras ``Model`` mapping the input layer to the softmax output.
    """
    inputLayer = Input(shape=(512, 1))

    # First stage: no activation, matching the original definition.
    x = Conv1D(filters=32, kernel_size=3, strides=2, padding="same")(inputLayer)
    x = BatchNormalization()(x)

    # Each remaining stage consumes the output of the previous one. Previously
    # stages 3-6 were all wired to the first stage's output, which left stages
    # 2-5 as dead branches and fed a much longer sequence into Flatten.
    conv_stages = ((64, 3), (128, 5), (256, 5), (512, 7), (1024, 7))
    for filters, kernel_size in conv_stages:
        x = Conv1D(
            filters=filters,
            kernel_size=kernel_size,
            strides=2,
            padding="same",
            activation="relu",
        )(x)
        x = BatchNormalization()(x)
    x = Dropout(0.2)(x)

    # Dense head: progressively narrower layers, L2-regularised after the first.
    x = Flatten()(x)
    x = Dense(units=4096, activation="relu")(x)
    x = Dropout(0.2)(x)

    x = Dense(units=2048, activation="relu", kernel_regularizer=L2())(x)
    x = Dropout(0.2)(x)

    x = Dense(units=1024, activation="relu", kernel_regularizer=L2())(x)
    x = Dropout(0.2)(x)

    x = Dense(units=128, activation="relu", kernel_regularizer=L2())(x)

    outputLayer = Dense(units=num_classes, activation="softmax")(x)

    return Model(inputs=inputLayer, outputs=outputLayer)

In [None]:
# Build the network and print its layer-by-layer summary.
model = SignalModel()
model.summary()

In [None]:
callbacks = [
    # Write "best_model.keras" only when the monitored training loss improves
    # (`save_best_only`; the keyword was previously misspelled).
    ModelCheckpoint(filepath="best_model.keras", save_best_only=True, monitor="loss"),
    # Multiply the learning rate by 0.2 once the validation top-k accuracy has
    # not improved for 2 epochs, never going below 1e-6.
    ReduceLROnPlateau(
        monitor="val_top_k_categorical_accuracy",
        factor=0.2,
        patience=2,
        min_lr=0.000001,
    ),
]

optimizer = Adam(learning_rate=0.001, amsgrad=True)
# The Keras class is spelled `CategoricalCrossentropy` (lower-case "e").
loss = CategoricalCrossentropy()
# Order matters: `model.evaluate` reports the metrics in this order after the loss.
metric = [
    TopKCategoricalAccuracy(k=3),
    AUC(),
    Precision(),
    Recall(),
]
epochs = 5

# Compile and Fit

In [None]:
# Attach the optimizer, loss and metrics configured above.
model.compile(optimizer=optimizer, loss=loss, metrics=metric)

# Train with the held-out set as validation data; the per-class weights
# computed earlier are applied to the loss.
modelHistory = model.fit(
    train_dataset,
    validation_data=test_dataset,
    epochs=epochs,
    callbacks=callbacks,
    class_weight=weight_dict,
)

# Evaluation

In [None]:
# Evaluate on the held-out dataset built earlier (`test_dataset`; the name
# `test_data` was never defined). The result is unpacked as the loss followed
# by the four compiled metrics. It is stored in `test_loss` so the `loss`
# object passed to `model.compile` is not overwritten by a float.
test_loss, accuracy, auc, precision, recall = model.evaluate(test_dataset)

print(f"Loss : {test_loss}")
print(f"Top 3 Categorical Accuracy : {accuracy}")
print(f"Area under the Curve (ROC) : {auc}")
print(f"Precision : {precision}")
print(f"Recall : {recall}")


# Visualization of model

In [None]:
# Plot the per-epoch metrics from the History object returned by `model.fit`
# (the model itself is not a training history).
plot_history_metrics(modelHistory)

In [None]:
def view_evaluated_eeg_plots(model, num_plots=12):
    """Plot a random run of consecutive recordings with actual vs. predicted labels.

    Args:
        model: Trained classifier exposing ``predict``; it is fed an array of
            shape (n, 512, 1).
        num_plots: Maximum number of consecutive rows of `eeg` to show
            (fewer are shown if `eeg` has fewer rows).

    Returns:
        None. The figure is shown with ``plt.show()``; nothing is drawn when
        there are no rows to plot.
    """
    num_plots = min(num_plots, len(eeg))
    if num_plots <= 0:
        return

    # Choose the window by position so it always lies inside the frame and
    # always holds exactly `num_plots` rows, whatever the index labels are.
    start_index = random.randint(0, len(eeg) - num_plots)
    window = eeg.iloc[start_index : start_index + num_plots]
    data = window["raw_values"]

    # Rescale each recording on its own, the same way the training inputs were
    # prepared, then shape the batch as (n, 512, 1).
    data_array = (
        np.asarray(
            [scaler.fit_transform(np.asarray(series).reshape(-1, 1)) for series in data]
        )
        .astype(np.float32)
        .reshape(-1, 512, 1)
    )

    # Translate encoded class ids back to label names for display.
    predicted_ids = np.argmax(model.predict(data_array, verbose=0), axis=1)
    original_labels = le.inverse_transform(window["label"].to_numpy())
    predicted_labels = le.inverse_transform(predicted_ids)

    cols = min(4, num_plots)
    rows = -(-num_plots // cols)  # ceiling division

    fig = plt.figure(figsize=(20, 10))
    for position, (plot_data, og_label, pred_label) in enumerate(
        zip(data, original_labels, predicted_labels), start=1
    ):
        plt.subplot(rows, cols, position)
        plt.plot(plot_data)
        plt.title(f"Actual Label : {og_label}\nPredicted Label : {pred_label}")
    # Leave vertical room for the two-line titles.
    fig.subplots_adjust(hspace=0.5)
    plt.show()


view_evaluated_eeg_plots(model)