In [1]:
# Standard library
import os
import pickle

# Third-party
import numpy as np
import pandas as pd
import pywt
import scipy
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Project-local modules
from helper import strToNp
# The original line was an incomplete `from preprocess import` (a SyntaxError).
# Preprocess is the only otherwise-unimported name the notebook instantiates.
from preprocess import Preprocess
from readData import readData
from FeatureExtraction import FeatureExtraction

In [2]:
# Build the combined CSV only when it is not already on disk; later runs
# reuse the existing file instead of calling readData again.
data_csv = "combined_dataset.csv"
if os.path.exists(data_csv) is False:
    data = readData(r"EOG dataset/3-class/", data_csv, "combined dataset")

In [3]:
# Read the combined dataset back from the CSV path set in the previous cell
data = pd.read_csv(filepath_or_buffer=data_csv)

In [4]:
# Second column -> labels as a NumPy array.
y = data.iloc[:, 1].values
# First column -> one strToNp-converted entry per row, collected in a list
# (presumably each cell is a signal serialized as a string; confirm in helper.py).
X = data.iloc[:, 0].apply(strToNp).to_list()

In [5]:
# Instantiate the preprocessing and feature-extraction helpers used below
preprocess, featureExtraction = Preprocess(), FeatureExtraction()

In [6]:
# Run the full preprocessing pipeline over every signal.
# NOTE: X is rebound to its own preprocessed version, so executing this cell
# a second time would preprocess the already-preprocessed signals again.
X = list(map(preprocess.fullPreprocess, X))

In [7]:
# Wavelet features: one extraction per preprocessed signal, stacked into an array
X_Wavelet_features = np.array(
    list(map(featureExtraction.extract_wavelet_features, X))
)

# PSD features: same pattern with the PSD extractor
X_PSD_features = np.array(
    list(map(featureExtraction.extract_psd_features, X))
)

# Report the raw (not yet flattened) feature shapes
print("Shape of X_Wavelet_features:", X_Wavelet_features.shape)
print("Shape of X_PSD_features:", X_PSD_features.shape)



Shape of X_Wavelet_features: (200, 1, 75, 164)
Shape of X_PSD_features: (200, 25, 2)


In [8]:

# Flatten everything after the sample axis so each sample becomes one
# feature vector (2-D input, as the scaler and classifiers below are given).
X_Wavelet_features = X_Wavelet_features.reshape(X_Wavelet_features.shape[0], -1)
X_PSD_features = X_PSD_features.reshape(X_PSD_features.shape[0], -1)

# Report the flattened shapes
print("Shape of X_Wavelet_features:", X_Wavelet_features.shape)
print("Shape of X_PSD_features:", X_PSD_features.shape)

Shape of X_Wavelet_features: (200, 12300)
Shape of X_PSD_features: (200, 50)


In [9]:
# Split data into training and testing sets.
# Both feature matrices and the labels go through ONE train_test_split call,
# so the wavelet rows, PSD rows and labels share a single partition.
# Previously two separate calls each overwrote y_train / y_test and stayed
# aligned only because they used the same random_state on equally long inputs.
(
    X_Wavelet_train,
    X_Wavelet_test,
    X_PSD_train,
    X_PSD_test,
    y_train,
    y_test,
) = train_test_split(
    X_Wavelet_features,
    X_PSD_features,
    y,
    test_size=0.2,
    random_state=42,
)

In [10]:
# Standardize each feature set: statistics are learned on the training split
# only and then applied to both the training and the test split.
# NOTE: the *_train / *_test names are rebound to their scaled versions, so
# re-running this cell would refit the scalers on already-scaled data.
scaler_Wavelet = StandardScaler().fit(X_Wavelet_train)
X_Wavelet_train = scaler_Wavelet.transform(X_Wavelet_train)
X_Wavelet_test = scaler_Wavelet.transform(X_Wavelet_test)

scaler_PSD = StandardScaler().fit(X_PSD_train)
X_PSD_train = scaler_PSD.transform(X_PSD_train)
X_PSD_test = scaler_PSD.transform(X_PSD_test)

In [15]:
# Persist both fitted scalers, one pickle file per feature set
for pkl_path, fitted_scaler in (
    ("scaler_Wavelet.pkl", scaler_Wavelet),
    ("scaler_PSD.pkl", scaler_PSD),
):
    with open(pkl_path, "wb") as file:
        pickle.dump(fitted_scaler, file)

In [12]:
# Train and evaluate SVM classifier on the wavelet features.
# Exhaustive search over a 5 x 5 grid of C / gamma values with 5-fold CV.
svm = SVC()
svm_params = {
    "C": [0.01, 0.1, 1, 10, 100],
    "gamma": [0.001, 0.01, 0.1, 1, 10],
}
svm_grid = GridSearchCV(svm, svm_params, cv=5)
svm_grid.fit(X_Wavelet_train, y_train)

# Score the fitted search object on the held-out test split.
# (This line carried a stray indent that raised IndentationError.)
svm_preds = svm_grid.predict(X_Wavelet_test)
svm_accuracy = accuracy_score(y_test, svm_preds)
print("SVM accuracy_Wavelet:", svm_accuracy)


# Save SVM model in a pickle file (the whole fitted GridSearchCV object)
with open("svm_model_Wavelet.pkl", "wb") as file:
    pickle.dump(svm_grid, file)

SVM accuracy_Wavelet: 0.9


In [39]:
# SVM on the PSD features: 5-fold grid search over C and gamma
svm = SVC()
svm_params = dict(
    C=[0.01, 0.1, 1, 10, 100],
    gamma=[0.001, 0.01, 0.1, 1, 10],
)
svm_grid = GridSearchCV(estimator=svm, param_grid=svm_params, cv=5)
svm_grid.fit(X_PSD_train, y_train)

# Accuracy on the held-out PSD test split
svm_preds = svm_grid.predict(X_PSD_test)
svm_accuracy = accuracy_score(y_true=y_test, y_pred=svm_preds)
print("SVM accuracy_PSD:", svm_accuracy)


# Pickle the fitted search object
with open("svm_model_PSD.pkl", mode="wb") as file:
    pickle.dump(svm_grid, file)

SVM accuracy_PSD: 0.575


In [40]:
# Train and evaluate random forest classifier on the wavelet features.
# random_state is fixed so the grid search, the reported accuracy and the
# pickled model are reproducible; the unseeded forest gave run-to-run
# differences. 42 matches the seed already used for the train/test split.
rf = RandomForestClassifier(random_state=42)
rf_params = {
    "n_estimators": [50, 100, 200, 300, 400],
    "max_depth": [5, 10, 20, 30, 40],
}
# 5 x 5 parameter grid, 5-fold cross-validation
rf_grid = GridSearchCV(rf, rf_params, cv=5)
rf_grid.fit(X_Wavelet_train, y_train)


# Accuracy on the held-out wavelet test split
rf_preds = rf_grid.predict(X_Wavelet_test)
rf_accuracy = accuracy_score(y_test, rf_preds)
print("Random forest accuracy_Wavelet:", rf_accuracy)

# Save random forest model in a pickle file (the whole fitted GridSearchCV object)
with open("rf_model_Wavelet.pkl", "wb") as file:
    pickle.dump(rf_grid, file)

Random forest accuracy_Wavelet: 0.85


In [41]:
# Train and evaluate random forest classifier on the PSD features.
# random_state is fixed so the grid search, the reported accuracy and the
# pickled model are reproducible; the unseeded forest gave run-to-run
# differences. 42 matches the seed already used for the train/test split.
rf = RandomForestClassifier(random_state=42)
rf_params = {
    "n_estimators": [50, 100, 200, 300, 400],
    "max_depth": [5, 10, 20, 30, 40],
}
# 5 x 5 parameter grid, 5-fold cross-validation
rf_grid = GridSearchCV(rf, rf_params, cv=5)
rf_grid.fit(X_PSD_train, y_train)


# Accuracy on the held-out PSD test split
rf_preds = rf_grid.predict(X_PSD_test)
rf_accuracy = accuracy_score(y_test, rf_preds)
print("Random forest accuracy_PSD:", rf_accuracy)

# Save random forest model in a pickle file (the whole fitted GridSearchCV object)
with open("rf_model_PSD.pkl", "wb") as file:
    pickle.dump(rf_grid, file)

Random forest accuracy_PSD: 0.575
