In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.fft import fft
from scipy.signal import welch
from scipy.stats import entropy
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import StandardScaler
import sklearn.decomposition

# Load EEG data
df = pd.read_csv("EEG-data.csv")


In [5]:
# Filter data and select relevant features
filtered_data = df[df['y'].isin([3, 4])]
X = filtered_data.drop(['Unnamed: 0', 'y'], axis=1)  # Features
y = filtered_data['y']  # Target


In [6]:
# Perform PCA with a high explained variance threshold
pca = sklearn.decomposition.PCA(n_components=0.9999)

# Transform the data to PCA space
X_pca = pca.fit_transform(X)

# Reconstruct the original data from PCA space
X_ori = pca.inverse_transform(X_pca)

# Calculate anomaly scores
anomaly_score = np.abs(X.to_numpy() - X_ori).sum(axis=1)

# Set anomaly detection threshold
threshold = np.quantile(anomaly_score, 0.99)

# Find anomalous IDs
anomalous_ids = np.argwhere(anomaly_score > threshold).squeeze()

# Display anomalous IDs
anomalous_ids


array([33, 37])

In [7]:
# Initialize the StandardScaler
scaler = StandardScaler()

# Fit and transform the features
X = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)


In [8]:
# Compute the Fast Fourier Transform (FFT) of the data along the specified axis
fft_data = np.abs(np.fft.fft(X, axis=1))


In [9]:
# Define function to compute PSD features
def compute_psd_features(fft_data, fs):
    psd_features = []
    freq_bands = {'delta': (0.5, 4), 'theta': (4, 8), 'alpha': (8, 13), 'beta': (13, 30), 'gamma': (30, 70)}

    for sample_fft in fft_data:
        psd_values, psd_freqs = welch(sample_fft, fs=fs, nperseg=fs)
        psd_band_values = {band: np.mean(psd_values[(psd_freqs >= f_low) & (psd_freqs < f_high)])
                           for band, (f_low, f_high) in freq_bands.items()}
        psd_features.append(psd_band_values)

    return pd.DataFrame(psd_features)

# Extract frequency-domain features
fs = 1000  # Sampling frequency
psd_df = compute_psd_features(fft_data, fs)


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


In [10]:
# Fill NaN values in the DataFrame with 0
psd_df.fillna(0, inplace=True)


In [13]:
x_train, x_test, y_train, y_test = train_test_split(psd_df, y, test_size=0.2, random_state=42)

In [14]:
# Define the SVM classifier
svm_classifier = SVC(kernel='linear', C=1)

# Perform cross-validation
accuracy_scores = cross_val_score(svm_classifier, psd_df, y, cv=5)

# Calculate average accuracy
average_accuracy = np.mean(accuracy_scores)
print("Average Accuracy:", average_accuracy)

# Fit the classifier on the entire dataset
svm_classifier.fit(psd_df, y)

# Predict on the last test set
y_pred = svm_classifier.predict(x_test)

# Compute confusion matrix
tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()

# Calculate sensitivity and specificity
sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)

print("Sensitivity:", sensitivity)
print("Specificity:", specificity)


Average Accuracy: 0.875
Sensitivity: 1.0
Specificity: 0.6666666666666666


In [15]:
# Initialize Random Forest classifier
rf = RandomForestClassifier(n_estimators=100)

# Fit the classifier on the training data
rf.fit(x_train, y_train)

# Predict on the test data
y_pred = rf.predict(x_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Compute confusion matrix
tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()

# Calculate sensitivity and specificity
sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)

print("Sensitivity:", sensitivity)
print("Specificity:", specificity)


Accuracy: 0.95
Sensitivity: 0.96
Specificity: 0.9333333333333333


In [16]:
# Initialize Naive Bayes classifier
naive_bayes_classifier = GaussianNB()

# Fit the classifier on the training data
naive_bayes_classifier.fit(x_train, y_train)

# Predict on the test data
y_pred = naive_bayes_classifier.predict(x_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Compute confusion matrix
tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()

# Calculate sensitivity and specificity
sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)

print("Sensitivity:", sensitivity)
print("Specificity:", specificity)

Accuracy: 0.9
Sensitivity: 1.0
Specificity: 0.7333333333333333
