In [1]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    make_scorer,
    recall_score,
    roc_auc_score,
)
from sklearn.model_selection import cross_val_score, train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Random seed passed as `random_state` to the train/test split further down.
seed = 42


In [2]:
# Show the default SVC hyperparameters (presumably as a reference for the grid search below).
SVC().get_params()

{'C': 1.0,
 'break_ties': False,
 'cache_size': 200,
 'class_weight': None,
 'coef0': 0.0,
 'decision_function_shape': 'ovr',
 'degree': 3,
 'gamma': 'scale',
 'kernel': 'rbf',
 'max_iter': -1,
 'probability': False,
 'random_state': None,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}

In [3]:
df = pd.read_csv("data.csv", index_col=0)

# One sub-frame per value of the "y" column (1 through 5).
df1, df2, df3, df4, df5 = (df[df["y"] == label] for label in range(1, 6))

# Binary problems: class 1 stacked with each of the other classes in turn.
df12, df13, df14, df15 = (
    pd.concat((df1, other), axis=0) for other in (df2, df3, df4, df5)
)

# pop() removes "y" from each pairwise frame in place and hands back the labels.
labels12, labels13, labels14, labels15 = (
    frame.pop("y") for frame in (df12, df13, df14, df15)
)
df.shape

(500, 4095)

**CROSS-VALIDATION**

In [4]:
def cross_validate(estimator, X, y, k, neg):
    """Print cross-validated metrics (mean +- std over the folds) for a binary classifier.

    Parameters
    ----------
    estimator : scikit-learn estimator
        Unfitted model; it is cloned and refit on every fold.
    X, y : array-like
        Features and class labels.
    k : int or CV splitter
        Forwarded as ``cv``.
    neg : label
        Label used as ``pos_label`` for the "Sensitivity" recall. "Specificity"
        is the recall of label ``1``.
        NOTE(review): despite its name, this argument is the class whose recall
        is reported as sensitivity -- confirm which class is meant to be positive.

    Returns
    -------
    None
        Results are printed, one line per metric, followed by a blank line.
    """
    # Local import under an alias: this function shadows scikit-learn's own name.
    from sklearn.model_selection import cross_validate as sk_cross_validate

    metrics = {
        "Specificity": make_scorer(recall_score, pos_label=1),
        "Sensitivity": make_scorer(recall_score, pos_label=neg),
        "Accuracy": make_scorer(accuracy_score),
        # The built-in scorer feeds continuous decision scores to roc_auc_score;
        # make_scorer(roc_auc_score) would only see hard class predictions.
        "ROC-AUC": "roc_auc",
    }
    # A single multi-metric run fits each fold once instead of once per metric.
    scores = sk_cross_validate(estimator, X, y, cv=k, scoring=metrics)
    for name in metrics:
        results = scores[f"test_{name}"]
        mu = results.mean()
        sigma = results.std()
        print(f"{name}: {mu:.3f} +- {sigma:.3f}")
    print()


## **SVM**

**FITTING MODELS**

In [5]:
def get_principal_components(X, threshold=0.9):
  """Return the leading eigenvectors of the correlation matrix of ``X``.

  The result has one row per retained eigenvector, sorted by decreasing
  eigenvalue, with the eigenvalue stored in the extra column ``"l"``.
  Rows are kept while the cumulative share of the eigenvalue sum is
  ``<= threshold``, plus one more row.
  """
  eigenvalues, eigenvectors = np.linalg.eig(X.corr())

  # eig returns eigenvectors as columns; transpose so each row is one vector.
  table = pd.DataFrame(eigenvectors.real.T)
  table["l"] = eigenvalues.real
  table = table.sort_values("l", ascending=False)

  explained = np.cumsum(table["l"] / sum(table["l"]))
  n_comp = sum(explained <= threshold) + 1
  return table.head(n_comp)

def pca(X, components):
  """Project ``X`` onto principal components and standardize every projection.

  Parameters
  ----------
  X : DataFrame or 2-D array
      Samples in rows, features in columns.
  components : DataFrame
      One eigenvector per row plus an eigenvalue column ``"l"`` (the layout
      produced by ``get_principal_components``); ``"l"`` is dropped before projecting.

  Returns
  -------
  numpy.ndarray
      Shape (n_samples, n_components); each column has zero mean and unit
      standard deviation (a constant column comes back as all zeros).

  NOTE(review): the mean/std are taken from the data passed in, so separate
  calls for train and test are standardized independently.
  """
  loadings = np.asarray(components.drop("l", axis=1), dtype=float).T
  projected = np.dot(np.asarray(X, dtype=float), loadings)

  center = projected.mean(axis=0)
  scale = projected.std(axis=0)
  # A constant projection has std 0; divide by 1 there to avoid NaN/inf.
  scale = np.where(scale == 0, 1.0, scale)

  # Return the standardized array (the raw projection used to be returned
  # because the standardized result was bound to a misspelled name).
  return (projected - center) / scale

In [6]:
def svc_pca_classification(df, labels, neg_class, **kwargs):
  """Grid-search an SVC on PCA-projected features and print the best parameters.

  Parameters
  ----------
  df : DataFrame
      Feature matrix.
  labels : Series or array-like
      Class labels aligned with ``df``.
  neg_class : label
      Label used as ``pos_label`` of the recall score that drives the search.
  **kwargs
      Must contain ``params``: the parameter grid handed to ``GridSearchCV``.

  Raises
  ------
  TypeError
      If ``params`` is not supplied.

  Returns
  -------
  None
      The best parameter combination is printed.
  """
  param_grid = kwargs.get("params")
  if param_grid is None:
    # Fail early with a clear message instead of passing None into GridSearchCV.
    raise TypeError("svc_pca_classification() requires a 'params' keyword argument")

  # Train-test split; the held-out quarter is not evaluated in this function.
  X_train, _, y_train, _ = train_test_split(df, labels, test_size=0.25, random_state=seed)

  # PCA on train
  components = get_principal_components(X_train)
  X_train = pca(X_train, components)

  # Train the models: 10-fold grid search scored by recall of `neg_class`
  recall_of_class = make_scorer(recall_score, pos_label=neg_class)
  clf = GridSearchCV(SVC(), param_grid, scoring=recall_of_class, cv=10)
  clf.fit(X_train, y_train)
  print(clf.best_params_)

In [7]:
# Grid explored by GridSearchCV. scikit-learn spells the Gaussian kernel "rbf";
# "gaussian" is not an accepted value for SVC(kernel=...).
params = [
    {"C": [1, 5, 10, 50, 100], "kernel": ["linear", "rbf"]},
]


svc_pca_classification(df12, labels12, 2, params=params)
# svc_pca_classification(df13, labels13, 3, params=params)
# svc_pca_classification(df14, labels14, 4, params=params)
# svc_pca_classification(df15, labels15, 5, params=params)

**RANDOM FOREST**

In [None]:
# No earlier top-level cell defines the split, so build it here from the full
# frame, which still carries its "y" column at this point.
X_train, X_test, y_train, y_test = train_test_split(
    df.drop(columns="y"), df["y"], test_size=0.25, random_state=seed
)

classifier = RandomForestClassifier(n_estimators=10, criterion="entropy", random_state=0)
classifier.fit(X_train, y_train)

**CROSS-VALIDATION FOR SVM**

In [None]:
# Score a linear SVC so this cell matches its "SVM" heading; the random forest
# `classifier` has its own cross-validation cell.
accuracies = cross_val_score(
    estimator=SVC(kernel="linear", random_state=seed), X=X_train, y=y_train, cv=10
)
print("Accuracy: {:.2f} %".format(accuracies.mean()*100))
print("Standard Deviation: {:.2f} %".format(accuracies.std()*100))

**CROSS VALIDATION FOR RANDOM FOREST**

In [None]:
# 10-fold cross-validated accuracy of the random forest on the training split.
accuracies = cross_val_score(classifier, X_train, y_train, cv=10)
print(f"Accuracy: {accuracies.mean() * 100:.2f} %")
print(f"Standard Deviation: {accuracies.std() * 100:.2f} %")

In [None]:
# Predict one held-out sample. Slicing with a list keeps a one-row 2-D frame,
# which is the shape predict() expects. No scaler is applied because
# `classifier` was fitted on X_train directly.
print(classifier.predict(X_test.iloc[[0]]))

In [None]:
y_pred = classifier.predict(X_test)
# Side-by-side table: predicted label | true label. np.asarray handles y_test
# being a pandas Series, which has no .reshape method.
print(np.column_stack((y_pred, np.asarray(y_test))))

In [None]:
# Rows of the matrix are true labels, columns are predicted labels.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)
accuracy_score(y_true=y_test, y_pred=y_pred)

**TESTING FOR SVM**

**TESTING FOR RANDOM FOREST**

**BIAS FOR SVM**

**BIAS FOR RANDOM FOREST**

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

from google.colab import drive
# Mount Google Drive (Colab only), presumably so the data.csv read below is reachable.
drive.mount('/content/drive')
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

# Random seed passed as `random_state` to the SVC models below.
seed = 42
# Author notes (appear to be open TODOs -- confirm):
# ADASYN
# 5-CV
# sensitivity +- std

In [None]:
df = pd.read_csv("drive/MyDrive/Colab Notebooks/EEG Classifier/data.csv", index_col=0)

# One sub-frame per value of the "y" column (1 through 5).
# Disabled option for classes 2-5: .sample(frac=0.25, axis=0, random_state=seed)
df1, df2, df3, df4, df5 = (df[df["y"] == label] for label in range(1, 6))

# Binary problems: class 1 stacked with each of the other classes in turn.
df12, df13, df14, df15 = (
    pd.concat((df1, other), axis=0) for other in (df2, df3, df4, df5)
)

# pop() removes "y" from each pairwise frame in place and hands back the labels.
labels12, labels13, labels14, labels15 = (
    frame.pop("y") for frame in (df12, df13, df14, df15)
)

# Detach the target from the full frame too; `df` holds only features afterwards.
labels = df.pop("y")
# Disabled: labels[labels != 1] = 0
# Disabled: df.shape, labels.shape

In [None]:
# x-axis: 1-based sample index, sized from the frame instead of a hard-coded
# 4095 so it always matches the number of feature columns.
t = np.arange(1, df.shape[1] + 1)
fig, axes = plt.subplots(5, 1)

# NOTE(review): 5..1 are row positions in `df`, not class labels -- confirm
# these rows are the signals meant to be compared.
for ax, row, color in zip(axes, (5, 4, 3, 2, 1), ("green", "blue", "red", "orange", "purple")):
    ax.plot(t, df.iloc[row], color=color)
    ax.set_ylabel(f"row {row}")
axes[-1].set_xlabel("sample index");

In [None]:
# Column label at which each of the plotted rows reaches its maximum.
tuple(df.iloc[row].idxmax() for row in (5, 4, 3, 2, 1))

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, roc_auc_score

In [None]:
def print_metrics(y_true, y_pred, model_name):
  """Print sensitivity, specificity, accuracy and ROC-AUC for hard predictions.

  Parameters
  ----------
  y_true, y_pred : array-like
      True and predicted class labels.
  model_name : str
      Heading printed before the metrics.

  With two classes the confusion matrix is read as (tn, fp, fn, tp), i.e. the
  larger label in sorted order is the positive class. With more than two
  classes each class is scored one-vs-rest and the macro average is printed.
  For ROC-AUC that average uses (sensitivity + specificity) / 2 per class,
  which is what the area under the ROC curve reduces to for hard predictions.
  """
  cm = confusion_matrix(y_true, y_pred)
  is_binary = cm.shape == (2, 2)

  if is_binary:
    tn, fp, fn, tp = cm.ravel()
    sensitivity = tp / (tp + fn)
    specificity = tn / (tn + fp)
  else:
    # One-vs-rest counts per class, taken from the square confusion matrix.
    tp = np.diag(cm)
    fn = cm.sum(axis=1) - tp
    fp = cm.sum(axis=0) - tp
    tn = cm.sum() - tp - fn - fp
    # A class with no true (or no negative) samples yields NaN and is skipped.
    with np.errstate(divide="ignore", invalid="ignore"):
      class_sensitivity = tp / (tp + fn)
      class_specificity = tn / (tn + fp)
    sensitivity = np.nanmean(class_sensitivity)
    specificity = np.nanmean(class_specificity)

  print(model_name)
  print(f"Sensitivity: {sensitivity}")
  print(f"Specificity: {specificity}")
  print(f"Accuracy: {accuracy_score(y_true, y_pred)}")
  if is_binary:
    roc_auc = roc_auc_score(y_true, y_pred)
  else:
    roc_auc = np.nanmean((class_sensitivity + class_specificity) / 2)
  print(f"ROC-AUC: {roc_auc}")
  print()

**TRAIN-TEST SPLIT**


In [None]:
# Hold out 25% of the rows for testing; random_state=0 fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    df,
    labels,
    test_size=0.25,
    random_state=0,
)

In [None]:
# Preview the first rows with rich display instead of printing the whole frame.
X_train.head()

In [None]:
# Preview the first rows with rich display instead of printing the whole frame.
X_test.head()

In [None]:
# Preview the first training labels instead of printing the whole series.
y_train.head()

In [None]:
# Preview the first test labels instead of printing the whole series.
y_test.head()

## **SVM**

**FEATURE SCALING**

In [None]:
# Learn the per-feature mean and variance from the training split only,
# then apply that same transform to both splits.
sc = StandardScaler().fit(X_train)
X_train_scaled = sc.transform(X_train)
X_test_scaled = sc.transform(X_test)

**FITTING MODELS**

In [None]:
# Linear kernel: predict from raw features first, then refit the same estimator
# on standardized features (the raw-feature predictions are already stored).
svc_linear = SVC(kernel="linear", random_state=seed)
y_pred_linear = svc_linear.fit(X_train, y_train).predict(X_test)
y_pred_linear_scaled = svc_linear.fit(X_train_scaled, y_train).predict(X_test_scaled)

# Gaussian kernel: scikit-learn calls it "rbf". The previous "sigmoid" setting
# did not match the `svc_gaussian` name.
svc_gaussian = SVC(kernel="rbf", random_state=seed)
y_pred_gaussian = svc_gaussian.fit(X_train, y_train).predict(X_test)
y_pred_gaussian_scaled = svc_gaussian.fit(X_train_scaled, y_train).predict(X_test_scaled)

In [None]:
# print_metrics takes the heading as a required third argument.
print_metrics(y_test.values, y_pred_linear, "Linear SVC")
print_metrics(y_test.values, y_pred_linear_scaled, "Linear scaled SVC")
print_metrics(y_test.values, y_pred_gaussian, "Gaussian SVC")
print_metrics(y_test.values, y_pred_gaussian_scaled, "Gaussian scaled SVC")

In [None]:
def svc_classification(df, labels, classes):
  """Fit linear and Gaussian (RBF) SVCs on raw and standardized features and print test metrics.

  Parameters
  ----------
  df : DataFrame
      Feature matrix.
  labels : Series
      Class labels aligned with ``df``.
  classes : str
      Tag appended to each printed heading (for example "1v2").

  Returns
  -------
  None
      Four metric blocks are printed via ``print_metrics``: linear, linear
      scaled, Gaussian, Gaussian scaled.
  """
  X_train, X_test, y_train, y_test = train_test_split(df, labels, test_size=0.25, random_state=0)

  # Scaling statistics come from the training split only.
  sc = StandardScaler()
  X_train_scaled = sc.fit_transform(X_train)
  X_test_scaled = sc.transform(X_test)

  # "rbf" is scikit-learn's name for the Gaussian kernel.
  for kernel, title in (("linear", "Linear"), ("rbf", "Gaussian")):
    model = SVC(kernel=kernel, random_state=seed)
    # Predict from raw features before the estimator is refit on scaled ones.
    y_pred_raw = model.fit(X_train, y_train).predict(X_test)
    y_pred_scaled = model.fit(X_train_scaled, y_train).predict(X_test_scaled)

    print_metrics(y_test.values, y_pred_raw, f"{title} SVC {classes}")
    print_metrics(y_test.values, y_pred_scaled, f"{title} scaled SVC {classes}")

In [None]:
# Run the SVC comparison on every "class 1 vs class k" subset, in order.
for _frame, _target, _tag in (
    (df12, labels12, "1v2"),
    (df13, labels13, "1v3"),
    (df14, labels14, "1v4"),
    (df15, labels15, "1v5"),
):
  svc_classification(_frame, _target, _tag)

In [None]:
# Same comparison on the complete frame, using every label held in `labels`.
svc_classification(df, labels, "Full")

**RANDOM FOREST**

In [None]:
# Small entropy-based forest trained on the unscaled training split.
classifier = RandomForestClassifier(
    n_estimators=10,
    criterion="entropy",
    random_state=0,
)
classifier.fit(X_train, y_train)

**CROSS-VALIDATION FOR SVM**

In [None]:
# Score a linear SVC so this cell matches its "SVM" heading; the random forest
# `classifier` has its own cross-validation cell.
accuracies = cross_val_score(
    estimator=SVC(kernel="linear", random_state=seed), X=X_train, y=y_train, cv=10
)
print("Accuracy: {:.2f} %".format(accuracies.mean()*100))
print("Standard Deviation: {:.2f} %".format(accuracies.std()*100))

**CROSS VALIDATION FOR RANDOM FOREST**

In [None]:
# 10-fold cross-validated accuracy of the random forest on the training split.
accuracies = cross_val_score(classifier, X_train, y_train, cv=10)
print(f"Accuracy: {accuracies.mean() * 100:.2f} %")
print(f"Standard Deviation: {accuracies.std() * 100:.2f} %")

In [None]:
# Predict one held-out sample. Slicing with a list keeps a one-row 2-D frame,
# which is the shape predict() expects. No scaler is applied because
# `classifier` was fitted on the unscaled X_train.
print(classifier.predict(X_test.iloc[[0]]))

In [None]:
y_pred = classifier.predict(X_test)
# Side-by-side table: predicted label | true label. np.asarray handles y_test
# being a pandas Series, which has no .reshape method.
print(np.column_stack((y_pred, np.asarray(y_test))))

In [None]:
# Rows of the matrix are true labels, columns are predicted labels.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)
accuracy_score(y_true=y_test, y_pred=y_pred)

**TESTING FOR SVM**

**TESTING FOR RANDOM FOREST**

**BIAS FOR SVM**

**BIAS FOR RANDOM FOREST**