In [103]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn import svm
from sklearn.datasets import make_blobs
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import ElasticNet
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import xgboost as xgb

In [168]:
# Load in your data here
data = pd.read_csv("anxietyhc_gammaPSD.csv") #file name here


In [169]:
# Data preprocessing
X = data.drop('19', axis=1)
y = data['19']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)

In [170]:
# PCA data
from sklearn.decomposition import PCA
pca = PCA(0.95)
X_pca = pca.fit_transform(X)
X_pca.shape
X_train_pca, X_test_pca, y_train, y_test = train_test_split(X_pca, y, test_size=0.2, random_state=30)

# SVM Values

In [171]:
svclassifier = SVC(kernel='linear',probability=True)
svclassifier.fit(X_train, y_train)
y_pred = svclassifier.predict(X_test)
print('SVM accuracy:', svclassifier.score(X_test, y_test))
print('SVM classification report:\n', classification_report(y_test, y_pred))
y_scores = svclassifier.predict_proba(X_test)[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, y_scores)
# Calculate AUC
roc_auc = auc(fpr, tpr)
print('SVM AUC value:', roc_auc)
newsvm=SVC(kernel='linear', probability=True)
newsvm.fit(X_train_pca, y_train)
y_pred = newsvm.predict(X_test_pca)
print('\nSVM accuracy for PCA:', newsvm.score(X_test_pca, y_test))
print('SVM classification report for PCA:\n', classification_report(y_test, y_pred))
y_scores = newsvm.predict_proba(X_test_pca)[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, y_scores)
roc_auc = auc(fpr, tpr)
print('SVM AUC value for PCA:', roc_auc)


SVM accuracy: 0.5365853658536586
SVM classification report:
               precision    recall  f1-score   support

           0       0.50      0.74      0.60        19
           1       0.62      0.36      0.46        22

    accuracy                           0.54        41
   macro avg       0.56      0.55      0.53        41
weighted avg       0.56      0.54      0.52        41

SVM AUC value: 0.5478468899521531

SVM accuracy for PCA: 0.5853658536585366
SVM classification report for PCA:
               precision    recall  f1-score   support

           0       0.58      0.37      0.45        19
           1       0.59      0.77      0.67        22

    accuracy                           0.59        41
   macro avg       0.58      0.57      0.56        41
weighted avg       0.58      0.59      0.57        41

SVM AUC value for PCA: 0.42822966507177035


# KNN Values

In [172]:
knn = KNeighborsClassifier(n_neighbors=10)
knn.fit(X_train, y_train)
print('KNN accuracy:', knn.score(X_test, y_test))
y_pred = knn.predict(X_test)
print('KNN classification report:\n',classification_report(y_test, y_pred))
y_scores = knn.predict_proba(X_test)[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, y_scores)
roc_auc = auc(fpr, tpr)
print('KNN AUC value:', roc_auc)
knn = KNeighborsClassifier(n_neighbors=10)
knn.fit(X_train_pca, y_train)
print('\nKNN accuracy for PCA:', knn.score(X_test_pca, y_test))
y_pred = knn.predict(X_test_pca)
print('KNN classification report for PCA\n', classification_report(y_test, y_pred))
y_scores = knn.predict_proba(X_test_pca)[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, y_scores)
roc_auc = auc(fpr, tpr)
print('KNN AUC value for PCA',roc_auc)

KNN accuracy: 0.4146341463414634
KNN classification report:
               precision    recall  f1-score   support

           0       0.42      0.74      0.54        19
           1       0.38      0.14      0.20        22

    accuracy                           0.41        41
   macro avg       0.40      0.44      0.37        41
weighted avg       0.40      0.41      0.36        41

KNN AUC value: 0.4820574162679426

KNN accuracy for PCA: 0.4878048780487805
KNN classification report for PCA
               precision    recall  f1-score   support

           0       0.46      0.58      0.51        19
           1       0.53      0.41      0.46        22

    accuracy                           0.49        41
   macro avg       0.49      0.49      0.49        41
weighted avg       0.50      0.49      0.48        41

KNN AUC value for PCA 0.5


# Random Forest Values

In [173]:
model = RandomForestClassifier(n_estimators=40)
model.fit(X_train, y_train)
print('Random Forest accuracy:', model.score(X_test, y_test))
y_pred = model.predict(X_test)
print('Random Forest classification report\n',classification_report(y_test, y_pred))
y_scores = model.predict_proba(X_test)[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, y_scores)
roc_auc = auc(fpr, tpr)
print('Random Forest AUC value:', roc_auc)
pmodel = RandomForestClassifier(n_estimators=40)
pmodel.fit(X_train_pca, y_train)
print('\nRandom Forest accuracy for PCA:', pmodel.score(X_test_pca, y_test))
y_pred = pmodel.predict(X_test_pca)
print('Random Forest classification report for PCA:\n',classification_report(y_test, y_pred))
y_scores = pmodel.predict_proba(X_test_pca)[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, y_scores)
roc_auc = auc(fpr, tpr)
print('Random Forest AUC value for PCA:', roc_auc)

Random Forest accuracy: 0.4878048780487805
Random Forest classification report
               precision    recall  f1-score   support

           0       0.45      0.53      0.49        19
           1       0.53      0.45      0.49        22

    accuracy                           0.49        41
   macro avg       0.49      0.49      0.49        41
weighted avg       0.49      0.49      0.49        41

Random Forest AUC value: 0.46889952153110054

Random Forest accuracy for PCA: 0.5121951219512195
Random Forest classification report for PCA:
               precision    recall  f1-score   support

           0       0.48      0.63      0.55        19
           1       0.56      0.41      0.47        22

    accuracy                           0.51        41
   macro avg       0.52      0.52      0.51        41
weighted avg       0.52      0.51      0.51        41

Random Forest AUC value for PCA: 0.5669856459330144


# Logistic Regression values

In [174]:
model = LogisticRegression()
model.fit(X_train, y_train)
print('Logistic Regression accuracy:', model.score(X_test, y_test))
y_pred = model.predict(X_test)
print('Logistic Regression classification report:\n',classification_report(y_test, y_pred))
y_scores = model.predict_proba(X_test)[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, y_scores)
roc_auc = auc(fpr, tpr)
print('Logistic Regression AUC value:',roc_auc)
pmodel = LogisticRegression()
pmodel.fit(X_train_pca, y_train)
print('\nLogistic Regression accuracy for PCA:',pmodel.score(X_test_pca, y_test))
y_pred = pmodel.predict(X_test_pca)
print('Logistic Regression classification report for PCA:\n',classification_report(y_test, y_pred))
y_scores = pmodel.predict_proba(X_test_pca)[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, y_scores)
roc_auc = auc(fpr, tpr)
print('Logistic Regression AUC value for PCA',roc_auc)

Logistic Regression accuracy: 0.5853658536585366
Logistic Regression classification report:
               precision    recall  f1-score   support

           0       0.54      0.74      0.62        19
           1       0.67      0.45      0.54        22

    accuracy                           0.59        41
   macro avg       0.60      0.60      0.58        41
weighted avg       0.61      0.59      0.58        41

Logistic Regression AUC value: 0.5311004784688995

Logistic Regression accuracy for PCA: 0.6341463414634146
Logistic Regression classification report for PCA:
               precision    recall  f1-score   support

           0       0.62      0.53      0.57        19
           1       0.64      0.73      0.68        22

    accuracy                           0.63        41
   macro avg       0.63      0.63      0.63        41
weighted avg       0.63      0.63      0.63        41

Logistic Regression AUC value for PCA 0.6267942583732058


# Elastic Net Values

In [175]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)
elastic_net = ElasticNet()
elastic_net.fit(X_train, y_train)
y_pred = elastic_net.predict(X_test)
print('Mean absolute error:',mean_absolute_error(y_test, y_pred))
print('Mean squared error:', mean_squared_error(y_test, y_pred))
print('r2 score:', r2_score(y_test, y_pred))
pcaX_train = scaler.fit_transform(X_train_pca)
pcaX_test = scaler.fit_transform(X_test_pca)
elastic_net = ElasticNet()
elastic_net.fit(pcaX_train, y_train)
y_pred = elastic_net.predict(pcaX_test)
print('\nMean absolute error for PCA:',mean_absolute_error(y_test, y_pred))
print('Mean squared error for PCA:', mean_squared_error(y_test, y_pred))
print('r2 score for PCA:', r2_score(y_test, y_pred))

Mean absolute error: 0.4979548553249507
Mean squared error: 0.2487360751852957
r2 score: -0.0002998621686174374

Mean absolute error for PCA: 0.4979548553249507
Mean squared error for PCA: 0.2487360751852957
r2 score for PCA: -0.0002998621686174374


# XGB Values

In [176]:
clf = xgb.XGBClassifier(tree_method="hist", early_stopping_rounds=2)
clf.fit(X_train, y_train, eval_set=[(X_test, y_test)])
y_pred = clf.predict(X_test)
print('XGB accuracy:',clf.score(X_test, y_test))
print('XGB Classification report:\n',classification_report(y_test, y_pred))
y_scores = clf.predict_proba(X_test)[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, y_scores)
roc_auc = auc(fpr, tpr)
print('XGB AUC value:',roc_auc)
pxgb = xgb.XGBClassifier(tree_method="hist", early_stopping_rounds=2)
pxgb.fit(X_train_pca, y_train, eval_set=[(X_test_pca, y_test)])
print('\nXGB accuracy for PCA:', pxgb.score(X_test_pca, y_test))
y_pred = pxgb.predict(X_test_pca)
print('XGB classification report for PCA:\n',classification_report(y_test, y_pred))
y_scores = pxgb.predict_proba(X_test_pca)[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, y_scores)
roc_auc = auc(fpr, tpr)
print('XBG AUC for PCA:', roc_auc)

[0]	validation_0-logloss:0.69567
[1]	validation_0-logloss:0.69094
[2]	validation_0-logloss:0.69261
[3]	validation_0-logloss:0.70088
XGB accuracy: 0.5609756097560976
XGB Classification report:
               precision    recall  f1-score   support

           0       0.53      0.53      0.53        19
           1       0.59      0.59      0.59        22

    accuracy                           0.56        41
   macro avg       0.56      0.56      0.56        41
weighted avg       0.56      0.56      0.56        41

XGB AUC value: 0.5610047846889952
[0]	validation_0-logloss:0.70908
[1]	validation_0-logloss:0.72372
[2]	validation_0-logloss:0.74988

XGB accuracy for PCA: 0.4878048780487805
XGB classification report for PCA:
               precision    recall  f1-score   support

           0       0.44      0.42      0.43        19
           1       0.52      0.55      0.53        22

    accuracy                           0.49        41
   macro avg       0.48      0.48      0.48        