In [26]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
import xgboost as xgb
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import Normalizer


In [27]:
%matplotlib inline
import matplotlib.pyplot as plt
import time
import numpy as np
import pandas as pd
import seaborn as sb

In [28]:
data = pd.read_csv("selected_features_final.csv")
X = data.iloc[:, :-1]
y = data.iloc[:, -1]
X = np.asarray(X)
y = np.asarray(y)

In [29]:
y.shape


(64,)

In [30]:
# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [31]:
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the classifier
rf_classifier.fit(X_train, y_train)

# Make predictions on the test set
y_pred = rf_classifier.predict(X_test)

# Evaluate the accuracy of the classifier
rf_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", rf_accuracy)

Accuracy: 0.7692307692307693


In [32]:
# Create an SVM classifier
svm_classifier = SVC(kernel='rbf', random_state=42)

# Train the classifier
svm_classifier.fit(X_train, y_train)

# Make predictions on the test set
y_pred = svm_classifier.predict(X_test)

# Evaluate the accuracy of the classifier
svm_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", svm_accuracy)

Accuracy: 0.6923076923076923


In [33]:
gb_classifier = xgb.XGBClassifier(random_state=42)

# Train the classifier
gb_classifier.fit(X_train, y_train)

# Make predictions on the test set
y_pred = gb_classifier.predict(X_test)

# Evaluate the accuracy of the classifier
gb_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", gb_accuracy)


Accuracy: 0.6153846153846154


In [34]:
# Create a logistic regression classifier
logreg_classifier = LogisticRegression()
logreg_classifier = LogisticRegression(max_iter=1000)
# Train the classifier
logreg_classifier.fit(X_train, y_train)


# Make predictions on the test set
y_pred = logreg_classifier.predict(X_test)

# Evaluate the accuracy of the classifier
lr_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", lr_accuracy)

Accuracy: 0.8461538461538461


In [35]:
from keras.models import Sequential, load_model
from keras.layers import Dense

In [36]:
norm = Normalizer(norm='max')
norm.fit(X_train)
X_train = norm.transform(X_train)

In [56]:
from keras.callbacks import ModelCheckpoint
model1 = Sequential()
model1.add(Dense(512, activation='relu', input_dim=8))
model1.add(Dense(512, activation='relu'))
model1.add(Dense(1, activation='sigmoid'))
model1.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# Create a ModelCheckpoint callback to save the best model
checkpoint = ModelCheckpoint('best_model.h5', monitor='val_accuracy', save_best_only=True, mode='max', verbose=1)

# Train the model with the callback
model1.fit(X_train, y_train, epochs=1000, batch_size=32, validation_data=(X_test, y_test), callbacks=[checkpoint])

# Load the best model
model = load_model('best_model.h5')

# Make predictions on the test set
y_pred = model1.predict(X_test)
y_pred = (y_pred > 0.5).astype(int)  # Convert probabilities to binary predictions

# Evaluate the accuracy of the model
ann_accuracy = accuracy_score(y_test, y_pred)
print("Best Accuracy:", ann_accuracy)

Epoch 1/1000
Epoch 1: val_accuracy improved from -inf to 0.46154, saving model to best_model.h5
Epoch 2/1000
Epoch 2: val_accuracy improved from 0.46154 to 0.61538, saving model to best_model.h5
Epoch 3/1000
Epoch 3: val_accuracy improved from 0.61538 to 0.69231, saving model to best_model.h5
Epoch 4/1000
Epoch 4: val_accuracy did not improve from 0.69231
Epoch 5/1000
Epoch 5: val_accuracy did not improve from 0.69231
Epoch 6/1000
Epoch 6: val_accuracy did not improve from 0.69231
Epoch 7/1000
Epoch 7: val_accuracy did not improve from 0.69231
Epoch 8/1000
Epoch 8: val_accuracy did not improve from 0.69231
Epoch 9/1000
Epoch 9: val_accuracy did not improve from 0.69231
Epoch 10/1000
Epoch 10: val_accuracy did not improve from 0.69231
Epoch 11/1000
Epoch 11: val_accuracy did not improve from 0.69231
Epoch 12/1000
Epoch 12: val_accuracy did not improve from 0.69231
Epoch 13/1000
Epoch 13: val_accuracy did not improve from 0.69231
Epoch 14/1000
Epoch 14: val_accuracy did not improve from 

In [60]:
from sklearn.metrics import precision_score, recall_score, roc_auc_score
from sklearn.svm import SVC

# Make predictions on the test set
predicted_labels_rf = rf_classifier.predict(X_test)
predicted_labels_gb = gb_classifier.predict(X_test)
predicted_labels_svm = svm_classifier.predict(X_test)
predicted_labels_lr = logreg_classifier.predict(X_test)
predicted_labels_nn = model1.predict(X_test)




In [61]:
# Calculate precision for each model
precision_rf = precision_score(y_test, predicted_labels_rf)
precision_gb = precision_score(y_test, predicted_labels_gb)
precision_svm = precision_score(y_test, predicted_labels_svm)
precision_lr = precision_score(y_test, predicted_labels_lr)
precision_nn = precision_score(y_test, predicted_labels_nn)

In [62]:
# Calculate recall for each model
recall_rf = recall_score(y_test, predicted_labels_rf)
recall_gb = recall_score(y_test, predicted_labels_gb)
recall_svm = recall_score(y_test, predicted_labels_svm)
recall_lr = recall_score(y_test, predicted_labels_lr)
recall_nn = recall_score(y_test, predicted_labels_nn)

In [71]:
# Calculate predicted probabilities for each model
svm_classifier = SVC(probability=True)
svm_classifier.fit(X_train, y_train)
probabilities_rf = rf_classifier.predict_proba(X_test)[:, 1]
probabilities_gb = gb_classifier.predict_proba(X_test)[:, 1]
probabilities_svm = svm_classifier.predict_proba(X_test)[:, 1]
probabilities_lr = logreg_classifier.predict_proba(X_test)[:, 1]
probabilities_ann = model1.predict(X_test)




In [72]:
# Calculate AUC for each model
auc_rf = roc_auc_score(y_test, probabilities_rf)
auc_gb = roc_auc_score(y_test, probabilities_gb)
auc_svm = roc_auc_score(y_test, probabilities_svm)
auc_lr = roc_auc_score(y_test, probabilities_lr)
auc_nn = roc_auc_score(y_test, probabilities_ann)

In [75]:

models = ['Random Forest', 'Gradient Boost', 'SVM', 'Logistic Regression', 'Neural Network']
precision_scores = [precision_rf, precision_gb, precision_svm, precision_lr, precision_nn]
recall_scores = [recall_rf, recall_gb, recall_svm, recall_lr, recall_nn]
auc_scores = [auc_rf, auc_gb, auc_svm, auc_lr, auc_nn]
accuracy_score = [rf_accuracy, gb_accuracy, svm_accuracy, lr_accuracy, ann_accuracy]

In [76]:
# Print the performance scores
for i in range(len(models)):
    print(f"Performance scores for {models[i]}:")
    print("Precision:", precision_scores[i])
    print("Recall:", recall_scores[i])
    print("AUC:", auc_scores[i])
    print("Accuracy", accuracy_score[i])
    print()

Performance scores for Random Forest:
Precision: 0.6666666666666666
Recall: 1.0
AUC: 0.6904761904761905
Accuracy 0.7692307692307693

Performance scores for Gradient Boost:
Precision: 0.5555555555555556
Recall: 0.8333333333333334
AUC: 0.5714285714285714
Accuracy 0.6153846153846154

Performance scores for SVM:
Precision: 0.6666666666666666
Recall: 0.6666666666666666
AUC: 0.5
Accuracy 0.6923076923076923

Performance scores for Logistic Regression:
Precision: 0.75
Recall: 1.0
AUC: 0.9047619047619048
Accuracy 0.8461538461538461

Performance scores for Neural Network:
Precision: 0.46153846153846156
Recall: 1.0
AUC: 0.5
Accuracy 0.46153846153846156



In [79]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score


# Perform cross-validation
scores = cross_val_score(logreg_classifier, X, y, cv=5, scoring='accuracy')

print("Cross-Validation Scores:", scores)

# Compute mean accuracy
mean_accuracy = scores.mean()
print("Mean Accuracy:", mean_accuracy)

Cross-Validation Scores: [0.30769231 0.84615385 0.38461538 0.61538462 0.66666667]
Mean Accuracy: 0.5641025641025641
