In [7]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression

# --- Data -------------------------------------------------------------------
# Features and labels are stored in two separate CSV files; the code below
# pairs them row-by-row by position when splitting and cross-validating.
features_df = pd.read_csv('concussion_classification_features.csv')
labels_df = pd.read_csv('concussion_classification_labels.csv')

# scikit-learn is fed plain arrays: every feature column, plus the 'Label' column.
X = features_df.values
y = labels_df['Label'].values

# --- Hold-out split ---------------------------------------------------------
# 80% of the rows for training, 20% for testing; the seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# --- Model definitions ------------------------------------------------------
random_forest = RandomForestClassifier(n_estimators=100, random_state=42)
# The SVM is wrapped in a pipeline so features are standardised before the
# RBF kernel sees them.
svm = make_pipeline(
    StandardScaler(),
    SVC(kernel='rbf', C=1.0, gamma='scale', random_state=42),
)
gradient_boosting = GradientBoostingClassifier(n_estimators=100, random_state=42)

# Display name -> estimator, in the order the results are reported.
models = {
    'Random Forest': random_forest,
    'SVM': svm,
    'Gradient Boosting': gradient_boosting,
}

# --- Training ---------------------------------------------------------------
for model in models.values():
    model.fit(X_train, y_train)

# --- Hold-out evaluation ----------------------------------------------------
# For each model: overall accuracy followed by the per-class report and a
# blank separator line.
for name, model in models.items():
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"{name} accuracy: {accuracy:.4f}")
    print(classification_report(y_test, y_pred), end="\n\n")

# --- Regularized Logistic Regression (5-fold cross-validation) --------------
# NOTE(review): this score comes from cross-validation over the full X, y
# (including the rows in X_test), so it is not measured on the same basis as
# the hold-out accuracies printed above -- confirm this is intended.
logistic_regression = make_pipeline(
    StandardScaler(),
    LogisticRegression(penalty='l2', C=1.0, random_state=42),
)
scores = cross_val_score(logistic_regression, X, y, cv=5)
print(f"Regularized Logistic Regression accuracy: {scores.mean():.4f} (+/- {scores.std():.4f})")



Random Forest accuracy: 0.6927
              precision    recall  f1-score   support

           0       0.68      0.75      0.71       227
           1       0.71      0.64      0.67       222

    accuracy                           0.69       449
   macro avg       0.69      0.69      0.69       449
weighted avg       0.69      0.69      0.69       449


SVM accuracy: 0.6503
              precision    recall  f1-score   support

           0       0.63      0.73      0.68       227
           1       0.67      0.57      0.62       222

    accuracy                           0.65       449
   macro avg       0.65      0.65      0.65       449
weighted avg       0.65      0.65      0.65       449


Gradient Boosting accuracy: 0.6147
              precision    recall  f1-score   support

           0       0.62      0.62      0.62       227
           1       0.61      0.61      0.61       222

    accuracy                           0.61       449
   macro avg       0.61      0.61      

In [8]:
from joblib import dump

# Persist the fitted Random Forest to disk so it can be reloaded later
# without retraining. The call is left as the cell's last expression so the
# notebook echoes dump()'s return value (the list of files written).
dump(value=random_forest, filename='random_forest_model.joblib')

['random_forest_model.joblib']

In [9]:
from sklearn.metrics import confusion_matrix
from joblib import load

# Reload the persisted Random Forest to check that the saved file is usable.
# NOTE: joblib files are pickle-based, so only load files from a trusted source.
rf_model = load('random_forest_model.joblib')

# Predict labels for the test split (X_test / y_test come from an earlier cell).
rf_predictions = rf_model.predict(X_test)

# Compute the confusion matrix once and reuse it for both the printout and the
# derived metrics. labels=[0, 1] pins the layout to 2x2 (rows = true class,
# columns = predicted class) even if one class happens to be absent, so the
# four-way unpacking below cannot fail. This assumes the classes are encoded
# as 0 and 1, as shown in the classification report output.
conf_matrix = confusion_matrix(y_test, rf_predictions, labels=[0, 1])

print("Confusion Matrix:")
print(conf_matrix)

# Row-major flattening of [[tn, fp], [fn, tp]].
tn, fp, fn, tp = conf_matrix.ravel()

# Sensitivity = recall on class 1; specificity = recall on class 0.
# Report NaN instead of dividing by zero when a class has no true samples.
positives = tp + fn
negatives = tn + fp
sensitivity = tp / positives if positives else float('nan')
specificity = tn / negatives if negatives else float('nan')

print("Random Forest predictions:")
print(rf_predictions)
print("Sensitivity:", sensitivity)
print("Specificity:", specificity)


Confusion Matrix:
[[170  57]
 [ 81 141]]
Random Forest predictions:
[1 0 0 1 1 0 1 1 0 0 0 0 1 0 0 0 0 1 0 1 1 0 0 1 1 1 0 1 0 0 0 0 1 0 1 1 1
 0 1 1 0 0 0 0 1 0 1 1 1 0 1 0 0 1 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 1 0 1 1 1
 0 0 1 1 1 0 1 0 0 1 1 1 0 1 0 0 0 0 1 1 1 0 0 0 1 0 1 1 0 1 0 1 0 1 0 0 1
 0 0 0 0 1 1 0 1 0 0 0 1 0 1 0 0 1 0 1 0 1 0 1 1 0 1 0 1 0 0 0 1 0 1 0 0 0
 0 1 1 1 0 0 1 1 1 0 0 1 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 0 0 0 0 0 1 1 0 0 0
 1 1 0 1 1 0 0 1 0 0 1 0 0 0 0 0 0 0 1 0 1 1 0 1 1 1 0 0 0 0 0 1 0 1 0 1 0
 0 1 1 0 0 0 1 0 1 1 1 0 0 1 0 1 1 0 1 1 1 1 1 1 0 1 0 1 0 1 1 1 1 0 1 1 1
 0 1 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 1 0 0 1 0 1 1 1 0 0 1 0 1 1 0 0 0 1 0
 0 1 1 1 0 0 0 0 1 1 1 1 0 1 0 1 0 1 0 1 1 0 1 1 0 1 1 1 0 1 1 0 1 1 1 1 0
 0 1 0 0 1 1 0 0 1 0 0 1 1 0 1 1 1 0 1 0 0 0 0 0 0 0 0 1 1 1 1 1 0 0 0 1 0
 0 1 0 0 0 0 0 1 0 1 0 0 0 1 0 0 0 1 0 1 1 0 1 0 0 0 1 0 1 1 0 0 1 1 0 0 0
 1 0 0 0 0 0 1 0 1 0 1 0 1 1 0 0 1 0 1 0 0 0 0 1 1 0 1 1 1 0 0 1 1 0 1 0 1
 0 0 0 0 1]
Sensitivity: 0.63513