In [14]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from pathlib import Path
import os
import pickle


In [15]:
# Load the dataset
data = pd.read_csv("../csv/data.csv")
data.columns = data.columns.str.strip()
data

Unnamed: 0,heartRate,oxygenSaturation,temperature,systolicBloodPressure,diastolicBloodPressure,OUTPUT
0,110,97.0,41.0,124,93,Abnormal
1,125,100.0,29.0,112,71,Normal
2,88,99.0,43.0,161,99,Abnormal
3,69,97.0,38.0,124,91,Abnormal
4,82,93.0,41.0,147,115,Abnormal
...,...,...,...,...,...,...
25488,78,93.0,29.0,116,71,Normal
25489,80,93.0,42.0,121,88,Abnormal
25490,97,98.0,33.0,166,118,Abnormal
25491,61,100.0,39.0,157,96,Abnormal


In [16]:
# Separate features and target variable
X = data.drop(['OUTPUT', 'systolicBloodPressure', 'diastolicBloodPressure'], axis=1)
y = data['OUTPUT']

In [17]:
# Perform label encoding on the target variable
le = LabelEncoder()
y = le.fit_transform(y)

In [18]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [19]:
# Define a list of classifiers to evaluate
classifiers = [
    LogisticRegression(max_iter=1000),
    DecisionTreeClassifier(),
    RandomForestClassifier(),
    SVC()
]


In [20]:
# Evaluate each classifier using cross-validation
for classifier in classifiers:
    scores = cross_val_score(classifier, X_train, y_train, cv=4)
    accuracy = scores.mean()
    print(f"{classifier.__class__.__name__} Accuracy: {accuracy}")
    
    # Train the classifier on the full training set
    classifier.fit(X_train, y_train)

    # Make predictions on the test set
    y_pred = classifier.predict(X_test)

    # Generate the classification report
    report = classification_report(y_test, y_pred)
    print("Classification Report:")
    print(report)
    print()


LogisticRegression Accuracy: 0.9734724791228613
Classification Report:
              precision    recall  f1-score   support

           0       0.98      0.98      0.98      3920
           1       0.95      0.94      0.95      1179

    accuracy                           0.97      5099
   macro avg       0.97      0.96      0.96      5099
weighted avg       0.97      0.97      0.97      5099


DecisionTreeClassifier Accuracy: 0.9939687133170444
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      3920
           1       0.99      0.99      0.99      1179

    accuracy                           0.99      5099
   macro avg       0.99      0.99      0.99      5099
weighted avg       0.99      0.99      0.99      5099


RandomForestClassifier Accuracy: 0.994361033644471
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      3920
           1   

In [21]:
# Select the best classifier based on cross-validation results
best_classifier = classifiers[scores.argmax()]
print(f"Best Classifier: {best_classifier.__class__.__name__}")

Best Classifier: LogisticRegression


In [22]:
# Train the best classifier on the full training set
best_classifier.fit(X_train, y_train)

In [23]:
# Evaluate the best classifier on the test set
test_accuracy = best_classifier.score(X_test, y_test)
print(f"Test Accuracy: {test_accuracy}")

Test Accuracy: 0.9748970386350265


In [24]:
## Test the model in action
model = best_classifier

In [25]:
# Define the data for a single incident
data = {
    'heartRate': [27.80747],
    'oxygenSaturation': [21.93779],
    'temperature': [0.1876749]
}

In [26]:
# Create a DataFrame from the data
X = pd.DataFrame(data)

In [27]:
# Make a prediction on the single incident data
prediction = model.predict(X)

In [28]:
# Decode the predicted label back to its original category
predicted_category = le.inverse_transform(prediction)

In [29]:
predicted_category

array(['Normal'], dtype=object)