# Import Data

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Perceptron
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load the pre-formatted dataset from disk
data = pd.read_csv('data/diabetic_data_formatted.csv')

# The label to predict lives in a single column; every other column is a feature
label_col = 'readmitted'
y = data[label_col]
X = data.drop(columns=[label_col])

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [None]:
# Standardize data: the scaler is fitted on the training split only, and the
# same fitted transformation is then applied to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Ensure data is NumPy array. np.asarray hands back the scaler output as-is
# when it is already an ndarray, so no extra copy of each matrix is made
# (np.array would duplicate both matrices in memory).
X_train = np.asarray(X_train)
X_test = np.asarray(X_test)

In [None]:
# Seed shared by the estimators that are randomized by default, so the
# comparison table is reproducible from run to run (the train/test split
# above is already seeded with the same value).
RANDOM_STATE = 42

# Define models
# Perceptron is left at its library default seed; SVC (without probability
# estimates), KNN and Gaussian Naive Bayes take no seed here.
models = {
    "Decision Tree": DecisionTreeClassifier(random_state=RANDOM_STATE),
    "Perceptron": Perceptron(),
    "SVM": SVC(),
    "Random Forest": RandomForestClassifier(n_jobs=-1, random_state=RANDOM_STATE),  # Use all CPU cores
    "KNN (k=3)": KNeighborsClassifier(n_neighbors=3, n_jobs=-1),  # Use all CPU cores
    "Naive Bayes": GaussianNB(),
    # hidden_layer_sizes holds one entry per hidden layer. The previous value
    # (48, 1) added a second hidden layer with a single unit, squeezing every
    # sample through one activation before the output layer; a single
    # 48-unit hidden layer is used instead.
    # NOTE(review): max_iter=100 may stop before convergence on this dataset —
    # watch for ConvergenceWarning and raise it if needed.
    "Neural Network": MLPClassifier(
        hidden_layer_sizes=(48,),
        max_iter=100,
        random_state=RANDOM_STATE,
    ),
}

# Collect results (one dict of metrics per model)
results = []

In [None]:
# Fit every model on the training split and score it on the held-out test split.
# The collection is reset here so that re-running this cell replaces the
# previous metrics instead of appending a duplicate row per model.
results = []

# Precision, recall and F1 share the same settings: macro averaging (the
# unweighted mean over classes) and 0 as the score when a division by zero
# would otherwise occur.
macro_kwargs = {"average": "macro", "zero_division": 0}

for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    results.append({
        "Model": name,
        "Accuracy": accuracy_score(y_test, y_pred),
        "Precision": precision_score(y_test, y_pred, **macro_kwargs),
        "Recall": recall_score(y_test, y_pred, **macro_kwargs),
        "F1 Score": f1_score(y_test, y_pred, **macro_kwargs),
    })

In [None]:
# Build the comparison table from the collected metric dicts, ordered from the
# most to the least accurate model, with a fresh 0..n-1 index.
results_df = pd.DataFrame(results).sort_values(
    by="Accuracy",
    ascending=False,
    ignore_index=True,
)

# Show the ranked table
print(results_df)