In [None]:
import pandas as pd
import numpy as np

from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.svm import SVC

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from sklearn.feature_selection import VarianceThreshold
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline

In [None]:


df_train = pd.read_csv("diabetes_train.csv")
df_test = pd.read_csv("diabetes_test.csv")

target_col = "readmitted"  
X_train_full = df_train.drop(columns=[target_col])
y_train_full = df_train[target_col]

X_test = df_test.drop(columns=[target_col])
y_test = df_test[target_col]

cat_cols = X_train_full.select_dtypes(include=["object", "category"]).columns.tolist()

X_train_full = pd.get_dummies(X_train_full, columns=cat_cols, drop_first=True)
X_test = pd.get_dummies(X_test, columns=cat_cols, drop_first=True)

X_train_full, X_test = X_train_full.align(X_test, join='left', axis=1, fill_value=0)

var_thresh = VarianceThreshold(threshold=0.0)
X_train_full = var_thresh.fit_transform(X_train_full)
X_test = var_thresh.transform(X_test)

X_train, _, y_train, _ = train_test_split(
    X_train_full, y_train_full, 
    train_size=0.7,  
    random_state=42
)

In [None]:

qda_model = QuadraticDiscriminantAnalysis()
qda_model.fit(X_train, y_train)

y_pred_train_qda = qda_model.predict(X_train)
y_pred_test_qda = qda_model.predict(X_test)

print("=== QDA Results ===")
print("Training Accuracy:", accuracy_score(y_train, y_pred_train_qda))
print("Test Accuracy:    ", accuracy_score(y_test, y_pred_test_qda))
print("\nConfusion Matrix (Test):\n", confusion_matrix(y_test, y_pred_test_qda))
print("\nClassification Report (Test):\n", classification_report(y_test, y_pred_test_qda))

In [11]:

svm_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('pca', PCA(n_components=50, random_state=42)),
    ('svm', SVC(kernel='linear', 
                C=1.0, 
                class_weight='balanced', 
                tol=1e-3, 
                max_iter=10000,
                random_state=42))
])

svm_pipeline.fit(X_train, y_train)

y_pred_train_svm = svm_pipeline.predict(X_train)
y_pred_test_svm = svm_pipeline.predict(X_test)

print("\n=== SVM (Linear + PCA + Balanced) Results ===")
print("Training Accuracy:", accuracy_score(y_train, y_pred_train_svm))
print("Test Accuracy:    ", accuracy_score(y_test, y_pred_test_svm))
print("\nConfusion Matrix (Test):\n", confusion_matrix(y_test, y_pred_test_svm))
print("\nClassification Report (Test):\n", classification_report(y_test, y_pred_test_svm))

print("\nQDA Test Accuracy:", accuracy_score(y_test, y_pred_test_qda))
print("SVM Test Accuracy:", accuracy_score(y_test, y_pred_test_svm))



=== QDA Results ===
Training Accuracy: 0.5378915599958373
Test Accuracy:     0.5288278487897743

Confusion Matrix (Test):
 [[    3    10  3240]
 [   18    42 10481]
 [   41    70 15511]]

Classification Report (Test):
               precision    recall  f1-score   support

         <30       0.05      0.00      0.00      3253
         >30       0.34      0.00      0.01     10541
          NO       0.53      0.99      0.69     15622

    accuracy                           0.53     29416
   macro avg       0.31      0.33      0.23     29416
weighted avg       0.41      0.53      0.37     29416






=== SVM (Linear + PCA + Balanced) Results ===
Training Accuracy: 0.5208450411072952
Test Accuracy:     0.5108784335055752

Confusion Matrix (Test):
 [[  125     2  3126]
 [  288     4 10249]
 [  689    34 14899]]

Classification Report (Test):
               precision    recall  f1-score   support

         <30       0.11      0.04      0.06      3253
         >30       0.10      0.00      0.00     10541
          NO       0.53      0.95      0.68     15622

    accuracy                           0.51     29416
   macro avg       0.25      0.33      0.25     29416
weighted avg       0.33      0.51      0.37     29416


QDA Test Accuracy: 0.5288278487897743
SVM Test Accuracy: 0.5108784335055752


## Summarizing findings 

We see that our QDA accuracy is 52.9%, and the SVM accuracy is 51.1%. From this, we can state that QDA is marginally better than SVM, though both models didn't perform as well as intended. This might be due to the imbalanced dataset where very few datapoints were <30, and skewed results. With the current hyperparameter tunings, the model does not predict as well as expected and may not be best suited to this dataset.