In [2]:
# imports
import os
import pandas as pd
import numpy as np
from sklearn.svm import OneClassSVM
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

In [3]:
# Load the cleaned dataset written by the EDA step.
# The path is relative, so it resolves against the notebook's working directory.
file_path = "../data/cleaned_diabetes_health_indicators_dataset.csv"
# Read the whole CSV into a DataFrame; later cells use `df` as their input.
df = pd.read_csv(file_path)

In [4]:
# The target is the "Diabetes_012" column; every remaining column is a feature.
y = df["Diabetes_012"]
X = df.drop(columns=["Diabetes_012"])

# Hold out 20% of the rows as a test set. The fixed random_state keeps the
# split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [5]:
# Multi-class classification built from three One-Class SVMs:
# one model is fitted on the training rows of each class (0, 1, 2), and a test
# row is assigned to the class whose model gives it the highest decision score.
# NOTE(review): features go into the RBF kernel exactly as loaded — confirm the
# EDA step already scaled them, since RBF distances are scale-sensitive.

# Select the training rows belonging to each class
mask_0 = (y_train == 0)
X_train_0 = X_train[mask_0]

mask_1 = (y_train == 1)
X_train_1 = X_train[mask_1]

mask_2 = (y_train == 2)
X_train_2 = X_train[mask_2]

# Create and fit one model per class (identical hyper-parameters for all three)
ocsvm_0 = OneClassSVM(kernel='rbf', nu=0.05, gamma='scale')
ocsvm_0.fit(X_train_0)

ocsvm_1 = OneClassSVM(kernel='rbf', nu=0.05, gamma='scale')
ocsvm_1.fit(X_train_1)

ocsvm_2 = OneClassSVM(kernel='rbf', nu=0.05, gamma='scale')
ocsvm_2.fit(X_train_2)

# Score the whole test set with each model in a single call instead of looping
# over one-row slices: the scores are the same, but the per-row Python and
# pandas overhead (three calls per test row) is gone.
# Column k of `scores` holds the decision scores from the class-k model.
scores = np.column_stack([
    ocsvm_0.decision_function(X_test),
    ocsvm_1.decision_function(X_test),
    ocsvm_2.decision_function(X_test),
])

# Predict the class with the highest score for every row. The column index
# doubles as the class label, and ties resolve to the lowest label, exactly as
# np.argmax did on the per-row score list.
predictions = np.argmax(scores, axis=1)

# Multi-Class One-Class SVM Classification Report
print("\nOne-Class SVM Multi-Class Classification Report")
print(classification_report(y_test, predictions))

KeyboardInterrupt: 

# Save final prediction data to the results directory to be used in ensemble voting

In [None]:
# The predictions and the true test labels are saved side by side for the
# ensemble voting step, so they must have the same length. Refuse to write a
# partial result (e.g. left behind by an interrupted prediction cell).
if len(predictions) != len(y_test):
    raise ValueError(
        f"predictions has {len(predictions)} entries but y_test has {len(y_test)}; "
        "re-run the prediction cell to completion before saving."
    )

# Make the results directory if it doesn't exist
os.makedirs("results", exist_ok=True)
# Save the predicted labels to be used later in the ensemble voting
np.save("results/y_pred_ocsvm.npy", predictions)
# Save the true test labels to be used later in the ensemble voting
np.save("results/y_test.npy", y_test.to_numpy())