In [1]:
import pandas as pd

def load_data():
    return pd.read_csv("datasets/ckd.csv")
data = load_data()

In [2]:
columns = list(data.columns)

classes = list(data["CKD_Stage"].unique())
# transform classes from numpy int to string to be printed
multi_classes = [str(c) for c in classes]

binary = list(data["CKD_Progression"].unique())
classes_binary = [str(c) for c in binary]

In [4]:
data.fillna(0, inplace=True)
data.replace({"?": 0}, inplace=True)

In [5]:
# svm with binary classification (CKD_Progression)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix

def svm_classifier(X_train, y_train, X_test, y_test, classes, kernel):
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    
    svm = SVC(kernel=kernel, random_state=42)
    svm.fit(X_train_scaled, y_train)
    
    y_pred = svm.predict(X_test_scaled)
    
    print("Classification Report:")
    print(classification_report(y_test, y_pred, target_names=classes))
    
    print("Confusion Matrix:")
    print(confusion_matrix(y_test, y_pred))

X = data.drop(columns=["CKD_Progression"])
y = data["CKD_Progression"]
# split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
svm_classifier(X_train, y_train, X_test, y_test, classes_binary, kernel='linear')

Classification Report:
              precision    recall  f1-score   support

           0       0.84      0.93      0.88       240
           1       0.78      0.60      0.68       102

    accuracy                           0.83       342
   macro avg       0.81      0.76      0.78       342
weighted avg       0.83      0.83      0.82       342

Confusion Matrix:
[[223  17]
 [ 41  61]]


In [None]:
# rbf kernel
svm_classifier(X_train, y_train, X_test, y_test, classes_binary, kernel='rbf')

Classification Report:
              precision    recall  f1-score   support

           0       0.82      0.95      0.88       240
           1       0.81      0.50      0.62       102

    accuracy                           0.82       342
   macro avg       0.81      0.72      0.75       342
weighted avg       0.81      0.82      0.80       342

Confusion Matrix:
[[228  12]
 [ 51  51]]


In [None]:
# multi class classification (CKD_Stage)
X_multi = data.drop(columns=["CKD_Stage"])
# one vs all strategy for multi-class classification (under the hood, SVC handles multi-class classification under the hood)
y_multi = data["CKD_Stage"]
X_train_multi, X_test_multi, y_train_multi, y_test_multi = train_test_split(X_multi, y_multi, test_size=0.3, random_state=42)
svm_classifier(X_train_multi, y_train_multi, X_test_multi, y_test_multi, multi_classes, kernel='linear')

Classification Report:
              precision    recall  f1-score   support

           3       0.96      1.00      0.98        23
           5       0.98      0.99      0.98       139
           2       0.97      0.97      0.97       111
           4       1.00      0.97      0.99        69

    accuracy                           0.98       342
   macro avg       0.98      0.98      0.98       342
weighted avg       0.98      0.98      0.98       342

Confusion Matrix:
[[ 23   0   0   0]
 [  1 137   1   0]
 [  0   3 108   0]
 [  0   0   2  67]]


In [9]:
# multi class with rbf kernel
svm_classifier(X_train_multi, y_train_multi, X_test_multi, y_test_multi, multi_classes, kernel='rbf')

Classification Report:
              precision    recall  f1-score   support

           3       0.96      0.96      0.96        23
           5       0.97      0.96      0.97       139
           2       0.92      0.96      0.94       111
           4       0.98      0.93      0.96        69

    accuracy                           0.96       342
   macro avg       0.96      0.95      0.96       342
weighted avg       0.96      0.96      0.96       342

Confusion Matrix:
[[ 22   1   0   0]
 [  1 134   4   0]
 [  0   3 107   1]
 [  0   0   5  64]]


SVMs foram uma ótima escolha para determinar o estágio da doença