In [1]:
import pandas as pd
import numpy as np
import os
raw_data = pd.read_csv('data.csv')
print('Subjects:{}, Features:{}'.format(raw_data.shape[0], raw_data.shape[1]))

Subjects:102, Features:23


In [2]:
X_data = raw_data.drop(['No', 'PAgroup01vs23', 'MTAgroup01vs23', 'GCAgroup12'], axis=1)
y1_data = raw_data[['PAgroup01vs23']]
y2_data = raw_data[['MTAgroup01vs23']]
y3_data = raw_data[['GCAgroup12']]

print(X_data.shape, y1_data.shape, y2_data.shape, y3_data.shape)

(102, 19) (102, 1) (102, 1) (102, 1)


In [3]:
# one-hot
categorical_columns = ['Sarcopenia', 'Gender', 'Smoke01', 'Alcohol01', 'CCI', 'Polypharmacy', 'VisualImp', 'HearImp', 'MRI_GCA', 'MRI_MTA', 'MRI_ParietalAtrophy']
X_data_one_hot = pd.get_dummies(X_data, columns=categorical_columns)
X_data_one_hot.head()

Unnamed: 0,Age,EduYear,BMI,MMSE,GDS,MNA,ADL,IADL,Sarcopenia_0,Sarcopenia_1,...,MRI_GCA_1,MRI_GCA_2,MRI_MTA_0,MRI_MTA_1,MRI_MTA_2,MRI_MTA_3,MRI_ParietalAtrophy_0,MRI_ParietalAtrophy_1,MRI_ParietalAtrophy_2,MRI_ParietalAtrophy_3
0,69,12.0,29.2,30,0,14.0,100,6,1,0,...,1,0,0,0,1,0,0,1,0,0
1,67,6.0,33.8,30,1,14.0,100,7,1,0,...,0,0,0,1,0,0,1,0,0,0
2,77,12.0,23.6,30,0,14.0,100,7,1,0,...,1,0,0,1,0,0,0,1,0,0
3,86,12.0,24.4,30,0,14.0,90,6,1,0,...,0,1,0,1,0,0,0,1,0,0
4,77,16.0,26.2,30,0,14.0,95,5,1,0,...,0,1,0,1,0,0,0,1,0,0


In [4]:
from sklearn.svm import SVC
from sklearn.model_selection import KFold
from sklearn.metrics import auc, roc_curve
from sklearn import preprocessing

In [5]:
svm_all_auroc = []
print('PAgroup01vs23 SVM--')
for train_index, test_index in KFold(n_splits=3, random_state=42, shuffle=True).split(X_data_one_hot):
    X_train, X_test = X_data.iloc[train_index], X_data.iloc[test_index]
    y_train, y_test = y1_data.iloc[train_index], y1_data.iloc[test_index]
    # scaling
    scaler = preprocessing.MinMaxScaler()
    #scaler = preprocessing.StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    # model
    model = SVC(kernel='linear', probability=True)
    model.fit(X_train, y_train.values.ravel())
    y_pred = model.predict_proba(X_test)
    fpr, tpr, thresholds = roc_curve(y_test, y_pred[:, 1])
    auroc = auc(fpr, tpr)
    print('auc', auroc)
    svm_all_auroc.append(auroc)
print('auc_mean: %.2f, auc_std: %.2f' %(np.mean(svm_all_auroc), np.std(svm_all_auroc)))

PAgroup01vs23 SVM--
auc 0.9249999999999999
auc 1.0
auc 0.951923076923077
auc_mean: 0.96, auc_std: 0.03


In [6]:
svm_all_auroc = []
print('MTAgroup01vs23 SVM--')
for train_index, test_index in KFold(n_splits=3, random_state=42, shuffle=True).split(X_data_one_hot):
    X_train, X_test = X_data.iloc[train_index], X_data.iloc[test_index]
    y_train, y_test = y2_data.iloc[train_index], y2_data.iloc[test_index]
    # scaling
    scaler = preprocessing.MinMaxScaler()
    #scaler = preprocessing.StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    # model
    model = SVC(kernel='linear', probability=True)
    model.fit(X_train, y_train.values.ravel())
    y_pred = model.predict_proba(X_test)
    fpr, tpr, thresholds = roc_curve(y_test, y_pred[:, 1])
    auroc = auc(fpr, tpr)
    print('auc', auroc)
    svm_all_auroc.append(auroc)
print('auc_mean: %.2f, auc_std: %.2f' %(np.mean(svm_all_auroc), np.std(svm_all_auroc)))

MTAgroup01vs23 SVM--
auc 1.0
auc 1.0
auc 1.0
auc_mean: 1.00, auc_std: 0.00


In [7]:
svm_all_auroc = []
print('GCAgroup12 SVM--')
for train_index, test_index in KFold(n_splits=3, random_state=42, shuffle=True).split(X_data_one_hot):
    X_train, X_test = X_data.iloc[train_index], X_data.iloc[test_index]
    y_train, y_test = y3_data.iloc[train_index], y3_data.iloc[test_index]
    # scaling
    scaler = preprocessing.MinMaxScaler()
    #scaler = preprocessing.StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    # model
    model = SVC(kernel='linear', probability=True)
    model.fit(X_train, y_train.values.ravel())
    y_pred = model.predict_proba(X_test)
    fpr, tpr, thresholds = roc_curve(y_test, y_pred[:, 1])
    auroc = auc(fpr, tpr)
    print('auc', auroc)
    svm_all_auroc.append(auroc)
print('auc_mean: %.2f, auc_std: %.2f' %(np.mean(svm_all_auroc), np.std(svm_all_auroc)))

GCAgroup12 SVM--
auc 1.0
auc 1.0
auc 1.0
auc_mean: 1.00, auc_std: 0.00
