In [1]:
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from joblib import Parallel, delayed
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import StratifiedKFold, train_test_split

## Male

In [2]:
%cd Z:\PPMI_Data\Excels\NonMotors\The251\Male
NonMot1 = pd.read_csv('Feats45_CategAge_APPRDX_Male_Edit.csv')
print(NonMot1.shape)

NonMot2 = NonMot1.drop('PATNO', axis=1)
NonMot2['APPRDX'] = pd.Categorical(NonMot2['APPRDX'])
print(NonMot2['APPRDX'].value_counts())

output = pd.DataFrame()

NUM_CORES = 4
NUM_FOLDS = 10
NUM_REPEATS = 1
NUM_SAMPLES = 50
NUM_RUNS = 100

def run_model(col_index):
    """Score a single column of ``NonMot2`` with repeated, class-balanced linear SVMs.

    For each of ``NUM_RUNS`` runs, ``NUM_SAMPLES`` rows are drawn from every
    ``APPRDX`` group, and a linear-kernel SVC is evaluated with a shuffled
    ``NUM_FOLDS``-fold stratified cross-validation that uses only the column at
    ``col_index`` as predictor.

    Both the subsample and the fold assignment are seeded with the run index,
    so repeated executions give identical accuracies and every column is
    scored on the same subsamples for a given run.

    Parameters
    ----------
    col_index : int
        Positional index of the predictor column in ``NonMot2``. Column 0 is
        used as the label (assumes ``APPRDX`` is the first column -- confirm
        against the input file).

    Returns
    -------
    list of float
        One accuracy per fold per run (``NUM_RUNS * NUM_FOLDS`` values), in
        run-major order.
    """
    print(NonMot2.columns[col_index])
    acc_list = []
    for j in range(NUM_RUNS):
        print(col_index)
        print(j)
        # Seeded per-group draw; the original unseeded sample made results
        # irreproducible. GroupBy.sample keeps every column in place, so the
        # positional selection below stays valid.
        df = (
            NonMot2.groupby('APPRDX')
            .sample(n=NUM_SAMPLES, random_state=j)
            .reset_index(drop=True)
        )
        df = df.iloc[:, [0, col_index]]
        X = df.iloc[:, 1:].values
        y = df.iloc[:, 0].values
        skf = StratifiedKFold(n_splits=NUM_FOLDS, shuffle=True, random_state=j)
        for train_index, test_index in skf.split(X, y):
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]
            svc = SVC(kernel='linear')
            svc.fit(X_train, y_train)
            pred = svc.predict(X_test)
            res = confusion_matrix(y_test, pred)
            print(res)
            # Accuracy = correctly classified (diagonal) / all test samples.
            acc = np.sum(np.diag(res)) / np.sum(res)
            acc_list.append(acc)
    return acc_list

# One joblib task per column after the first; each task returns that
# column's list of accuracies from run_model.
results = Parallel(n_jobs=NUM_CORES)(
    delayed(run_model)(idx) for idx in range(1, len(NonMot2.columns))
)

# Transpose so each evaluated column becomes an output column and each row
# holds one accuracy value, then write the table to disk.
output = pd.DataFrame(np.array(results).T, columns=NonMot2.columns[1:])
output.to_csv('NonMot_Indi45Feats_Male_100svmL.csv', index=False)

Z:\PPMI_Data\Excels\NonMotors\The251\Male
(167, 47)
1    111
2     56
Name: APPRDX, dtype: int64


## Female

In [3]:
%cd Z:\PPMI_Data\Excels\NonMotors\The251\Female
NonMot1 = pd.read_csv('Feats45_CategAge_APPRDX_Female_Edit.csv')
print(NonMot1.shape)

NonMot2 = NonMot1.drop('PATNO', axis=1)
NonMot2['APPRDX'] = pd.Categorical(NonMot2['APPRDX'])
print(NonMot2['APPRDX'].value_counts())

output = pd.DataFrame()

NUM_CORES = 4
NUM_FOLDS = 10
NUM_REPEATS = 1
NUM_SAMPLES = 30
NUM_RUNS = 100

def run_model(col_index):
    """Score a single column of ``NonMot2`` with repeated, class-balanced linear SVMs.

    For each of ``NUM_RUNS`` runs, ``NUM_SAMPLES`` rows are drawn from every
    ``APPRDX`` group, and a linear-kernel SVC is evaluated with a shuffled
    ``NUM_FOLDS``-fold stratified cross-validation that uses only the column at
    ``col_index`` as predictor.

    Both the subsample and the fold assignment are seeded with the run index,
    so repeated executions give identical accuracies and every column is
    scored on the same subsamples for a given run.

    Parameters
    ----------
    col_index : int
        Positional index of the predictor column in ``NonMot2``. Column 0 is
        used as the label (assumes ``APPRDX`` is the first column -- confirm
        against the input file).

    Returns
    -------
    list of float
        One accuracy per fold per run (``NUM_RUNS * NUM_FOLDS`` values), in
        run-major order.
    """
    print(NonMot2.columns[col_index])
    acc_list = []
    for j in range(NUM_RUNS):
        print(col_index)
        print(j)
        # Seeded per-group draw; the original unseeded sample made results
        # irreproducible. GroupBy.sample keeps every column in place, so the
        # positional selection below stays valid.
        df = (
            NonMot2.groupby('APPRDX')
            .sample(n=NUM_SAMPLES, random_state=j)
            .reset_index(drop=True)
        )
        df = df.iloc[:, [0, col_index]]
        X = df.iloc[:, 1:].values
        y = df.iloc[:, 0].values
        skf = StratifiedKFold(n_splits=NUM_FOLDS, shuffle=True, random_state=j)
        for train_index, test_index in skf.split(X, y):
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]
            svc = SVC(kernel='linear')
            svc.fit(X_train, y_train)
            pred = svc.predict(X_test)
            res = confusion_matrix(y_test, pred)
            print(res)
            # Accuracy = correctly classified (diagonal) / all test samples.
            acc = np.sum(np.diag(res)) / np.sum(res)
            acc_list.append(acc)
    return acc_list

# One joblib task per column after the first; each task returns that
# column's list of accuracies from run_model.
results = Parallel(n_jobs=NUM_CORES)(
    delayed(run_model)(idx) for idx in range(1, len(NonMot2.columns))
)

# Transpose so each evaluated column becomes an output column and each row
# holds one accuracy value, then write the table to disk.
output = pd.DataFrame(np.array(results).T, columns=NonMot2.columns[1:])
output.to_csv('NonMot_Indi45Feats_Female_100svmL.csv', index=False)

Z:\PPMI_Data\Excels\NonMotors\The251\Female
(84, 47)
1    54
2    30
Name: APPRDX, dtype: int64


## Combined

In [4]:
%cd Z:\PPMI_Data\Excels\NonMotors\The251
NonMot2 = pd.read_csv('Feats45_Categ.csv')
print(NonMot2.shape)
NonMot2['APPRDX'] = pd.Categorical(NonMot2['APPRDX'])
print(NonMot2['APPRDX'].value_counts())

output = pd.DataFrame()

NUM_CORES = 4
NUM_FOLDS = 10
NUM_REPEATS = 1
NUM_SAMPLES = 80
NUM_RUNS = 100

def run_model(col_index):
    """Score a single column of ``NonMot2`` with repeated, class-balanced linear SVMs.

    For each of ``NUM_RUNS`` runs, ``NUM_SAMPLES`` rows are drawn from every
    ``APPRDX`` group, and a linear-kernel SVC is evaluated with a shuffled
    ``NUM_FOLDS``-fold stratified cross-validation that uses only the column at
    ``col_index`` as predictor.

    Both the subsample and the fold assignment are seeded with the run index,
    so repeated executions give identical accuracies and every column is
    scored on the same subsamples for a given run.

    Parameters
    ----------
    col_index : int
        Positional index of the predictor column in ``NonMot2``. Column 0 is
        used as the label (assumes ``APPRDX`` is the first column -- confirm
        against the input file).

    Returns
    -------
    list of float
        One accuracy per fold per run (``NUM_RUNS * NUM_FOLDS`` values), in
        run-major order.
    """
    print(NonMot2.columns[col_index])
    acc_list = []
    for j in range(NUM_RUNS):
        print(col_index)
        print(j)
        # Seeded per-group draw; the original unseeded sample made results
        # irreproducible. GroupBy.sample keeps every column in place, so the
        # positional selection below stays valid.
        df = (
            NonMot2.groupby('APPRDX')
            .sample(n=NUM_SAMPLES, random_state=j)
            .reset_index(drop=True)
        )
        df = df.iloc[:, [0, col_index]]
        X = df.iloc[:, 1:].values
        y = df.iloc[:, 0].values
        skf = StratifiedKFold(n_splits=NUM_FOLDS, shuffle=True, random_state=j)
        for train_index, test_index in skf.split(X, y):
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]
            svc = SVC(kernel='linear')
            svc.fit(X_train, y_train)
            pred = svc.predict(X_test)
            res = confusion_matrix(y_test, pred)
            print(res)
            # Accuracy = correctly classified (diagonal) / all test samples.
            acc = np.sum(np.diag(res)) / np.sum(res)
            acc_list.append(acc)
    return acc_list

# One joblib task per column after the first; each task returns that
# column's list of accuracies from run_model.
results = Parallel(n_jobs=NUM_CORES)(
    delayed(run_model)(idx) for idx in range(1, len(NonMot2.columns))
)

# Transpose so each evaluated column becomes an output column and each row
# holds one accuracy value, then write the table to disk.
output = pd.DataFrame(np.array(results).T, columns=NonMot2.columns[1:])
output.to_csv('NonMot_Indi45Feats_Combined_100svmL.csv', index=False)

Z:\PPMI_Data\Excels\NonMotors\The251
(251, 46)
1    165
2     86
Name: APPRDX, dtype: int64
