In [1]:
import numpy as np
import os
import time
import matplotlib
import matplotlib.pyplot as plt
import random
import pandas as pd
import warnings 
warnings.filterwarnings("ignore")
from tqdm.notebook import tqdm
cpac_root='/media/dm/0001A094000BF891/Yazid/ABIDEI_CPAC/'
smri_root='/media/dm/0001A094000BF891/Yazid/ABIDEI_sMRI/'
sites=np.load('DATAARRANGE/train_test_site.npy')
train_asd_dict=np.load('DATAARRANGE/train_asd_dict.npy',allow_pickle=True).item()
train_tdc_dict=np.load('DATAARRANGE/train_tdc_dict.npy',allow_pickle=True).item()
test_asd_dict=np.load('DATAARRANGE/test_asd_dict.npy',allow_pickle=True).item()
test_tdc_dict=np.load('DATAARRANGE/test_tdc_dict.npy',allow_pickle=True).item()
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

### DATA

In [2]:
def normalized(X):
    return (X-X.mean())/X.std()
def get_data(cpac_root,smri_root,sites,asd_list,tdc_list):
    x=np.zeros((13804))
    y=np.zeros((1))
    for index in range(len(sites)):
        site=sites[index]
        asdlist=asd_list[index]
        tdclist=tdc_list[index]
        
        slow5_asd=cpac_root+site+'/group1_slow5/'
        slow4_asd=cpac_root+site+'/group1_slow4/'
        voxel_asd=smri_root+site+'/group1/'
        fc_asd   =cpac_root+site+'/group1_FC/'
        for file in asdlist:
            fc_data=np.load(fc_asd+file,allow_pickle=True).flatten()
            s5_data=normalized(np.load(slow5_asd+file,allow_pickle=True))
            s4_data=normalized(np.load(slow4_asd+file,allow_pickle=True))
            vl_data=normalized(np.load(voxel_asd+file,allow_pickle=True))
            data=np.concatenate((fc_data, s5_data), axis=0)
            data=np.concatenate((data,s4_data),axis=0)
            data=np.concatenate((data,vl_data),axis=0)
            x=np.row_stack((x,data))
            y=np.row_stack((y,np.array([1])))
            
        slow5_tdc=cpac_root+site+'/group2_slow5/'
        slow4_tdc=cpac_root+site+'/group2_slow4/'
        voxel_tdc=smri_root+site+'/group2/'
        fc_tdc   =cpac_root+site+'/group2_FC/'
        for file in tdclist:
            fc_data=np.load(fc_tdc+file,allow_pickle=True).flatten()
            s5_data=normalized(np.load(slow5_tdc+file,allow_pickle=True))
            s4_data=normalized(np.load(slow4_tdc+file,allow_pickle=True))
            vl_data=normalized(np.load(voxel_tdc+file,allow_pickle=True))
            data=np.concatenate((fc_data, s5_data), axis=0)
            data=np.concatenate((data,s4_data),axis=0)
            data=np.concatenate((data,vl_data),axis=0)
            x=np.row_stack((x,data))
            y=np.row_stack((y,np.array([2])))
    return x[1:,:],y[1:,:]  

In [3]:
from sklearn import svm
#L_2regularization parameter: 1, RBF kernel and kernel coefficient for RBF used 1 / (n_features * X.var()) as value
acc=0
TN,FP,FN,TP=0,0,0,0
for index in range(10):
    train_x,train_y=get_data(cpac_root,smri_root,sites,train_asd_dict[index],train_tdc_dict[index])
    test_x,test_y=get_data(cpac_root,smri_root,sites,test_asd_dict[index],test_tdc_dict[index])
    clf = svm.SVC()
    clf.fit(train_x,train_y)
    pred_y=clf.predict(test_x)
    acc+=accuracy_score(pred_y,test_y)
    tn, fp, fn, tp = confusion_matrix(test_y,pred_y).ravel()
    TN+=tn
    FP+=fp
    FN+=fn
    TP+=tp
print('SVM\tAcc: '+str(acc/10))
print('TN:%d  |  TP:%d  |  FP:%d  FN:%d'%(TN,TP,FP,FN))

SVM	Acc: 0.6904392721698117
TN:295  |  TP:398  |  FP:186  FN:128


In [4]:
clf

SVC()

In [4]:
from sklearn.ensemble import RandomForestClassifier
#(200 trees, ‘gini’ function to measure the quality of a split)
acc=0
TN,FP,FN,TP=0,0,0,0
for index in range(10):
    train_x,train_y=get_data(cpac_root,smri_root,sites,train_asd_dict[index],train_tdc_dict[index])
    test_x,test_y=get_data(cpac_root,smri_root,sites,test_asd_dict[index],test_tdc_dict[index])
    clf = RandomForestClassifier(n_estimators=200)
    clf.fit(train_x,train_y)
    pred_y=clf.predict(test_x)
    acc+=accuracy_score(pred_y,test_y)
    tn, fp, fn, tp = confusion_matrix(test_y,pred_y).ravel()
    TN+=tn
    FP+=fp
    FN+=fn
    TP+=tp
print('RandomForestClassifier\tAcc: '+str(acc/10))
print('TN:%d  |  TP:%d  |  FP:%d  FN:%d'%(TN,TP,FP,FN))

RandomForestClassifier	Acc: 0.6453366298626709
TN:272  |  TP:376  |  FP:209  FN:150


In [5]:
from sklearn.ensemble import GradientBoostingClassifier
#(the DecisionTreeClassifier as the base estimator, the maximum number of estimators was 200, learning rate was set to 1 and used SAMME.R as the real boosting algorithm)
acc=0
TN,FP,FN,TP=0,0,0,0
for index in range(10):
    train_x,train_y=get_data(cpac_root,smri_root,sites,train_asd_dict[index],train_tdc_dict[index])
    test_x,test_y=get_data(cpac_root,smri_root,sites,test_asd_dict[index],test_tdc_dict[index])
    clf = GradientBoostingClassifier(n_estimators=200) 
    clf.fit(train_x,train_y)
    pred_y=clf.predict(test_x)
    acc+=accuracy_score(pred_y,test_y)
    tn, fp, fn, tp = confusion_matrix(test_y,pred_y).ravel()
    TN+=tn
    FP+=fp
    FN+=fn
    TP+=tp
print('GradientBoostingClassifier\tAcc: '+str(acc/10))
print('TN:%d  |  TP:%d  |  FP:%d  FN:%d'%(TN,TP,FP,FN))

GradientBoostingClassifier	Acc: 0.667999858771603
TN:299  |  TP:372  |  FP:182  FN:154


In [6]:
from sklearn.naive_bayes import MultinomialNB
acc=0
for index in range(10):
    train_x,train_y=get_data(cpac_root,smri_root,sites,train_asd_dict[index],train_tdc_dict[index])
    test_x,test_y=get_data(cpac_root,smri_root,sites,test_asd_dict[index],test_tdc_dict[index])
    clf = MultinomialNB(alpha=0.01)
    clf.fit(train_x,train_y)
    pred_y=clf.predict(test_x)
    acc+=accuracy_score(pred_y,test_y) 
print('MultinomialNB\tAcc: '+str(acc/10))

ValueError: Negative values in data passed to MultinomialNB (input X)