# Classifiers using neuro-imaging data

In [1]:
import pandas as pd
import numpy as np
from numpy import random, arange
import seaborn as sns

from sklearn.model_selection import train_test_split, KFold, cross_val_score, GridSearchCV
from sklearn.metrics import balanced_accuracy_score
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler, StandardScaler, OrdinalEncoder, PolynomialFeatures, LabelEncoder

# Import relevant sklearn classes related to machine learning models
from sklearn.linear_model import LinearRegression, LogisticRegression, Lasso, ElasticNet
from sklearn.svm import SVC, SVR, NuSVC
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, BaggingClassifier
from sklearn.neural_network import MLPClassifier

# Import relevant sklearn class/function related to evaluation
import sklearn.metrics
from imblearn.under_sampling import RandomUnderSampler

# SMOTE oversampling
import smote_variants as sv

## Data Pre-Processing

In [2]:
# Load the three CSV tables this notebook works from.
# low_memory=False makes pandas parse each file in one pass instead of in
# chunks, which avoids mixed-dtype columns from chunk-wise type inference.
CSV_READ_OPTIONS = {"low_memory": False}

data_merge = pd.read_csv("../data/ADNIMERGE.csv", **CSV_READ_OPTIONS)
data_dx = pd.read_csv("../data/DXSUM_PDXCONV_ADNIALL.csv", **CSV_READ_OPTIONS)
data_imaging = pd.read_csv("../data/UCSFFSX51_11_08_19.csv", **CSV_READ_OPTIONS)

In [11]:
# Take just the RID, DX and VISCODE columns of data_merge as the label table.
dx = data_merge[['RID', 'DX', 'VISCODE']]

# Attach those labels to the imaging rows. This is an inner join (the pandas
# default), matching dx's (RID, VISCODE) against data_imaging's (RID, VISCODE2),
# so rows without a counterpart on the other side are dropped.
# NOTE(review): data_imaging appears to have its own VISCODE column (see the
# preview output further down); if so, pandas suffixes the clashing names as
# VISCODE_x / VISCODE_y in img_dx -- confirm later cells expect that.
img_dx = pd.merge(
    dx,
    data_imaging,
    left_on=['RID', 'VISCODE'],
    right_on=['RID', 'VISCODE2'],
)

In [13]:
# Number of rows in the imaging table.
data_imaging.shape[0]

4896

In [5]:
# Number of data_dx rows whose DXCURREN value is present (not NaN/None).
# int(...) keeps the displayed result a plain Python integer.
int(data_dx['DXCURREN'].notna().sum())

3868

In [27]:
# Preview the first 20 rows of the merged labels + imaging frame.
# NOTE(review): the saved output below lists DXCURREN and VISCODE2 right after
# RID, whereas the merge cell above selects DX and VISCODE from data_merge.
# Together with the out-of-order execution counts, this suggests the output is
# stale -- re-run the notebook from a fresh kernel to confirm.
img_dx.head(n=20)

Unnamed: 0,RID,DXCURREN,VISCODE2,COLPROT,VISCODE,EXAMDATE,VERSION,LONISID,LONIUID,IMAGEUID,...,ST147SV,ST148SV,ST149SV,ST150SV,ST151SV,ST152SV,ST153SV,ST154SV,ST155SV,update_stamp
0,1072,2.0,m36,ADNI1,nv,2010-03-18,2012-04-06,25347.0,81261.0,288908,...,227898.0,231372.0,459270.0,221181.0,224841.0,446022.0,174012.0,633282.0,998937.0,2019-11-14 14:34:00.0
1,1072,2.0,m36,ADNI1,nv,2010-03-18,2012-04-06,25347.0,81262.0,288907,...,223158.0,224229.0,447387.0,229883.0,227633.0,457516.0,173016.0,620403.0,995477.0,2019-11-14 14:34:00.0
2,1131,2.0,m36,ADNI1,nv,2010-03-04,2013-03-26,25349.0,81278.0,288910,...,222306.0,223673.0,445979.0,214800.0,215904.0,430704.0,171871.0,617850.0,1073660.0,2019-11-14 14:34:00.0
3,1131,2.0,m36,ADNI1,nv,2010-03-04,2012-10-12,25349.0,81272.0,288909,...,222171.0,220781.0,442952.0,215978.0,217603.0,433581.0,167879.0,610831.0,1010870.0,2019-11-14 14:34:00.0
4,1169,1.0,m36,ADNI1,nv,2010-01-11,2012-04-06,24831.0,79479.0,288906,...,201718.0,197309.0,399027.0,197731.0,192553.0,390284.0,153531.0,552558.0,909055.0,2019-11-14 14:34:00.0
5,1169,1.0,m36,ADNI1,nv,2010-01-11,2012-04-06,24831.0,79478.0,288905,...,199763.0,193112.0,392875.0,199152.0,198304.0,397456.0,154344.0,547219.0,910307.0,2019-11-14 14:34:00.0
6,1241,1.0,m36,ADNI1,nv,2010-02-16,2014-11-28,25074.0,80225.0,288913,...,204352.0,208389.0,412741.0,251972.0,252585.0,504557.0,187479.0,600220.0,1043550.0,2019-11-14 14:34:01.0
7,1241,1.0,m36,ADNI1,nv,2010-02-16,2014-11-28,25074.0,80228.0,288914,...,207035.0,207406.0,414442.0,250586.0,253099.0,503686.0,187459.0,601901.0,1045820.0,2019-11-14 14:34:01.0
8,778,,m48,ADNIGO,nv,2010-11-18,2012-02-10,29764.0,94963.0,223527,...,225154.0,225579.0,450733.0,200936.0,199588.0,400523.0,159098.0,609831.0,954581.0,2019-11-14 14:34:00.0
9,89,,m48,ADNIGO,nv,2010-10-04,2012-02-10,29553.0,94173.0,223348,...,219848.0,223364.0,443212.0,223600.0,224186.0,447786.0,167352.0,610564.0,990983.0,2019-11-14 14:42:59.0
