In [70]:
import numpy as np
from os import listdir
from os.path import isfile, join
import pandas as pd
import matplotlib.image as mpimg
from PIL import Image
from sklearn.utils import shuffle
from sklearn.model_selection import KFold,train_test_split

from sklearn import svm, grid_search
from sklearn.grid_search import GridSearchCV
from sklearn.svm import NuSVC,SVC

# Load the clinical spreadsheet and use the 'Códigobiobanco' column as the row index.
df = pd.read_excel('data/dadesFinal.xlsx').set_index('Códigobiobanco')

# Binary target: 1 where the 'muerte' column equals 'CV death', 0 otherwise
# (missing values compare unequal and therefore become 0).
# The column is assigned directly rather than via
# `df['mortCV'].replace(..., inplace=True)`: an in-place method called on a
# column selection is chained assignment and is not guaranteed to write back
# into `df` (under pandas Copy-on-Write it never does).
df['mortCV'] = (df['muerte'] == 'CV death').astype(int)

In [71]:
def create_hemodine_dataset(tipus):
    """Build the per-region table of plaque measurements from the global ``df``.

    Parameters
    ----------
    tipus : str
        Suffix appended to the column stems (callers pass values such as
        ``'_cc_d'``), selecting which group of columns to pull.

    Returns
    -------
    pandas.DataFrame
        The columns ``'placas'+tipus``, ``lipidos``, ``fibrosis``, ``calcio``,
        ``class``, ``EventoCV_Si_No`` and ``mortCV``; rows with a missing value
        in any of them are dropped and the index labels are converted to str.
        Note that the ``placas`` column keeps its suffix.

    Side effects
    ------------
    Prints the number of rows that survive the NaN filter.
    """
    stems = ['lipidos', 'fibrosis', 'calcio']

    # Suffixed source columns, in the order they appear in the result.
    wanted = (['placas' + tipus]
              + [stem + tipus for stem in stems]
              + ['clasif' + tipus, 'EventoCV_Si_No', 'mortCV'])

    # Suffix-free names so frames from different regions share a schema.
    new_names = {stem + tipus: stem for stem in stems}
    new_names['clasif' + tipus] = 'class'

    complete_rows = df.loc[:, wanted].dropna()
    print('Number of sample for'+tipus+':', complete_rows.shape[0])

    return complete_rows.rename(index=str, columns=new_names)

def to3classes(y):
    """Collapse five class labels into three.

    Mapping: 0 and 1 -> 0, 2 and 3 -> 1, 4 -> 2. Any other value is passed
    through unchanged.

    Parameters
    ----------
    y : array-like
        One-dimensional collection of labels.

    Returns
    -------
    numpy.ndarray
        A new array with the remapped labels. The input is never modified
        (the previous implementation overwrote the caller's array in place).
    """
    labels = np.asarray(y)
    merged = labels.copy()
    # Every mask is computed from the untouched `labels`, so an element that
    # has just been rewritten can never be matched again by a later rule.
    merged[(labels == 0) | (labels == 1)] = 0
    merged[(labels == 2) | (labels == 3)] = 1
    merged[labels == 4] = 2
    return merged
def load_data(df,test_size=0.1):
    """Turn a plaque table into shuffled train/test feature and label arrays.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``lipidos``, ``fibrosis``, ``calcio`` and
        ``class``.
    test_size : float, optional
        Forwarded to ``train_test_split`` (default 0.1).

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)``.

    Notes
    -----
    Labels are shifted down by one and then merged with ``to3classes``.
    Shuffling and splitting use the fixed seeds 20 and 22 respectively, so the
    partition is repeatable. Sample counts are printed along the way.
    """
    features = df[['lipidos', 'fibrosis', 'calcio']].values
    print("Number of  samples:",features.shape[0])

    # `values - 1` yields a fresh array before the labels are merged.
    labels = to3classes(df['class'].values - 1)

    features, labels = shuffle(features, labels, random_state=20)
    parts = train_test_split(features, labels,
                             test_size=test_size, random_state=22)
    X_train, X_test, y_train, y_test = parts

    print("Number of training samples:",X_train.shape[0])
    print("Number of test samples:",X_test.shape[0])
    return X_train, X_test, y_train, y_test

In [207]:
def Nu_svc_param_selection(X, y, nfolds):
    """Grid-search ``nu``, ``gamma`` and ``kernel`` for a ``NuSVC``.

    Parameters
    ----------
    X, y : array-like
        Training features and labels handed to ``GridSearchCV.fit``.
    nfolds : int
        Passed as ``cv`` to ``GridSearchCV``.

    Returns
    -------
    dict
        The ``best_params_`` of the fitted search (also printed).
    """
    search = GridSearchCV(
        NuSVC(),
        {
            'nu': [0.08,0.09,0.1,0.11,0.12,0.13,.2],
            'gamma': [0.1,0.5,1,5,10,100],
            'kernel': ['rbf','linear','sigmoid'],
        },
        cv=nfolds,
    )
    search.fit(X, y)

    winner = search.best_params_
    print(winner)
    return winner
def train_Nu_SVM(X_train, X_test, y_train, y_test,nfolds=5):
    """Tune, fit and evaluate a ``NuSVC``.

    Hyper-parameters are chosen by ``Nu_svc_param_selection`` on the training
    data; a fresh ``NuSVC`` with those parameters is then fitted on the
    training data and scored on the test data.

    Parameters
    ----------
    X_train, y_train : array-like
        Training features and labels.
    X_test, y_test : array-like
        Held-out features and labels used only for the final score.
    nfolds : int, optional
        Number of cross-validation folds for the parameter search (default 5).

    Returns
    -------
    float
        The value of ``clf.score(X_test, y_test)``, which is also printed.
    """
    best_p_Nu = Nu_svc_param_selection(X_train, y_train, nfolds)
    clf = NuSVC(nu=best_p_Nu['nu'],
                kernel=best_p_Nu['kernel'],
                gamma=best_p_Nu['gamma'])
    clf.fit(X_train, y_train)

    # Score once and reuse the value; it was previously computed twice
    # (once for the print, once for the return).
    accuracy = clf.score(X_test, y_test)
    print('Accuracy:', accuracy)
    return accuracy

def svc_param_selection(X, y, nfolds):
    """Grid-search ``C`` and ``gamma`` for an RBF-kernel ``SVC``.

    Parameters
    ----------
    X, y : array-like
        Training features and labels handed to ``GridSearchCV.fit``.
    nfolds : int
        Passed as ``cv`` to ``GridSearchCV``.

    Returns
    -------
    dict
        The ``best_params_`` of the fitted search (also printed).
    """
    Cs = [0.001,0.01,1,10]
    # The grid used to list gamma=0.001 twice, so every C value was
    # cross-validated against an identical candidate a second time. The
    # repeat is dropped; the set of distinct candidates is unchanged.
    # NOTE(review): the repeated entry was possibly meant to be 0.01 --
    # confirm before widening the grid.
    gammas = [0.001,10,100]
    kernels = ['rbf']
    param_grid = {'C': Cs, 'gamma' : gammas,'kernel':kernels}

    search = GridSearchCV(SVC(), param_grid, cv=nfolds)
    search.fit(X, y)
    print(search.best_params_)
    return search.best_params_
def train_SVM(X_train, X_test, y_train, y_test,nfolds=5):
    """Tune, fit and evaluate an ``SVC``.

    Hyper-parameters are chosen by ``svc_param_selection`` on the training
    data; a fresh ``SVC`` with those parameters is then fitted on the training
    data and scored on the test data.

    Parameters
    ----------
    X_train, y_train : array-like
        Training features and labels.
    X_test, y_test : array-like
        Held-out features and labels used only for the final score.
    nfolds : int, optional
        Number of cross-validation folds for the parameter search (default 5).

    Returns
    -------
    float
        The value of ``clf.score(X_test, y_test)``, which is also printed.
    """
    best_p = svc_param_selection(X_train, y_train, nfolds)
    clf = SVC(C=best_p['C'], kernel=best_p['kernel'], gamma=best_p['gamma'])
    clf.fit(X_train, y_train)

    # Score once and reuse the value; it was previously computed twice
    # (once for the print, once for the return).
    accuracy = clf.score(X_test, y_test)
    print('Accuracy:', accuracy)
    return accuracy

In [209]:
### Region by region
# For every region prefix: pool the 'd' and 'i' tables, split them, train both
# SVM flavours and report which one scored higher on the test split.

tipus=['_cc_','_fem_com_','_fem_sup_','_med_bif_','_med_car_']

for x in tipus:
    print('\n\n Tipus:',x)
    # The 'd' table is built first, then the 'i' table.
    df_d, df_i = (create_hemodine_dataset(x+side) for side in ('d', 'i'))
    DF=pd.concat((df_d, df_i))
    X_train, X_test, y_train, y_test= load_data(DF)
    Nuacc=train_Nu_SVM(X_train, X_test, y_train, y_test)
    acc=train_SVM(X_train, X_test, y_train, y_test)
    # A tie is reported as 'C better'.
    print('Nu better' if Nuacc > acc else 'C better')



 Tipus: _cc_
Number of sample for_cc_d: 153
Number of sample for_cc_i: 220
Number of  samples: 373
Number of training samples: 335
Number of test samples: 38
{'gamma': 0.5, 'kernel': 'rbf', 'nu': 0.1}
Accuracy: 0.605263157895
{'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
Accuracy: 0.526315789474
Nu better


 Tipus: _fem_com_
Number of sample for_fem_com_d: 1081
Number of sample for_fem_com_i: 1007
Number of  samples: 2088
Number of training samples: 1879
Number of test samples: 209
{'gamma': 100, 'kernel': 'rbf', 'nu': 0.09}
Accuracy: 0.511961722488
{'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
Accuracy: 0.55980861244
C better


 Tipus: _fem_sup_
Number of sample for_fem_sup_d: 230
Number of sample for_fem_sup_i: 256
Number of  samples: 486
Number of training samples: 437
Number of test samples: 49
{'gamma': 5, 'kernel': 'rbf', 'nu': 0.13}
Accuracy: 0.530612244898
{'C': 0.001, 'gamma': 0.001, 'kernel': 'rbf'}
Accuracy: 0.489795918367
Nu better


 Tipus: _med_bif_
Number of sample for_med_bif

In [216]:
print('All classes:')
tipus=['_cc_','_fem_com_','_fem_sup_','_med_bif_','_med_car_']

# Gather every per-region table and concatenate once. The frame used to be
# seeded with `pd.DataFrame(columns=df_d.columns)`, which only worked when an
# earlier cell had already left a `df_d` in the kernel, and was then grown with
# a `concat` per loop iteration.
region_frames = []
for x in tipus:
    df_d=create_hemodine_dataset(x+'d')
    df_i=create_hemodine_dataset(x+'i')
    region_frames += [df_d, df_i]
DF = pd.concat(region_frames)

X_train, X_test, y_train, y_test= load_data(DF)
Nuacc=train_Nu_SVM(X_train, X_test, y_train, y_test)

All classes:
Number of sample for_cc_d: 153
Number of sample for_cc_i: 220
Number of sample for_fem_com_d: 1081
Number of sample for_fem_com_i: 1007
Number of sample for_fem_sup_d: 230
Number of sample for_fem_sup_i: 256
Number of sample for_med_bif_d: 1024
Number of sample for_med_bif_i: 1010
Number of sample for_med_car_d: 497
Number of sample for_med_car_i: 473
Number of  samples: 5951
Number of training samples: 5355
Number of test samples: 596
{'gamma': 5, 'kernel': 'rbf', 'nu': 0.09}
Accuracy: 0.565436241611


In [None]:
# C-SVC counterpart of the NuSVC run: tunes, fits and scores an SVC on
# whichever X_train/X_test/y_train/y_test are currently defined in the kernel.
acc=train_SVM(X_train, X_test, y_train, y_test)

In [215]:
print('Both femorals:')
tipus=['_fem_com_','_fem_sup_']

# Gather the femoral tables and concatenate once. The frame used to be seeded
# with `pd.DataFrame(columns=df_d.columns)`, which only worked when an earlier
# cell had already left a `df_d` in the kernel, and was then grown with a
# `concat` per loop iteration.
region_frames = []
for x in tipus:
    df_d=create_hemodine_dataset(x+'d')
    df_i=create_hemodine_dataset(x+'i')
    region_frames += [df_d, df_i]
DF = pd.concat(region_frames)

X_train, X_test, y_train, y_test= load_data(DF)
Nuacc=train_Nu_SVM(X_train, X_test, y_train, y_test)

Both femorals:
Number of sample for_fem_com_d: 1081
Number of sample for_fem_com_i: 1007
Number of sample for_fem_sup_d: 230
Number of sample for_fem_sup_i: 256
Number of  samples: 2574
Number of training samples: 2316
Number of test samples: 258
{'gamma': 10, 'kernel': 'rbf', 'nu': 0.08}
Accuracy: 0.259689922481


In [217]:
# C-SVC run on whichever X_train/X_test/y_train/y_test are currently defined
# in the kernel.
# NOTE(review): the accuracy recorded below this cell (0.60067... = 358/596)
# matches the 596-sample test split of the 'All classes' cell (In [216]), not
# the 258-sample 'Both femorals' split (In [215]); the cell appears to have
# been executed out of order. Re-run the notebook top to bottom before
# quoting this number for the femoral data.
acc=train_SVM(X_train, X_test, y_train, y_test)

{'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
Accuracy: 0.60067114094
