## SVM

----

### 1. Import Modules

In [1]:
import numpy as np
import pandas as pd
import cimcb as cb
from sklearn.model_selection import train_test_split

# Confirmation banner: only reached when every import above succeeded.
print("All packages successfully loaded")

Using TensorFlow backend.


All packages successfully loaded


### 2. Load data and peak sheet

In [2]:
# Location of the Excel workbook that holds both the data and the peak sheets.
home = 'data/'
file = 'MTBLS161_serum.xlsx'

# Read the 'Data' and 'Peak' sheets into two tables via the cimcb loader.
DataTable, PeakTable = cb.utils.load_dataXL(
    f'{home}{file}',
    DataSheet='Data',
    PeakSheet='Peak',
)

Loadings PeakFile: Peak
Loadings DataFile: Data
Data Table & Peak Table is suitable.
TOTAL SAMPLES: 59 TOTAL PEAKS: 29
Done!


### 3. Extract X & Y

In [3]:
# Keep only the rows whose 'Class' label is 0 or 1, and take those labels as Y.
binary_mask = (DataTable['Class'] == 0) | (DataTable['Class'] == 1)
DataTable2 = DataTable[binary_mask]
Y = DataTable2['Class'].values

# The metabolite columns are selected by the names listed in the peak sheet.
peaklist = PeakTable['Name']
XT = DataTable2[peaklist]

# Pre-processing chain: natural log -> cb.utils.scale (method='auto')
# -> cb.utils.knnimpute (k=3). XTknn is the matrix used by the later cells.
XTlog = np.log(XT)
XTscale = cb.utils.scale(XTlog, method='auto')
XTknn = cb.utils.knnimpute(XTscale, k=3)

### 4. Hyperparameters optimisation

In [4]:
# Hyperparameter grid: powers of two with the exponent stepping by 2.
#   C     : 2**-5  ... 2**15  (11 values)
#   gamma : 2**-23 ... 2**-3  (11 values)
# An earlier version of this cell used gamma = 2**-15 ... 2**3 with the same
# C grid; the gamma grid now in use is shifted towards smaller values.
C_range = [2**exponent for exponent in range(-5, 16, 2)]
gamma_range = [2**exponent for exponent in range(-23, -2, 2)]
param_dict = {'gamma': gamma_range, 'C': C_range}



# Initialise the k-fold cross-validation over every (gamma, C) pair in param_dict.
# NOTE(review): n_mc is presumably the number of Monte Carlo repetitions - confirm
# against the cimcb documentation.
cv = cb.cross_val.kfold(
    model=cb.model.SVM,
    X=XTknn,
    Y=Y,
    param_dict=param_dict,
    folds=5,
    n_mc=10,
)

# Run the search, then plot the AUC metric with a 95 interval setting (ci=95).
# color_scaling accepts: 'tanh', 'tanh+1', 'linear', 'sq', 'sqrt'
cv.run()
cv.plot(
    metric='auc',
    ci=95,
    scale=1.3,
    color_beta=2,
    color_scaling='tanh',
)

Number of cores set to: 8
Running ...


100%|██████████| 1210/1210 [00:12<00:00, 95.30it/s] 


Time taken: 0.22 minutes with 8 cores
Done!


### 6. Get Model and Evaluate

In [5]:
# Build the SVM with fixed hyperparameters. C = 2**5 (= 32) and gamma = 2**-7
# are both points on the grid defined in param_dict.
# NOTE(review): presumably picked from the cross-validation plot - confirm.
model = cb.model.SVM(C=2**5, gamma=2**-7)

# Fit on the full pre-processed matrix, then predict on that same matrix.
model.train(XTknn, Y)
model.test(XTknn)

# Evaluate with a cutoff score of 0.5.
model.evaluate(cutoffscore=0.5, plot_median=False)

In [6]:
# Bootstrap evaluation of the trained model: 100 resamples (bootnum=100),
# using the same 0.5 cutoff score as the plain evaluation.
model.booteval(
    XTknn,
    Y,
    cutoffscore=0.5,
    bootnum=100,
)

100%|██████████| 100/100 [00:00<00:00, 132.11it/s]


### 7. Save table

In [7]:
# Output location for the results table.
# Note: this rebinds the `home` and `file` names used earlier for the input data.
home = 'tables/'
file = 'SVM_MTBLS161_serum_NH.xlsx'

# Write the model's table to the Excel file at home + file.
model.save_table(f'{home}{file}')

Done! Saved table as tables/SVM_MTBLS161_serum_NH.xlsx
