In [None]:
# 1. Import Packages/Modules

import numpy as np
import pandas as pd
from beakerx.object import beakerx
from sklearn.model_selection import train_test_split
import cimcb as cb
# By default, display pandas tables as BeakerX interactive tables.
beakerx.pandas_display_table()
# Confirmation message: reaching this line means every import above succeeded.
print('All packages successfully loaded')

In [None]:
# 2. Load Data and Peak Sheet

# Location of the Excel workbook, relative to the notebook's working directory.
home = 'data/'
file = 'GastricCancer_NMR.xlsx'

# Read the 'Data' sheet and the 'Peak' sheet from the workbook in one call.
# NOTE(review): both results are used as pandas DataFrames further down --
# confirm against the cimcb documentation.
DataTable, PeakTable = cb.utils.load_dataXL(
    f'{home}{file}',
    DataSheet='Data',
    PeakSheet='Peak',
)

In [None]:
# 3. Get X, and Y

# Peak quality filter: keep a peak only when its QC_RSD is below 20 and its
# Perc_missing is below 10.
RSD = PeakTable['QC_RSD']
PercMiss = PeakTable['Perc_missing']
passes_qc = (RSD < 20) & (PercMiss < 10)
PeakTableClean = PeakTable.loc[passes_qc]

# Restrict the samples to the two classes being compared ("GC" and "HE").
DataTable2 = DataTable[DataTable['Class'].isin(['GC', 'HE'])]

# Binary outcome vector for stratifying the samples: 1 for 'GC', 0 otherwise.
Outcomes = DataTable2['Class']
Y = np.array([1 if outcome == 'GC' else 0 for outcome in Outcomes])

# Build the feature matrix from the columns named in the cleaned peak table,
# then transform it step by step; every intermediate keeps its own name so
# each stage can be inspected.
peaklist = PeakTableClean['Name']
X = DataTable2[peaklist]

# Natural-log transform of the raw values.
Xlog = np.log(X)

# Scale with cimcb's 'auto' method.
# NOTE(review): presumably mean-centring plus unit-variance scaling per
# column -- confirm against the cimcb documentation.
Xscale = cb.utils.scale(Xlog, method='auto')

# Impute with cimcb's knnimpute using k=3.
# NOTE(review): presumably fills missing values from the 3 nearest
# neighbours -- confirm against the cimcb documentation.
Xknn = cb.utils.knnimpute(Xscale, k=3)


In [None]:
# 4. Optimise model hyperparameters

# Candidate values for n_components: every integer from 1 to 16 inclusive.
param_dict = {'n_components': list(range(1, 17))}

# Initialise a 10-fold cross-validation of the PCLR model over that grid,
# using the imputed feature matrix and the binary outcome vector.
cv = cb.cross_val.kfold(
    model=cb.model.PCLR,
    X=Xknn,
    Y=Y,
    param_dict=param_dict,
    folds=10,
)

# Run the cross-validation, then plot the results using the 'auc' metric.
cv.run()
cv.plot(metric='auc')


In [None]:
# 5. Train and evaluate model

# Number of components used for the final model.
# NOTE(review): presumably picked by inspecting the cross-validation plot
# from the previous step -- confirm.
N_COMPONENTS = 7

# Fit a PCLR model on the full, imputed data set.
model = cb.model.PCLR(n_components=N_COMPONENTS)
model.train(Xknn, Y)

# Evaluate the trained model at a cutoff score of 0.5.
# Alternative: evaluate at a fixed specificity instead, e.g.
#   model.evaluate(specificity=0.8)
model.evaluate(cutoffscore=0.5)


In [None]:
# 6. Save tables to excel
#
# Each table is written through pd.ExcelWriter used as a context manager.
# The previous explicit writer.save() call was deprecated in pandas 1.5 and
# removed in pandas 2.0; the `with` block saves and closes the workbook on
# exit, including when to_excel raises.

# Cross-validation results (cv.table) -> pclr_cv.xlsx
table = pd.DataFrame(cv.table)
with pd.ExcelWriter("pclr_cv.xlsx") as writer:
    table.to_excel(writer, index=False)

# Model evaluation results (model.table) -> pclr_eval.xlsx
table = pd.DataFrame(model.table)
with pd.ExcelWriter("pclr_eval.xlsx") as writer:
    table.to_excel(writer, index=False)