# Example usage of spectraxai for modeling

This notebook shows how to combine various pre-processing techniques, models, and splitting methods.

## Load the example dataset

In [1]:
from spectraxai.data import load_GR_SSL

# Load the example dataset exposed by spectraxai.data; the cells below all
# read from this `dataset` object.
dataset = load_GR_SSL()

## Kennard-Stone split and PLS

In [2]:
from spectraxai.dataset import DatasetSplit

# Kennard-Stone split of `dataset` into training and test indices.
# NOTE(review): 0.8 is presumably the training fraction — confirm against the
# train_test_split documentation.
idx_trn, idx_tst = dataset.train_test_split(DatasetSplit.KENNARD_STONE, 0.8)

In [3]:
from spectraxai.models import Model, StandardModel
from spectraxai.utils import metrics

# Materialise the two subsets selected by the split indices computed earlier.
datasetTrn = dataset.subset(idx_trn)
datasetTst = dataset.subset(idx_tst)

# Fit a PLS model on the training subset only, then predict the test subset.
pls = StandardModel(Model.PLS)
pls.fit(datasetTrn)
y_hat = pls.predict(datasetTst.X)

# Kept as the last expression so the metrics are rendered as the cell output.
metrics(datasetTst.Y, y_hat)

{'N': 186,
 'RMSE': 0.34581057570112184,
 'R2': 0.6369625596758557,
 'RPIQ': 2.530286987972998}

**Note:** The above may also be simplified with a single call as follows:

In [4]:
# One-call equivalent of the manual fit / predict / metrics cell: trains on the
# samples in `idx_trn` and reports the same N, RMSE, R2 and RPIQ in a table.
# NOTE(review): presumably every sample not in idx_trn is used for testing — confirm.
pls.fit_and_predict(dataset, idx_trn=idx_trn)

Unnamed: 0,N,RMSE,R2,RPIQ,output,pre_process,val_score,n_components,feature_importance,training_time,testing_time
0,186,0.345811,0.636963,2.530287,OM,NONE,0.503412,16,"[2.233024461034403, 1.987569471029435, 1.50969...",2.51393,0.000206


## Cross-validation split and RF with ABS + SG1 transformation

In [5]:
from spectraxai.spectra import SpectralPreprocessing

# Pre-processing sequence applied in order: absorbance first, then SG1.
# A step that takes options is written as an (enum member, options dict) tuple.
preprocess = [SpectralPreprocessing.ABS]
preprocess.append(
    (SpectralPreprocessing.SG1, dict(window_length=7, polyorder=3))
)

# Cross-validation split into 5 folds; this rebinds idx_trn / idx_tst, which
# previously held the Kennard-Stone indices.
idx_trn, idx_tst = dataset.train_test_split(DatasetSplit.CROSS_VALIDATION, 5)

# DatasetSplit, StandardModel and Model come from the imports in earlier cells.
rf = StandardModel(Model.RF)
rf.fit_and_predict(dataset, preprocess=preprocess, idx_trn=idx_trn)

Unnamed: 0,N,RMSE,R2,RPIQ,output,pre_process,val_score,max_features,n_estimators,feature_importance,training_time,testing_time,fold
0,186,0.487339,0.4341,1.768789,OM,"[absorbance, (SG1, {'window_length': 7, 'polyo...",0.470995,auto,50,"[0.005844363464991503, 0.004261582854562004, 0...",7.924296,0.004325,1
1,186,0.456743,0.511963,1.751533,OM,"[absorbance, (SG1, {'window_length': 7, 'polyo...",0.455873,auto,200,"[0.010700246002806245, 0.0095322200132506, 0.0...",11.633584,0.016425,2
2,186,0.432807,0.563093,1.888833,OM,"[absorbance, (SG1, {'window_length': 7, 'polyo...",0.442826,sqrt,200,"[0.00634434360084957, 0.0062412893901028135, 0...",7.119067,0.016195,3
3,185,0.428485,0.564768,1.91372,OM,"[absorbance, (SG1, {'window_length': 7, 'polyo...",0.449976,sqrt,200,"[0.006261095909949627, 0.005587479033160521, 0...",7.410581,0.016027,4
4,185,0.370002,0.485395,2.162151,OM,"[absorbance, (SG1, {'window_length': 7, 'polyo...",0.477234,auto,100,"[0.007401349539180314, 0.010818112668178741, 0...",9.302341,0.008111,5


## Cross-validation split and SVR with different pre-treatments and custom grid search

In [6]:
import numpy as np

from spectraxai.spectra import SpectralPreprocessing


# Three candidate pre-treatments. Each entry is either a single step or a list
# of steps applied in order; a step with options is an (enum, options) tuple.
preprocesses = [
    [SpectralPreprocessing.ABS, (SpectralPreprocessing.SG1, dict(window_length=7, polyorder=3))],
    [SpectralPreprocessing.ABS, SpectralPreprocessing.CR],
    SpectralPreprocessing.SNV,
]

# Cross-validation split into 5 folds, shared by every pre-treatment.
idx_trn, idx_tst = dataset.train_test_split(DatasetSplit.CROSS_VALIDATION, 5)

# Custom hyperparameter grid: three epsilon values and C = 2**-1 ... 2**7
# (nine values, one per integer exponent, i.e. 0.5 up to 128).
svr_params = dict(
    epsilon=[0.01, 0.05, 0.1],
    C=np.logspace(-1, 7, num=9, base=2),
)

# The grid above is passed in place of whatever StandardModel would use by default.
svr = StandardModel(Model.SVR, grid_search_hyperparameters=svr_params)
svr.fit_and_predict_multiple(dataset, preprocesses=preprocesses, idx_trn=idx_trn)

Unnamed: 0,N,RMSE,R2,RPIQ,output,pre_process,val_score,C,epsilon,gamma,SVs,training_time,testing_time,fold
0,186,0.408704,0.63691,2.177615,OM,"[absorbance, (SG1, {'window_length': 7, 'polyo...",0.532137,4.0,0.1,0.001799,569,4.281113,0.009066,1
1,186,0.414634,0.612389,2.188675,OM,"[absorbance, (SG1, {'window_length': 7, 'polyo...",0.55365,16.0,0.05,0.001799,627,4.339564,0.011265,2
2,186,0.394293,0.55692,2.028946,OM,"[absorbance, (SG1, {'window_length': 7, 'polyo...",0.546584,4.0,0.1,0.001799,562,4.253259,0.008926,3
3,185,0.391639,0.587983,2.1193,OM,"[absorbance, (SG1, {'window_length': 7, 'polyo...",0.551687,16.0,0.1,0.001799,552,4.213309,0.008766,4
4,185,0.35678,0.626669,2.298335,OM,"[absorbance, (SG1, {'window_length': 7, 'polyo...",0.542752,16.0,0.1,0.001799,547,4.176434,0.012937,5
5,186,0.438489,0.582061,2.029699,OM,"[absorbance, continuum-removal]",0.467381,4.0,0.01,0.001799,728,4.306008,0.01155,1
6,186,0.444541,0.554458,2.041433,OM,"[absorbance, continuum-removal]",0.500609,4.0,0.1,0.008698,564,4.4567,0.011505,2
7,186,0.441138,0.445386,1.813493,OM,"[absorbance, continuum-removal]",0.49795,16.0,0.1,0.001799,572,4.352398,0.009108,3
8,185,0.427896,0.508164,1.939724,OM,"[absorbance, continuum-removal]",0.485639,32.0,0.1,0.001799,561,4.418959,0.008925,4
9,185,0.375723,0.585973,2.18246,OM,"[absorbance, continuum-removal]",0.471882,4.0,0.01,0.001799,722,4.419363,0.0133,5
