# Example usage of spectraxai for modeling

This notebook shows how to train and evaluate models with spectraxai, using various pre-processing techniques, models, and splitting methods.

## Load the example dataset

In [1]:
from spectraxai.utils.datasets import load_GR_SSL

# Load the example dataset through spectraxai's dataset helper.
# Every later cell in this notebook operates on `dataset`.
dataset = load_GR_SSL()

## Kennard-Stone split and PLS

In [2]:
from spectraxai.dataset import DatasetSplit

# Split the samples into train/test index sets using the Kennard-Stone method.
# NOTE(review): 0.8 is presumably the training fraction (the test metrics
# further down report N=186) -- confirm against train_test_split's docs.
idx_trn, idx_tst = dataset.train_test_split(DatasetSplit.KENNARD_STONE, 0.8)

In [3]:
from spectraxai.models import Model, StandardModel
from spectraxai.utils.modelAssessment import metrics 

# Materialise the train and test partitions from the index sets computed above.
datasetTrn = dataset.subset(idx_trn)
datasetTst = dataset.subset(idx_tst)

# Fit a PLS model on the training partition only.
pls = StandardModel(Model.PLS)
pls.train(datasetTrn)

# Predict the held-out spectra and score the predictions against the
# reference values; the metrics dict (N, RMSE, R2, RPIQ) is the cell output.
y_hat = pls.predict(datasetTst.X)
metrics(datasetTst.Y, y_hat)

{'N': 186,
 'RMSE': 0.34581057570112184,
 'R2': 0.6369625596758557,
 'RPIQ': 2.530286987972998}

**Note:** The above may also be simplified with a single call as follows:

In [4]:
# Train and evaluate in one call: the samples listed in idx_trn are used for
# training. The returned table reports the same N, RMSE, R2 and RPIQ as the
# manual train/predict/metrics steps, plus extra columns such as val_score,
# n_components and timings.
# NOTE(review): presumably every sample not in idx_trn is used for testing -- confirm.
pls.train_and_test(dataset, idx_trn=idx_trn)

Unnamed: 0,N,RMSE,R2,RPIQ,output,pre_process,val_score,n_components,feature_importance,training_time,testing_time
0,186,0.345811,0.636963,2.530287,OM,NONE,0.503412,16,"[2.233024461034403, 1.987569471029435, 1.50969...",2.771639,0.001545


## Cross-validation split and RF with ABS + SG1 transformation

In [5]:
from spectraxai.spectra import SpectralPreprocessing

# 5-fold cross-validation split. This rebinds idx_trn / idx_tst, replacing
# the Kennard-Stone indices computed earlier in the notebook.
idx_trn, idx_tst = dataset.train_test_split(DatasetSplit.CROSS_VALIDATION, 5)

# Pre-treatment pipeline, applied in order: ABS first, then SG1 with the
# given options (a step with options is written as a (step, options) tuple).
preprocess = [
    SpectralPreprocessing.ABS,
    (SpectralPreprocessing.SG1, dict(window_length=7, polyorder=3)),
]

# Train and evaluate the RF model; the result holds one row per fold.
rf = StandardModel(Model.RF)
rf.train_and_test(dataset, preprocess=preprocess, idx_trn=idx_trn)

Unnamed: 0,N,RMSE,R2,RPIQ,output,pre_process,val_score,max_features,n_estimators,feature_importance,training_time,testing_time,fold
0,186,0.432159,0.507759,1.897452,OM,"[absorbance, (SG1, {'window_length': 7, 'polyo...",0.464802,sqrt,200,"[0.007700629669913299, 0.006485563949339953, 0...",7.75463,0.01658,1
1,186,0.437689,0.566882,2.11337,OM,"[absorbance, (SG1, {'window_length': 7, 'polyo...",0.423656,auto,100,"[0.013409912672599802, 0.0075977795861049335, ...",9.440188,0.008296,2
2,186,0.392649,0.523382,2.056544,OM,"[absorbance, (SG1, {'window_length': 7, 'polyo...",0.460621,auto,150,"[0.009747723959952976, 0.008092263878640345, 0...",10.798451,0.013494,3
3,185,0.451114,0.514809,1.817721,OM,"[absorbance, (SG1, {'window_length': 7, 'polyo...",0.459935,auto,150,"[0.004701609995050472, 0.006896858337682953, 0...",10.655174,0.013249,4
4,185,0.453402,0.495644,1.808549,OM,"[absorbance, (SG1, {'window_length': 7, 'polyo...",0.470267,auto,200,"[0.010217268478131801, 0.009672352221399625, 0...",12.307717,0.025178,5


## Cross-validation split and SVR with different pre-treatments and custom grid search

In [6]:
import numpy as np

from spectraxai.spectra import SpectralPreprocessing


# Custom hyperparameter grid for the SVR:
#   epsilon -- three candidate values
#   C       -- nine powers of two, from 2**-1 up to 2**7
svr_params = dict(
    epsilon=[0.01, 0.05, 0.1],
    C=np.logspace(-1, 7, num=9, base=2),
)

# Candidate pre-treatments to compare. Each entry is either a single step or
# a list of steps; a step with options is written as a (step, options) tuple.
preprocesses = [
    [SpectralPreprocessing.ABS, (SpectralPreprocessing.SG1, dict(window_length=7, polyorder=3))],
    [SpectralPreprocessing.ABS, SpectralPreprocessing.CR],
    SpectralPreprocessing.SNV,
]

# 5-fold cross-validation split (rebinds idx_trn / idx_tst).
idx_trn, idx_tst = dataset.train_test_split(DatasetSplit.CROSS_VALIDATION, 5)

# Evaluate the SVR for every pre-treatment; the result table holds one row
# per (pre-treatment, fold) pair.
svr = StandardModel(Model.SVR, grid_search_hyperparameters=svr_params)
svr.train_and_test_multiple(dataset, preprocesses=preprocesses, idx_trn=idx_trn)

Unnamed: 0,N,RMSE,R2,RPIQ,output,pre_process,val_score,C,epsilon,gamma,SVs,training_time,testing_time,fold
0,186,0.393514,0.538534,2.231178,OM,"[absorbance, (SG1, {'window_length': 7, 'polyo...",0.573819,8.0,0.05,0.001961,639,4.536349,0.010542,1
1,186,0.426586,0.641631,2.109773,OM,"[absorbance, (SG1, {'window_length': 7, 'polyo...",0.543819,4.0,0.1,0.001961,558,4.36853,0.01444,2
2,186,0.38263,0.583306,2.09079,OM,"[absorbance, (SG1, {'window_length': 7, 'polyo...",0.552775,16.0,0.1,0.001961,538,4.35745,0.013733,3
3,185,0.380157,0.593238,2.157006,OM,"[absorbance, (SG1, {'window_length': 7, 'polyo...",0.533105,8.0,0.1,0.001961,558,4.279142,0.013352,4
4,185,0.393886,0.616743,2.000581,OM,"[absorbance, (SG1, {'window_length': 7, 'polyo...",0.518508,8.0,0.1,0.001961,554,4.414216,0.009838,5
5,186,0.400577,0.521821,2.19184,OM,"[absorbance, continuum-removal]",0.490957,16.0,0.1,0.001961,564,4.538019,0.009006,1
6,186,0.43888,0.620677,2.050673,OM,"[absorbance, continuum-removal]",0.484783,16.0,0.1,0.001961,563,4.466737,0.016482,2
7,186,0.398393,0.548268,2.00807,OM,"[absorbance, continuum-removal]",0.506791,8.0,0.1,0.001961,571,4.494121,0.009841,3
8,185,0.417299,0.509872,1.96502,OM,"[absorbance, continuum-removal]",0.485083,4.0,0.01,0.001961,731,4.492683,0.013339,4
9,185,0.463719,0.468797,1.699304,OM,"[absorbance, continuum-removal]",0.466795,4.0,0.1,0.017203,557,4.35521,0.008806,5
