# Example usage of spectraxai for modeling

This notebook demonstrates various pre-processing techniques, models, and splitting methods.

## Load the example dataset

In [1]:
from spectraxai.utils.datasets import load_GR_SSL

# Load the example dataset; `dataset` is reused by every code cell below.
dataset = load_GR_SSL()

## Kennard-Stone split and PLS

In [2]:
from spectraxai.dataset import DatasetSplit

# Split size handed to the Kennard-Stone splitter. The recorded test metrics
# report N=186, so this looks like the training share — TODO confirm.
train_fraction = 0.8
idx_trn, idx_tst = dataset.train_test_split(
    DatasetSplit.KENNARD_STONE, train_fraction
)

In [3]:
from spectraxai.models import Model, StandardModel
from spectraxai.utils.modelAssessment import metrics

# Materialise the two partitions from the Kennard-Stone indices.
dataset_trn = dataset.subset(idx_trn)
dataset_tst = dataset.subset(idx_tst)

# Fit PLS on the training partition, then score its predictions on the
# held-out partition. The metrics call stays last so the cell displays it.
pls = StandardModel(Model.PLS)
pls.train(dataset_trn)
y_hat = pls.predict(dataset_tst.X)
metrics(dataset_tst.Y, y_hat)

{'N': 186,
 'RMSE': 0.34581057570112184,
 'R2': 0.6369625596758557,
 'RPIQ': 2.530286987972998}

The above may also be simplified with a single call as follows:

In [4]:
# One-call equivalent of the explicit subset / train / predict / metrics steps
# above; only the training indices are supplied.
pls.train_and_test(dataset, idx_trn=idx_trn)

[{'N': 186,
  'RMSE': 0.34581057570112184,
  'R2': 0.6369625596758557,
  'RPIQ': 2.530286987972998,
  'output': 'OM',
  'pre_process': 'NONE',
  'val_score': 0.5034115401574552,
  'n_components': 16,
  'VIP': array([2.23302446, 1.98756947, 1.50969351, 1.38541485, 1.12457779,
         1.01419668, 1.08503902, 1.12963256, 1.09785367, 1.13378569,
         1.2411618 , 1.21476178, 1.12122645, 1.06099858, 1.01466122,
         0.98135311, 0.95945116, 0.95323209, 0.96152758, 0.96352632,
         0.95497833, 0.94333931, 0.92493532, 0.89995177, 0.878851  ,
         0.87293487, 0.8767203 , 0.8949788 , 0.91995059, 0.94147415,
         0.95905921, 0.96700661, 0.97378277, 0.9793178 , 0.98157249,
         0.98062836, 0.97814087, 0.97469789, 0.98118893, 0.96814409,
         0.94978551, 0.95438756, 0.96378746, 0.9692439 , 0.96705101,
         0.95800089, 0.94609594, 0.91610968, 0.88225574, 0.85841647,
         0.8749628 , 0.89127196, 1.17904536, 1.35996198, 1.02302815,
         0.92200152, 0.88749179, 0

## Cross-validation split and RF with ABS + SG1 transformation

In [5]:
from spectraxai.spectra import SpectralPreprocessing

# A single pre-processing sequence: absorbance first, then SG1 configured with
# window_length=7 and polyorder=3.
treatments = [
    [
        SpectralPreprocessing.ABS,
        (SpectralPreprocessing.SG1, {"window_length": 7, "polyorder": 3}),
    ]
]

# Keep the fold indices under their own names so the Kennard-Stone
# idx_trn / idx_tst used by the PLS cells are not overwritten; otherwise
# re-running those cells after this one would pick up the cross-validation
# folds instead of the original split.
n_folds = 5
cv_idx_trn, cv_idx_tst = dataset.train_test_split(
    DatasetSplit.CROSS_VALIDATION, n_folds
)

# NOTE(review): no random seed is set in this cell, so scores may differ
# between runs — confirm whether spectraxai exposes a seed parameter.
rf = StandardModel(Model.RF)
rf.train_and_test_with_sequence(dataset, treatments, cv_idx_trn)

Unnamed: 0,pre_process,training_time,testing_time,val_score,N,RMSE,R2,RPIQ,fold,max_features,n_estimators,feature_importance
0,"[absorbance, (SG1, {'window_length': 7, 'polyo...",6.901191,0.012831,0.449156,186,0.472051,0.504034,1.821836,1,sqrt,150,"[0.007311236321289507, 0.005922101668542232, 0..."
1,"[absorbance, (SG1, {'window_length': 7, 'polyo...",10.580489,0.012789,0.480443,186,0.397997,0.494489,2.010064,2,auto,150,"[0.006811354102976155, 0.010927377124538515, 0..."
2,"[absorbance, (SG1, {'window_length': 7, 'polyo...",9.157084,0.008301,0.469675,186,0.462718,0.501521,1.76133,3,auto,100,"[0.008446335331240642, 0.00812063919006671, 0...."
3,"[absorbance, (SG1, {'window_length': 7, 'polyo...",7.251591,0.01893,0.431118,185,0.376516,0.586008,2.177861,4,sqrt,200,"[0.006030150303970275, 0.005241892080834433, 0..."
4,"[absorbance, (SG1, {'window_length': 7, 'polyo...",6.660348,0.004572,0.48976,185,0.481364,0.46446,2.07743,5,sqrt,50,"[0.003477779676180579, 0.004854142280427052, 0..."
