# Compare performance
Let's check if this newly trained model (command line base) is relevant to the previous one I trained in an unprofessional way, i.e.redundant scripts that are not efficient for experiments.

In [1]:
import torch
from src.datas.dataloader import get_dataloader
from src.datas.transforms import InstanceNorm
from src.eval.eval import evaluate_base, evaluate
from src.models.mae_vit import mae_vit_base_patch16


In [2]:
dataloader = get_dataloader(annotations_file='data/pretrain/train/info.csv', input_dir='data/pretrain/train', 
                                batch_size=256, transform=InstanceNorm(), ispretrain=True)

# for calculating R2 score
mse_base= evaluate_base(dataloader['val'])
print(f'MSE: {round(mse_base, 5)}')

MSE: 0.99951


In [3]:
# Our command line pre-trained model
model = mae_vit_base_patch16().to('cuda')
model.load_state_dict(torch.load('results/pretrain_test_20240609/model.ckpt'))
for _ in range(5):
    print(_)        
    mse_model = evaluate(model, dataloader['val'])
    print(f'MSE: {round(mse_model, 5)}')
    print(f'R2: {round(1 - mse_model / mse_base, 4)}')

0
MSE: 0.01065
R2: 0.9893
1
MSE: 0.01048
R2: 0.9895
2
MSE: 0.01121
R2: 0.9888
3
MSE: 0.01091
R2: 0.9891
4
MSE: 0.01058
R2: 0.9894


In [4]:
# Previous pre-trained model
model_old = mae_vit_base_patch16().to('cuda')
model_old.load_state_dict(torch.load('models/mae_vit_base_patch16_l-coslr_1e-05_20231229.pth'))
for _ in range(5):
    print(_)    
    mse_model_old = evaluate(model, dataloader['val'])
    print(f'MSE: {round(mse_model_old, 5)}')
    print(f'R2: {round(1 - mse_model_old / mse_base, 4)}')

0
MSE: 0.01084
R2: 0.9892
1
MSE: 0.01064
R2: 0.9894
2
MSE: 0.01089
R2: 0.9891
3
MSE: 0.01119
R2: 0.9888
4
MSE: 0.0108
R2: 0.9892


In [5]:
for _ in range(5):
    print(_)
    mse_base= evaluate_base(dataloader['val'])
    print(f'MSE: {round(mse_base, 5)}')

0
MSE: 0.99951
1
MSE: 0.99951
2
MSE: 0.99951
3
MSE: 0.99951
4
MSE: 0.99951


There is difference between each iteration of evaluation. It happened before, but I fixed it by removing the shuffling in the validation dataloader. This time I haven't found what causes the randomness.

At least there is a good news: the newly pre-trained model has relevant performance to the previous model I trained by improfessional way. The R2 are both around 0.989. The 0.996 R2 I reported based on the previous model was calculated in the raw space, which means the standardized spectra are inverse transformed to the raw space. It might be the numerical difference during the transform. And this small difference is not worthy for me to dig into.

# Update for the new model
It's the selected pre-trained model through series of experiments, which is now being written in the paper.

In [2]:
dataloader = get_dataloader(
    annotations_file='data/pretrain/train/info.csv', input_dir='data/pretrain/train',
    val_annotations_file='data/pretrain/train/val.csv', val_input_dir='data/pretrain/train',
    batch_size=256, transform=InstanceNorm(), ispretrain=True
    )

# for calculating R2 score
mse_base= evaluate_base(dataloader)
print(f'MSE: {round(mse_base, 5)}')

MSE: 0.99951


In [6]:
model = mae_vit_base_patch16(mask_ratio=0.5).to('cuda')
model.load_state_dict(torch.load('results/HPtuning-loss-on-masks/pretrain-mask-ratio-0.5-blr-1e-4-transform-instance_normalize/model.ckpt', weights_only=True))
for _ in range(5):
    print(_)        
    mse_model = evaluate(model, dataloader['val'], mask_only=True)
    print(f'MSE: {round(mse_model, 5)}')
    print(f'R2: {round(1 - mse_model / mse_base, 4)}')

0
MSE: 0.02715
R2: 0.9728
1
MSE: 0.02691
R2: 0.9731
2
MSE: 0.02689
R2: 0.9731
3
MSE: 0.02691
R2: 0.9731
4
MSE: 0.02713
R2: 0.9729


In [7]:
round(1 - (0.02715+0.02691+0.02689+0.02691+0.02713)/5/mse_base, 4)

0.973