In [1]:
# Enable IPython's autoreload extension. Mode 2 re-imports all modules before each
# cell executes, so edits to the project packages imported below are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from pathlib import Path

import torch
import wandb

from config import Environment, TrainConfig
from denoising.train import prepare_training
from denoising.utils import seed_everything

In [3]:
# Load settings from the env file located one level above the notebook's working
# directory, then log in to Weights & Biases with the API key stored in those settings.
# The login call is kept as the final expression so its result is shown as cell output.
CWD = Path.cwd()
env_file = CWD / '../env'
env = Environment(_env_file=env_file)
wandb.login(key=env.wandb_api_key)

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/d.nesterov/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mdmitrylala[0m to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

# Конфиг обучения и подготовка

In [4]:
# Index of this experiment; it is embedded in the run name and in the output paths.
run_idx = 30
save_dir = CWD / 'mri'

# Reference "optuned" hyperparameters for the two architectures.
# Only the FNO set is used by the config below; the HNO set is kept for comparison.
#
# HNO optuned params
#     n_layers: 10
#     hidden_channels: 16
#     n_modes: 16
#     lifting_channel_ratio: 32
#     projection_channel_ratio: 8
#     lr: 0.00433647012426727
#
# FNO optuned params
#     n_layers: 15
#     hidden_channels: 16
#     n_modes: 32
#     lifting_channel_ratio: 32
#     projection_channel_ratio: 2
#     lr: 0.006055187761870968

# FNO model settings; n_layers, hidden_channels, n_modes and the channel ratios
# mirror the FNO reference values listed above.
fno_params = {
    'n_modes': (32, 32),
    'in_channels': 1,
    'hidden_channels': 16,
    'lifting_channel_ratio': 32,
    'projection_channel_ratio': 2,
    'out_channels': 1,
    'factorization': 'dense',
    'n_layers': 15,
    'rank': 0.42,
}

# Output locations, both derived from the run index.
weights_path = save_dir / f'run-{run_idx}-weights.pt'
run_dir = save_dir / f'run-{run_idx}'

cfg = TrainConfig(
    # Datasets params
    train_dset='mri_pm_train',
    test_dset='mri_pm_test',
    train_batch_size=64,
    test_batch_size=128,
    # Model params
    name_model='mri-fno-optuned',
    cfg_fno=fno_params,
    # Run params
    random_seed=42,
    device='cuda:2',
    run_name=f'Run {run_idx}, FNO optuned',
    save_weights_path=weights_path,
    # Train params
    n_epochs=91,
    lr=0.00606,  # FNO reference lr above, rounded
    wandb_log=True,
    save_dir=run_dir,
    verbose=True,
)

In [5]:
# Apply the configured seed BEFORE building the trainer. The only other seed call in
# this notebook happens right before training, i.e. after this setup step has already
# run, so any randomness used during setup would otherwise not be tied to
# cfg.random_seed on a fresh "Restart & Run All".
# NOTE(review): prepare_training is not visible here; if it already seeds internally,
# this call is a harmless repeat with the same seed.
seed_everything(cfg.random_seed)

# Build the trainer, the keyword arguments for trainer.train(), and the run handle
# (the run handle may be None; a later cell checks for that before finishing it).
trainer, train_kwargs, run = prepare_training(env, cfg)

Got n_samples = 8380  in dataset mri_pm_train        with sample size = torch.Size([1, 145, 145])
Got n_samples = 2093  in dataset mri_pm_test         with sample size = torch.Size([1, 145, 145])
Got n_samples = 2093  in dataset mri_gt_test         with sample size = torch.Size([1, 145, 145])
Got n_samples = 137   in dataset bsd_synth_0.01_train with sample size = torch.Size([1, 321, 481])
Got n_samples = 77    in dataset bsd_synth_0.01_test with sample size = torch.Size([1, 321, 481])
Got n_samples = 12296 in dataset sidd_train          with sample size = torch.Size([3, 512, 512])
Got n_samples = 3008  in dataset sidd_test           with sample size = torch.Size([3, 512, 512])
torch.Size([64, 1, 145, 145]) torch.Size([64, 1, 145, 145])
Loaded  model mri-fno-neuralop with n_parameters = 2010449
Loaded  model mri-fno-tucker   with n_parameters = 2010449
Loaded  model mri-fno-dense    with n_parameters = 4476513
Loaded  model mri-hno          with n_parameters = 1098977
Loaded  model sid

Logging to wandb enabled: True


# Запуск

In [8]:
# Apply the configured random seed immediately before training starts.
seed = cfg.random_seed
seed_everything(seed)

# Keep the train() call as the final expression so the value it returns
# is displayed as the cell output.
trainer.train(**train_kwargs)

Training on 8380 samples
Testing on [2093] samples         on resolutions ['test'].
Raw outputs of shape torch.Size([64, 1, 145, 145])
[0] time=52.37, avg_loss=0.1622, train_err=10.3762
Eval: test_h1=0.0993, test_l2=0.0560
Saved training state to /home/d.nesterov/denoising-fno/notebooks/mri/run-30
[1] time=50.79, avg_loss=0.0982, train_err=6.2804
Eval: test_h1=0.0932, test_l2=0.0544
Saved training state to /home/d.nesterov/denoising-fno/notebooks/mri/run-30
[2] time=50.62, avg_loss=0.0932, train_err=5.9610
Eval: test_h1=0.0894, test_l2=0.0399
Saved training state to /home/d.nesterov/denoising-fno/notebooks/mri/run-30
[3] time=51.84, avg_loss=0.0886, train_err=5.6674
Eval: test_h1=0.0858, test_l2=0.0579
Saved training state to /home/d.nesterov/denoising-fno/notebooks/mri/run-30
[4] time=51.48, avg_loss=0.0851, train_err=5.4443
Eval: test_h1=0.0838, test_l2=0.0327
Saved training state to /home/d.nesterov/denoising-fno/notebooks/mri/run-30
[5] time=50.19, avg_loss=0.0827, train_err=5.2913

{'train_err': 4.182765813274238,
 'avg_loss': 0.06538691187815336,
 'epoch_train_time': 50.502768628299236,
 'test_h1': tensor(0.0657, device='cuda:2'),
 'test_l2': tensor(0.0234, device='cuda:2'),
 'test_outputs': <wandb.sdk.data_types.image.Image at 0x7eff66d032b0>}

In [9]:
# Finish the run handle returned by prepare_training; skip the call when it is None.
has_run = run is not None
if has_run:
    run.finish()

0,1
avg_loss,█▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
lr,███▇▇▆▆▄▄▃▂▂▁▁▁▃▃▄▆▇▇████████▇▆▅▅▃▃▂▁▁▁▁
test_h1,█▆▅▅▅▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
test_l2,█▄█▃▃▂▃▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▃▄▃▃▃▂▂▁▂▂▁▁▁▁▁
time,▁▅▄▄▇▆▆▇▄█▇▆▆▆▆▇▇▄▂▃▁▃▂▃▂▃▄▄▁▃▅▄▃▄▃▃▃▄▃▄
train_err,█▇▆▅▄▄▃▃▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁

0,1
avg_loss,0.06539
lr,2e-05
test_h1,0.06573
test_l2,0.02343
time,50.50277
train_err,4.18277


In [10]:
# Save the final weights as CPU tensors WITHOUT moving the live model.
# `nn.Module.to('cpu')` relocates the module in place, so the previous form left
# `trainer.model` on the CPU for every cell executed afterwards (e.g. re-running
# training would find the model on the wrong device). Converting a snapshot of the
# state dict instead produces the same saved content and leaves the model untouched.
state_dict = trainer.model.state_dict()
for name, value in list(state_dict.items()):
    # Only tensors need relocating; any non-tensor entry is written through unchanged.
    if torch.is_tensor(value):
        state_dict[name] = value.cpu()

torch.save(state_dict, cfg.save_weights_path)
print(f'Saved to {cfg.save_weights_path}')

Saved to /home/d.nesterov/denoising-fno/notebooks/mri/run-30-weights.pt
