In [1]:
# Enable IPython's autoreload extension so edits to the imported project
# modules are picked up without restarting the kernel.
%load_ext autoreload
# Mode 2: reload all modules before each cell execution.
%autoreload 2

In [2]:
from pathlib import Path

import torch
import wandb

from config import Environment, TrainConfig
from denoising.train import prepare_training
from denoising.utils import seed_everything

In [4]:
# Directory the kernel was started in; later cells build their paths from it.
CWD = Path.cwd()

# Settings are read from an env file located one level above CWD, so the
# W&B API key comes from that settings object and is never written here.
env_file = CWD.joinpath('../env')
env = Environment(_env_file=env_file)

# Last expression of the cell: the login result is shown as the cell output.
wandb.login(key=env.wandb_api_key)

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/d.nesterov/.netrc


True

# Конфиг обучения и подготовка

In [7]:
# Experiment number; it is embedded in the run name and in every output path.
run_idx = 18
# Root folder for the artifacts of this experiment, relative to CWD.
save_dir = CWD / 'mri'

# Output locations derived from the experiment number.
weights_file = save_dir / f'run-{run_idx}-weights.pt'
state_dir = save_dir / f'run-{run_idx}'

# Operator settings, handed to TrainConfig unchanged as `cfg_fno`.
fno_settings = {
    'n_modes': (16, 16),
    'in_channels': 1,
    'hidden_channels': 49,
    'lifting_channel_ratio': 6,
    'projection_channel_ratio': 12,
    'out_channels': 1,
    'factorization': 'dense',
    'n_layers': 3,
    'rank': 0.42,
    'spectral': 'hartley',
}

cfg = TrainConfig(
    # --- data ---
    train_dset='mri_pm_train',
    test_dset='mri_pm_test',
    train_batch_size=32,
    test_batch_size=64,
    # --- model ---
    name_model='mri-hno-v2-optuned',
    cfg_fno=fno_settings,
    # --- run ---
    random_seed=42,
    # NOTE(review): pinned to the third GPU of the training host; adjust on other machines.
    device='cuda:2',
    run_name=f'Run {run_idx}, HNO v2 optuned',
    save_weights_path=weights_file,
    # --- optimisation ---
    n_epochs=50,
    lr=0.0075,
    wandb_log=True,
    save_dir=state_dir,
    verbose=True,
)

In [8]:
# Build everything the run needs from the environment settings and the config.
# The call yields three objects:
#   trainer      - object whose .train() method runs the training loop below
#   train_kwargs - keyword arguments later unpacked into trainer.train()
#   run          - handle closed with run.finish() further down; may be None
#                  (presumably when W&B logging is disabled; confirm in prepare_training)
trainer, train_kwargs, run = prepare_training(env, cfg)

Got n_samples = 8380  in dataset mri_pm_train        with sample size = torch.Size([1, 145, 145])
Got n_samples = 2093  in dataset mri_pm_test         with sample size = torch.Size([1, 145, 145])
Got n_samples = 2093  in dataset mri_gt_test         with sample size = torch.Size([1, 145, 145])
Got n_samples = 137   in dataset bsd_synth_0.01_train with sample size = torch.Size([1, 321, 481])
Got n_samples = 77    in dataset bsd_synth_0.01_test with sample size = torch.Size([1, 321, 481])
Got n_samples = 12296 in dataset sidd_train          with sample size = torch.Size([3, 512, 512])
Got n_samples = 3008  in dataset sidd_test           with sample size = torch.Size([3, 512, 512])
torch.Size([32, 1, 145, 145]) torch.Size([32, 1, 145, 145])
Loaded  model mri-fno-neuralop with n_parameters = 2010449
Loaded  model mri-fno-custom   with n_parameters = 2010449
Loaded  model mri-fno-dense    with n_parameters = 4476513
Loaded  model mri-hno-separable with n_parameters = 2248289
Loaded  model mr

Logging to wandb enabled: True


# Запуск

In [9]:
# Seed immediately before training using the seed stored in the config.
# NOTE(review): seed_everything is a project helper; presumably it seeds all
# RNGs used during training. Confirm in denoising.utils.
seed_everything(cfg.random_seed)
# Start training with the keyword arguments returned by prepare_training.
trainer.train(**train_kwargs)

Training on 8380 samples
Testing on [2093] samples         on resolutions ['test'].


  fft = torch.fft.fft2(x.float(), norm=norm, dim=dim, s=s)


Raw outputs of shape torch.Size([32, 1, 145, 145])
[0] time=50.93, avg_loss=0.1313, train_err=4.1997
Eval: test_h1=0.1097, test_l2=0.0616
Saved training state to /home/d.nesterov/denoising-fno/notebooks/mri/run-18
[1] time=49.04, avg_loss=0.1099, train_err=3.5144
Eval: test_h1=0.1090, test_l2=0.0688
Saved training state to /home/d.nesterov/denoising-fno/notebooks/mri/run-18
[2] time=49.22, avg_loss=0.1094, train_err=3.4992
Eval: test_h1=0.1134, test_l2=0.0450
Saved training state to /home/d.nesterov/denoising-fno/notebooks/mri/run-18
[3] time=49.27, avg_loss=0.1088, train_err=3.4804
Eval: test_h1=0.1100, test_l2=0.0669
Saved training state to /home/d.nesterov/denoising-fno/notebooks/mri/run-18
[4] time=49.23, avg_loss=0.1084, train_err=3.4670
Eval: test_h1=0.1083, test_l2=0.0457
Saved training state to /home/d.nesterov/denoising-fno/notebooks/mri/run-18
[5] time=49.30, avg_loss=0.1086, train_err=3.4729
Eval: test_h1=0.1101, test_l2=0.0518
Saved training state to /home/d.nesterov/denois

Exception ignored in: <bound method IPythonKernel._clean_thread_parent_frames of <ipykernel.ipkernel.IPythonKernel object at 0x7fc5fd1061a0>>
Traceback (most recent call last):
  File "/home/d.nesterov/denoising-fno/.venv/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 790, in _clean_thread_parent_frames
    active_threads = {thread.ident for thread in threading.enumerate()}
  File "/home/d.nesterov/.local/share/uv/python/cpython-3.10.16-linux-x86_64-gnu/lib/python3.10/threading.py", line 1487, in enumerate
    return list(_active.values()) + list(_limbo.values())
KeyboardInterrupt: 


KeyboardInterrupt: 

In [10]:
# Close the tracking run so its summary is flushed, including after an
# interrupted training cell. `run` can be None, hence the guard.
if run is not None:
    run.finish()

0,1
avg_loss,█▂▂▁▁▁▁▁▁▁▁▁▁▁
lr,███▇▇▇▆▅▅▄▃▃▂▁
test_h1,▄▃█▄▂▄▁▁▁▂▁▄▃▁
test_l2,▆█▁▇▁▃▁▄▃▂▂▅▄▂
time,█▁▂▂▂▂▄▂▂▁▂▂▁▂
train_err,█▂▂▁▁▁▁▁▁▁▁▁▁▁

0,1
avg_loss,0.10741
lr,0.00414
test_h1,0.10731
test_l2,0.04921
time,49.2953
train_err,3.43559


In [11]:
# Persist the final model weights to the path configured in `cfg`.
# torch.save does not create missing folders, so make sure the target
# directory exists first. Path(...) is used because the config may hold the
# path either as a Path or as a plain string.
Path(cfg.save_weights_path).parent.mkdir(parents=True, exist_ok=True)
# NOTE(review): .to('cpu') is applied before taking the state dict so the
# checkpoint does not depend on the training GPU. If the model is a
# torch.nn.Module, this also moves the live model to the CPU in place;
# move it back before training further in this kernel.
torch.save(trainer.model.to('cpu').state_dict(), cfg.save_weights_path)