Try using these resources to fix the cell-output issue:

https://ipython.readthedocs.io/en/stable/interactive/magics.html
https://notebook.community/lifeinoppo/littlefishlet-scode/RES/REF/python_sourcecode/ipython-master/examples/IPython%20Kernel/Capturing%20Output

# Load SEED Dataset

### Load RAW EEG

In [None]:
from dataset_processing.seed_dataset_loader import SeedDatasetLoader

# Sampling rate in Hz; handed to the loader as `fs` and reused later as the
# `sampling_frequency` argument of PreTraining.
sampling_frequency = 200  # 200 Hz

# Loader for the SEED dataset; the cells below pull labels, channel order
# and the EEG data from it.
_loader = SeedDatasetLoader(fs=sampling_frequency)

In [None]:
# Fetch the labels from the loader; the bare name on the last line makes the
# notebook display them as the cell output.
labels = _loader.get_labels()
labels

In [None]:
# Fetch the channel order from the loader; the bare name on the last line
# makes the notebook display it as the cell output.
channel_order = _loader.get_channel_order()
channel_order

In [None]:
# EEG data as returned by the loader (presumably a DataFrame, judging by the
# method name - confirm); it is the input of the augmentation step below.
_eeg_data_df = _loader.get_eeg_data_df()

In [None]:
# Calls the loader's plotting helper (presumably a visual sanity check of
# the loaded recordings - confirm against SeedDatasetLoader).
_loader.plot_random_eeg()

In [None]:
# The loader is not referenced again in the cells below, so drop the name
# (presumably to let its memory be reclaimed).
del _loader

### Data Augmentation

In [None]:
from dataset_processing.eeg_augmentation import EEGAugmentation

# Build the augmented data from the raw EEG data; `_augmented_df` is what
# the EEGDataset below is constructed from.
_augmentor = EEGAugmentation(_eeg_data_df)
_augmented_df = _augmentor.augment_data()
# Neither the augmentor nor the un-augmented data is used after this point,
# so drop both names.
del _augmentor, _eeg_data_df

# Pre-Training Tests

### Preparations

In [None]:
from torch.utils.data import DataLoader
from dataset_processing.eeg_dataset import EEGDataset
from model.pre_training.do_pre_training import PreTraining

# Batch size of the pre-training DataLoader (value taken from the paper)
pretraining_batch_size = 256

In [None]:
# Wrap the augmented data in the dataset that every DataLoader created by
# `pretraining_testing` reads from.
_dataset = EEGDataset(_augmented_df)
# The dataset has been built from it; the name is not used again below.
del _augmented_df

In [None]:
from utils.misc import sort_dict_by_values
from datetime import timedelta
import ujson


# TODO: Describe in the bachelor thesis how `num_workers` was chosen, using the code below

# Manual DataLoader cleanup. When a DataLoader is used repeatedly inside a
# Jupyter Notebook its workers were found not to be cleaned up on their own,
# so they are shut down by hand here.
def cleanup_data_loader(loader):
    """Shut down the workers of the iterator still attached to ``loader``.

    Args:
        loader: Object exposing the private ``_iterator`` attribute (in this
            notebook a ``torch.utils.data.DataLoader``).

    Returns:
        None. The call is a no-op when no iterator is attached.
    """
    # noinspection PyProtectedMember
    iterator = loader._iterator
    if iterator is None:
        return

    # noinspection PyProtectedMember
    iterator._shutdown_workers()

    # Detach the now-dead iterator: if the loader is iterated again it must
    # build a fresh one instead of reusing workers that were just shut down.
    loader._iterator = None


def pretraining_testing(
        _testing_epochs,  # Epochs
        _num_workers, _prefetch_factor,  # From DataLoader
        _scheduler_patience, _early_stopping_patience  # From PreTraining
):
    """Run one throw-away pre-training experiment and report loss and duration.

    A fresh ``DataLoader`` over the module-level ``_dataset`` is created with
    the given worker settings, a ``PreTraining`` run is executed on it without
    saving models or logs (both directories are ``None``), and the loader's
    workers are shut down afterwards, even when training raises.

    Args:
        _testing_epochs: Value passed to ``PreTraining`` as ``epochs``.
        _num_workers: ``num_workers`` of the ``DataLoader``; persistent
            workers are enabled whenever this is greater than zero.
        _prefetch_factor: ``prefetch_factor`` of the ``DataLoader``. Only
            forwarded when ``_num_workers > 0``, because the option is only
            accepted together with worker processes.
        _scheduler_patience: ``scheduler_patience`` of ``PreTraining``.
        _early_stopping_patience: ``early_stopping_patience`` of
            ``PreTraining``.

    Returns:
        dict with the keys ``last_epoch_loss`` and ``overall_elapsed_time``,
        both read from the finished ``PreTraining`` object.
    """
    print(
        f"Testing Pre-Training for {_testing_epochs} epochs ("
        f"{_num_workers} workers loading the dataset, "
        f"scheduler_patience: {_scheduler_patience}, "
        f"early_stopping_patience: {_early_stopping_patience})."
    )

    _uses_workers = _num_workers > 0
    _loader_kwargs = dict(
        batch_size=pretraining_batch_size,
        shuffle=True,
        pin_memory=True,

        persistent_workers=_uses_workers,
        num_workers=_num_workers,
    )
    if _uses_workers:
        # DataLoader rejects an explicit `prefetch_factor` when loading happens
        # in the main process, so it is only set for multi-worker loading.
        _loader_kwargs["prefetch_factor"] = _prefetch_factor

    _data_loader = DataLoader(_dataset, **_loader_kwargs)

    try:
        _pretraining = PreTraining(
            data_loader=_data_loader,
            sampling_frequency=sampling_frequency,
            scheduler_patience=_scheduler_patience,  # 50 default
            early_stopping_patience=_early_stopping_patience,
            epochs=_testing_epochs,

            # Test runs only: nothing is saved or logged
            pretraining_model_save_dir=None,
            log_dir=None,
        )
        _pretraining.train(update_after_every_epoch=False)
    finally:
        # Always release the workers, otherwise a failed or interrupted run
        # would leave them alive for the rest of the notebook session.
        cleanup_data_loader(_data_loader)

    return dict(
        last_epoch_loss=_pretraining.last_epoch_loss,
        overall_elapsed_time=_pretraining.overall_elapsed_time,
    )

### Test `num_workers` and `prefetch_factor` for DataLoader

In [None]:
# Search grid for the DataLoader benchmark: every `num_workers` value is
# combined with every `prefetch_factor` value, each for this many epochs.
_data_loader_test_epochs = 50
_num_workers_values = list(range(2, 9))  # 2, 3, ..., 8
_prefetch_factor_values = [1, 2, 4]

# (num_workers, prefetch_factor) -> overall elapsed time; filled by the
# benchmark cell below and left untouched there once it holds results.
data_loader_test_times_dict = {}

In [None]:
print(
    f"Starting to test `num_workers` (from {_num_workers_values}) and "
    f"`prefetch_factor` (from {_prefetch_factor_values}) for DataLoader, "
    f"while pre-training for {_data_loader_test_epochs} epochs"
)

# The benchmark only runs while no results are held in memory, so re-running
# this cell afterwards just prints the existing timings again.
if not data_loader_test_times_dict:
    import os

    # Create the output folder before the long benchmark starts; otherwise a
    # missing folder would only surface after every configuration has run.
    os.makedirs("model_params", exist_ok=True)

    for num_workers in _num_workers_values:
        for prefetch_factor in _prefetch_factor_values:
            res = pretraining_testing(
                _testing_epochs=_data_loader_test_epochs,

                _num_workers=num_workers,
                _prefetch_factor=prefetch_factor,

                # Patience equal to the number of epochs, so neither the
                # scheduler nor early stopping influences the timing
                _scheduler_patience=_data_loader_test_epochs,
                _early_stopping_patience=_data_loader_test_epochs,
            )
            data_loader_test_times_dict[(num_workers, prefetch_factor)] = res["overall_elapsed_time"]

    data_loader_test_times_dict = sort_dict_by_values(  # Sort by lowest `time` first
        data_loader_test_times_dict,
        reverse=False,
    )

    # NOTE(review): the keys are tuples - confirm the JSON written by ujson
    # has the key format you expect.
    with open(f"model_params/data_loader_test_times_dict__{_data_loader_test_epochs}_epochs.json", "w", encoding="utf-8") as fout:
        ujson.dump(
            data_loader_test_times_dict, fout,
            ensure_ascii=False, escape_forward_slashes=False,
            indent=4,
        )

print()
for (num_workers, prefetch_factor), overall_elapsed_time in data_loader_test_times_dict.items():
    # Format as H:MM:SS.mmm. The milliseconds are built explicitly because
    # str(timedelta) omits the fractional part when it is zero, in which case
    # slicing off the last three characters would cut into the seconds.
    elapsed = timedelta(seconds=overall_elapsed_time)
    whole_seconds = elapsed - timedelta(microseconds=elapsed.microseconds)
    formatted_time = f"{whole_seconds}.{elapsed.microseconds // 1000:03d}"
    print(
        f"For {{num_workers: {num_workers}, "
        f"prefetch_factor: {prefetch_factor}}} -> "
        f"Time Taken: {formatted_time}"
    )
print()

# The dict is sorted by ascending time, so its first key is the fastest setup.
best_num_workers, best_prefetch_factor = next(iter(data_loader_test_times_dict))
print(
    f"Best results -> "
    f"(`best_num_workers`: {best_num_workers}, "
    f"`best_prefetch_factor`: {best_prefetch_factor})"
)

##### Results for choosing best value for `num_workers` and `prefetch_factor` (overall elapsed time for 50 pre-training epochs, H:MM:SS.milliseconds)
For {num_workers: 5, prefetch_factor: 2} -> Time Taken: 0:32:56.436

For {num_workers: 6, prefetch_factor: 1} -> Time Taken: 0:33:04.665

For {num_workers: 5, prefetch_factor: 4} -> Time Taken: 0:33:11.745

For {num_workers: 3, prefetch_factor: 2} -> Time Taken: 0:33:20.534

For {num_workers: 8, prefetch_factor: 1} -> Time Taken: 0:33:29.338

For {num_workers: 4, prefetch_factor: 2} -> Time Taken: 0:33:30.620

For {num_workers: 4, prefetch_factor: 1} -> Time Taken: 0:33:32.329

For {num_workers: 8, prefetch_factor: 2} -> Time Taken: 0:33:36.731

For {num_workers: 4, prefetch_factor: 4} -> Time Taken: 0:33:36.828

For {num_workers: 7, prefetch_factor: 4} -> Time Taken: 0:33:40.763

For {num_workers: 7, prefetch_factor: 2} -> Time Taken: 0:33:43.519

For {num_workers: 7, prefetch_factor: 1} -> Time Taken: 0:33:45.172

For {num_workers: 3, prefetch_factor: 1} -> Time Taken: 0:33:45.564

For {num_workers: 6, prefetch_factor: 2} -> Time Taken: 0:33:53.110

For {num_workers: 5, prefetch_factor: 1} -> Time Taken: 0:33:56.519

For {num_workers: 2, prefetch_factor: 4} -> Time Taken: 0:34:02.076

For {num_workers: 6, prefetch_factor: 4} -> Time Taken: 0:34:04.084

For {num_workers: 3, prefetch_factor: 4} -> Time Taken: 0:34:06.625

For {num_workers: 8, prefetch_factor: 4} -> Time Taken: 0:34:11.154

For {num_workers: 2, prefetch_factor: 2} -> Time Taken: 0:34:16.823

For {num_workers: 2, prefetch_factor: 1} -> Time Taken: 0:36:11.101

### Test `scheduler_patience` and `early_stopping_patience` for PreTraining

In [None]:
# Search grid for the patience test: every `scheduler_patience` value is
# combined with every `early_stopping_patience` value, each run being allowed
# this many epochs.
_pre_training_test_epochs = 100
_scheduler_patience_values = [10, 25, 50]
_early_stopping_patience_values = [10, 25]

# (scheduler_patience, early_stopping_patience) -> last epoch loss; filled by
# the cell below. Despite the name, it holds losses rather than times.
pre_training_test_times_dict = {}

In [None]:
print(
    f"Starting to test `scheduler_patience` (from {_scheduler_patience_values}) and "
    f"`early_stopping_patience` (from {_early_stopping_patience_values}) for PreTraining, "
    f"while pre-training for {_pre_training_test_epochs} epochs"
)
print(
    f"Will use previous best results -> "
    f"(`best_num_workers`: {best_num_workers}, "
    f"`best_prefetch_factor`: {best_prefetch_factor})"
)

# The search only runs while no results are held in memory, so re-running
# this cell afterwards just prints the existing losses again.
if not pre_training_test_times_dict:
    import os

    # Create the output folder before the long search starts; otherwise a
    # missing folder would only surface after every configuration has run.
    os.makedirs("model_params", exist_ok=True)

    for scheduler_patience in _scheduler_patience_values:
        for early_stopping_patience in _early_stopping_patience_values:
            res = pretraining_testing(
                _testing_epochs=_pre_training_test_epochs,

                _num_workers=best_num_workers,
                _prefetch_factor=best_prefetch_factor,

                # The patience values under test
                _scheduler_patience=scheduler_patience,
                _early_stopping_patience=early_stopping_patience,
            )
            pre_training_test_times_dict[(scheduler_patience, early_stopping_patience)] = res["last_epoch_loss"]

    pre_training_test_times_dict = sort_dict_by_values(  # Sort by lowest `loss` first
        pre_training_test_times_dict,
        reverse=False,
    )

    # NOTE(review): the keys are tuples - confirm the JSON written by ujson
    # has the key format you expect.
    with open(f"model_params/pre_training_test_times_dict__{_pre_training_test_epochs}_epochs.json", "w", encoding="utf-8") as fout:
        ujson.dump(
            pre_training_test_times_dict, fout,
            ensure_ascii=False, escape_forward_slashes=False,
            indent=4,
        )

print()
for (scheduler_patience, early_stopping_patience), last_epoch_loss in pre_training_test_times_dict.items():
    print(
        f"For {{scheduler_patience: {scheduler_patience}, "
        f"early_stopping_patience: {early_stopping_patience}}} -> "
        f"Final Loss: {last_epoch_loss:.4f}"
    )
print()

# The dict is sorted by ascending loss, so its first key is the best pair.
best_scheduler_patience, best_early_stopping_patience = next(iter(pre_training_test_times_dict))
# Backward-compatible alias: this value used to be stored under this
# (misleading) name, which other cells may still read.
best_pretraining_testing = best_early_stopping_patience
print(
    f"Best results -> "
    f"(`best_scheduler_patience`: {best_scheduler_patience}, "
    f"`best_early_stopping_patience`: {best_early_stopping_patience})"
)

##### Results for choosing best value for `scheduler_patience` and `early_stopping_patience` (final pre-training loss, lower is better)
For {scheduler_patience: 50, early_stopping_patience: 10} -> Final Loss: -2.6390

For {scheduler_patience: 25, early_stopping_patience: 10} -> Final Loss: -2.5647

For {scheduler_patience: 10, early_stopping_patience: 25} -> Final Loss: -2.4591

For {scheduler_patience: 50, early_stopping_patience: 25} -> Final Loss: -2.4308

For {scheduler_patience: 25, early_stopping_patience: 25} -> Final Loss: -2.4073

For {scheduler_patience: 10, early_stopping_patience: 10} -> Final Loss: -2.3840

# Fine-Tuning Tests