# Загрузка библиотек

In [1]:
!pip install -qU complexPyTorch
!pip install -U kaleido
!pip install -dU umap


Usage:   
  pip3 install [options] <requirement specifier> [package-index-options] ...
  pip3 install [options] -r <requirements file> [package-index-options] ...
  pip3 install [options] [-e] <vcs project url> ...
  pip3 install [options] [-e] <local project path> ...
  pip3 install [options] <archive url/path> ...

no such option: -d


In [2]:
import random
import imageio
import kaleido
import numpy as np
import pandas as pd
import os
import librosa
import wandb
import umap
from dataclasses import dataclass
from collections import defaultdict

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import complexPyTorch.complexLayers as cvnn

from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.manifold import TSNE
from sklearn.metrics.pairwise import pairwise_distances

from tqdm import tqdm
import scipy.io.wavfile as wav

import matplotlib.pyplot as plt
%config InlineBackend.figure_format = 'retina'
%matplotlib inline
plt.rcParams['figure.figsize'] = 10, 6
plt.rcParams['font.size'] = 12

import plotly.express as px
import plotly.io as pio
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Set custom layout for plotly
pio.templates['custom'] = go.layout.Template(
    layout= dict(
        font=dict(size=15),
        title=dict(
            font=dict(size=25),
            x=0.5
        ),
        bargap=0.1,
        width=900,
        height=500,
        autosize=False
    )
)
pio.templates.default = 'plotly+custom'
import seaborn as sns
sns.set()



This means that static image generation (e.g. `fig.write_image()`) will not work.

Please upgrade Plotly to version 6.1.1 or greater, or downgrade Kaleido to version 0.2.1.

  from .kaleido import Kaleido


# Загрузка данных

In [3]:
import kagglehub
sripaadsrinivasan_audio_mnist_path = kagglehub.dataset_download('sripaadsrinivasan/audio-mnist')

print('Data source import complete.')

Data source import complete.


In [4]:
sripaadsrinivasan_audio_mnist_path

'/kaggle/input/audio-mnist'

In [5]:
root = '/kaggle/input/audio-mnist/data'
# root = '/root/.cache/kagglehub/datasets/sripaadsrinivasan/audio-mnist/versions/1/data'
n = 60
folders = [os.path.join(root,str(i).zfill(2)) for i in range(1,n+1)]

files = []
for folder in folders:
    files += os.listdir(folder)

In [6]:
X = []
Y = []
for file in files:
    label = file.split("_")[0]
    human = file.split("_")[1]
    X.append(os.path.join(root,human,file))
    Y.append(label)

In [7]:
len(X), len(Y)

(30000, 30000)

# Загрузка функций

In [8]:
class AudioMNISTDataset(Dataset):
    def __init__(
            self,
            X: list[any],
            Y: list[any],
            target_sr: int = 16000
            ) -> None:
        self.audio = X
        self.labels = Y
        self.target_sr = target_sr
        assert len(self.audio) == len(self.labels)

    def __len__(self) -> int:
        return len(self.audio)

    def get_data(self, file: str) -> np.ndarray:
        data, sr = librosa.load(file, sr=None)
        data = librosa.resample(data, orig_sr=sr, target_sr=self.target_sr)
        data = librosa.util.fix_length(data, size=12000)

        return data

    def __getitem__(self, idx: int) -> tuple[torch.Tensor, torch.Tensor]:
        sample = self.audio[idx]
        sample = self.get_data(sample)
        sample = torch.tensor(sample, dtype=torch.float32).unsqueeze(0)

        label = self.labels[idx]
        label = torch.tensor(int(self.labels[idx]), dtype=torch.long)

        return sample, label

In [9]:
class Complex(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        # Feature extraction layers
        self.conv_layer1 = nn.Sequential(
            nn.Conv1d(1, 96, kernel_size=11, stride=4, padding=0),
            nn.BatchNorm1d(96),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=5, stride=3)
        )
        self.conv_layer2 = nn.Sequential(
            nn.Conv1d(96, 256, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=3, stride=2)
        )
        self.conv_layer3 = nn.Sequential(
            nn.Conv1d(256, 384, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm1d(384),
            nn.ReLU()
        )
        self.conv_layer4 = nn.Sequential(
            nn.Conv1d(384, 384, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm1d(384),
            nn.ReLU()
        )
        self.conv_layer5 = nn.Sequential(
            nn.Conv1d(384, 256, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=5, stride=3)
        )

        self.convs = nn.Sequential(
            self.conv_layer1,
            self.conv_layer2,
            self.conv_layer3,
            self.conv_layer4,
            self.conv_layer5,
        )

        # Linear layers
        linear_layer_size: int = 4096

        self.linear_fc1 = nn.Sequential(
            cvnn.ComplexLinear(5120, linear_layer_size),
            cvnn.ComplexReLU()
        )
        self.linear_fc2 = nn.Sequential(
            cvnn.ComplexLinear(linear_layer_size, linear_layer_size // 2),
            cvnn.ComplexReLU()
        )
        self.linear_fc3 = nn.Sequential(
            cvnn.ComplexLinear(linear_layer_size // 2, linear_layer_size),
            cvnn.ComplexReLU()
        )

        self.linears = nn.Sequential(
            self.linear_fc1,
            self.linear_fc2,
            self.linear_fc3,
        )

        # Classification head layer
        self.classification_head = nn.Linear(5120, TrainConfig.n_labels)

    def forward(
            self,
            x: torch.Tensor,
            ) -> tuple[torch.Tensor, ...]:

        out = self.convs(x)  # convlutions
        out_after_conv = out.reshape(
            out.size(0),
            -1
        )  # concatenate
        n = out_after_conv.shape[-1]

        """ LOG FOR VISUALIZATION """
        out_after_conv_log = out_after_conv.detach().clone()
        """ LOG FOR VISUALIZATION """

        normalization_scale = 2 / n
        out_complex = torch.fft.fft(
            out_after_conv * normalization_scale,
            dim=-1
        )  # to complex

        """ LOG FOR VISUALIZATION """
        out_complex_abs_before_linear_log = out_complex.detach().abs().clone()
        """ LOG FOR VISUALIZATION """

        """ BACKBONE """
        out_complex = self.linears(out_complex)  # move through complex layers
        """ BACKBONE """

        """ LOG FOR VISUALIZATION """
        out_complex_abs_after_linear_log = out_complex.detach().abs().clone()
        """ LOG FOR VISUALIZATION """

        # Back to real
        inverse_normalization_scale = n / 2
        out_after_ifft = torch.fft.ifft(
            out_complex * inverse_normalization_scale,
            dim=-1,
            n=n
        )
        out_after_ifft = torch.abs(out_after_ifft)

        """ LOG FOR VISUALIZATION """
        out_after_ifft_log = out_after_ifft.detach().clone()
        """ LOG FOR VISUALIZATION """

        out_final = self.classification_head(out_after_ifft)  # cls head

        result = (
            out_final,
            out_after_conv_log,
            out_complex_abs_before_linear_log,
            out_complex_abs_after_linear_log,
            out_after_ifft_log
        )

        return result

In [10]:
def train_one_epoch(
        model,
        train_dataloader,
        criterion,
        optimizer,
        device,
        print_interval: int = 50,
        log: bool = True
        ) -> tuple:
    total_loss: float = 0
    correct_predictions: int = 0
    total_samples: int = 0

    nn_logs: dict[int, list[np.ndarray]] = dict()  # save nn output logs

    model.train()
    tqdm_loader = tqdm(train_dataloader, initial=1, desc='Training')
    for iteration, (audio, label) in enumerate(tqdm_loader, start=1):
        audio = audio.to(device)
        label = label.to(device)

        optimizer.zero_grad()

        preds, *_ = model(audio)
        loss = criterion(preds, label)

        loss.backward()
        optimizer.step()

        total_loss += loss.item()

        if log and iteration != 0 and iteration % print_interval == 0:
            audio, label = BALANCED_SAMPLE
            audio = audio.to(device)
            label = label.to(device)

            model.eval()
            with torch.no_grad():
                preds, *output_logs = model(audio)

            nn_logs[iteration]: dict[str, any] = dict()
            nn_logs[iteration]['nn_output'] = [
                *(output_log.cpu().numpy() for output_log in output_logs),
                label.detach().clone().cpu().numpy()
            ]
            nn_logs[iteration]['metrics'] = evaluate_model(
                model,
                BALANCED_LOADER,
                criterion,
                device,
                phase='Evaluationg batch',
                log=False
            )
            model.train()

            current_loss = total_loss / iteration
            wandb.log({'avg_batch_loss': current_loss})  # logging
            print(f"\nIteration {iteration}, Average Loss: {current_loss}")

    return total_loss, nn_logs

In [11]:
def evaluate_model(
        model,
        dataloader,
        criterion,
        device,
        phase: str = 'Testing',
        log: bool = True
        ) -> tuple[any, ...]:
    total_loss = 0
    correct_predictions = 0
    total_samples = 0
    all_labels = []
    all_preds = []


    nn_logs: dict[int, list[np.ndarray]] = defaultdict(list)  # save nn output logs

    model.eval()
    with torch.no_grad():
        for audio, label in tqdm(dataloader, desc=phase):
            audio = audio.to(device)
            label = label.to(device)

            preds, *output_logs = model(audio)
            loss = criterion(preds, label)
            total_loss += loss.item()

            for i, output_log in enumerate(output_logs, start=1):
                output_log = output_log.cpu().numpy()
                nn_logs[i].extend(output_log)

            _, predicted_labels = torch.max(preds, 1)
            correct_predictions += (predicted_labels == label).sum().item()
            total_samples += label.size(0)

            all_labels.extend(label.cpu().numpy())
            all_preds.extend(predicted_labels.cpu().numpy())

    f1_micro = f1_score(all_labels, all_preds, average='micro')
    f1_macro = f1_score(all_labels, all_preds, average='macro')

    metrics = {
        'avg_loss': total_loss / len(dataloader),
        'accuracy_top1': correct_predictions / total_samples,
        'f1_micro': f1_micro,
        'f1_macro': f1_macro
    }

    if log:
        print(f"\nEvaluation Results: Average Loss: {metrics['avg_loss']}, Accuracy: {metrics['accuracy_top1']:.4f}, "
              f"F1-Micro: {metrics['f1_micro']:.4f}, F1-Macro: {metrics['f1_macro']:.4f}")

    return metrics, nn_logs, all_labels

In [12]:
def compute_pairwise_distances(
        all_features: np.ndarray,
        all_labels: np.ndarray,
        num_classes: int,
        device: str = 'cuda',
        metric: str = 'cosine'
    ):
    model.eval()
    model.to(device)

    # Межклассовые расстояния
    inter_class_dist_matrix = np.zeros((num_classes, num_classes))

    for i in range(num_classes):
        for j in range(num_classes):
            if i == j:
                inter_class_dist_matrix[i, j] = 0
            else:
                feats_i = all_features[all_labels == i]
                feats_j = all_features[all_labels == j]

                if len(feats_i) > 0 and len(feats_j) > 0:
                    dist = pairwise_distances(
                        feats_i,
                        feats_j,
                        metric=metric
                    ).mean()
                    inter_class_dist_matrix[i, j] = dist
                else:
                    inter_class_dist_matrix[i, j] = np.nan

    # Внутриклассовые расстояния
    intra_class_distances = np.zeros(num_classes)

    for i in range(num_classes):
        feats_i = all_features[all_labels == i]
        if len(feats_i) > 1:
            dists = pairwise_distances(feats_i, metric=metric)
            intra_class_distances[i] = dists[np.triu_indices(len(feats_i), k=1)].mean()
        else:
            intra_class_distances[i] = np.nan

    return inter_class_dist_matrix, intra_class_distances

# Загружаем доп. функции

In [13]:
def get_model_params_count(model: nn.Module) -> tuple[int, int]:
    all_params_count = sum(p.numel() for p in model.parameters())
    requires_grad_params_count = sum(
        p.numel() for p in model.parameters() if p.requires_grad
    )

    return all_params_count, requires_grad_params_count

In [14]:
def get_dtype(model: nn.Module) -> str:
    param_dtype = str(next(model.parameters()).dtype)

    return param_dtype

In [15]:
def set_seed(seed: int) -> None:
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    random.seed(seed)
    np.random.seed(seed)

In [16]:
RANDOM_STATE = 42
set_seed(RANDOM_STATE)

# Конфиг

In [17]:
@dataclass
class TrainConfig:
    n_epochs: int = 5
    lr: float = 3e-5
    batch_size: int = 128
    momentum: float = 0.9

    n_labels: int = 10
    dataset: str = 'AudioMNIST'
    train_size: float = 0.8
    optimizer: str = 'Adam'


config = TrainConfig()

In [18]:
config.__dict__

{'n_epochs': 5,
 'lr': 3e-05,
 'batch_size': 128,
 'momentum': 0.9,
 'n_labels': 10,
 'dataset': 'AudioMNIST',
 'train_size': 0.8,
 'optimizer': 'Adam'}

In [19]:
run = wandb.init(
    entity='aelyovin',
    project=config.dataset,
    name=f'complex_bs_{config.batch_size}_lr_{config.lr}',
    config=config.__dict__
)

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33melyovin[0m ([33maelyovin[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


# Train Test Split

In [20]:
X_train, X_val, y_train, y_val = train_test_split(
    X,
    Y,
    train_size=config.train_size,
    random_state=RANDOM_STATE,
    stratify=Y,
    shuffle=True
)

X_val, X_test, y_val, y_test = train_test_split(
    X_val,
    y_val,
    test_size=0.5,
    random_state=RANDOM_STATE,
    stratify=y_val,
    shuffle=True
)

In [21]:
print(len(X_train), len(X_val), len(X_test))

24000 3000 3000


In [22]:
pd.Series(y_train).value_counts(dropna=False)

Unnamed: 0,count
6,2400
5,2400
2,2400
8,2400
4,2400
1,2400
7,2400
3,2400
9,2400
0,2400


In [23]:
train_dataset = AudioMNISTDataset(X_train, y_train)
val_dataset = AudioMNISTDataset(X_val, y_val)
test_dataset = AudioMNISTDataset(X_test, y_test)

In [24]:
train_loader = DataLoader(
    train_dataset,
    batch_size=TrainConfig.batch_size,
    shuffle=False,
    drop_last=False
)

val_loader = DataLoader(
    val_dataset,
    batch_size=TrainConfig.batch_size,
    shuffle=False,
    drop_last=False
)

test_loader = DataLoader(
    test_dataset,
    batch_size=TrainConfig.batch_size,
    shuffle=False,
    drop_last=False
)

In [25]:
it = iter(train_loader)
audio, label = next(it)

In [26]:
audio.shape, label.shape

(torch.Size([128, 1, 12000]), torch.Size([128]))

In [27]:
len(train_loader)

188

In [28]:
def get_balanced_batch(
        dataset,
        n_samples_per_class: int = 30,
        n_digits: int = 10  # digits 0-9
        ) -> tuple[torch.Tensor, ...]:
    # Group indices by label
    label_to_indices = {}
    for idx, (_, label) in enumerate(dataset):
        label = label.item()
        if label not in label_to_indices:
            label_to_indices[label] = []
        label_to_indices[label].append(idx)

    # Select samples
    selected_indices = []
    for label in range(n_digits):
        indices = label_to_indices[label]
        selected = np.random.choice(indices, n_samples_per_class, replace=False)
        selected_indices.extend(selected)

    # Shuffle the order
    np.random.shuffle(selected_indices)

    # Create a subset dataset
    subset = torch.utils.data.Subset(dataset, selected_indices)
    loader = DataLoader(
        subset,
        batch_size=n_samples_per_class * n_digits,
        shuffle=False
    )

    return loader, next(iter(loader))

In [29]:
BALANCED_LOADER, BALANCED_SAMPLE = get_balanced_batch(
    test_dataset,
    n_samples_per_class=30
)

In [30]:
BALANCED_SAMPLE[1].unique(return_counts=True)

(tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
 tensor([30, 30, 30, 30, 30, 30, 30, 30, 30, 30]))

# Обучение

In [31]:
model = Complex()

In [32]:
get_model_params_count(model)

(76729418, 76729418)

In [33]:
get_dtype(model)

'torch.float32'

In [34]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [35]:
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=TrainConfig.lr
)

In [36]:
val_metrics, start_nn_val_logs, start_labels = evaluate_model(
    model,
    val_loader,
    criterion,
    device,
    phase='Evaluating'
)

Evaluating: 100%|██████████| 24/24 [00:24<00:00,  1.02s/it]


Evaluation Results: Average Loss: 2.360150913397471, Accuracy: 0.1000, F1-Micro: 0.1000, F1-Macro: 0.0182





In [37]:
metrics_logs: dict[int, dict[str, float]] = dict()
nn_logs: dict[int, dict[int, list[np.ndarray]]] = dict()
nn_all_val_logs: dict[int, dict[int, list[np.ndarray]]] = dict()


for epoch in range(1, TrainConfig.n_epochs + 1):
    print(f'\nEpoch {epoch}')
    total_loss, epoch_nn_logs = train_one_epoch(
        model,
        train_loader,
        criterion,
        optimizer,
        device,
        print_interval=12
    )

    val_metrics, nn_val_logs, labels = evaluate_model(
        model,
        val_loader,
        criterion,
        device,
        phase='Evaluating'
    )

    inter_matrix, intra_matrix = compute_pairwise_distances(
        np.array(nn_val_logs[4]),
        np.array(labels),
        10
    )

    nn_all_val_logs[epoch] = nn_val_logs

    epoch_metrics = {
        'avg_loss_train': total_loss / len(train_loader),
        'mean_intra_distance_val': np.nanmean(intra_matrix),
        'mean_inter_distance_val': np.nanmean(inter_matrix[inter_matrix != 0])
    }
    rename_map = {key: key + '_val' for key in val_metrics.keys()}
    val_metrics = {rename_map.get(k, k): v for k, v in val_metrics.items()}
    epoch_metrics.update(val_metrics)

    # Logging
    wandb.log(epoch_metrics)
    nn_logs[epoch] = epoch_nn_logs

    # Save model and metrics
    torch.save(model.state_dict(), 'best_model.pth')
    metrics_logs[epoch] = epoch_metrics

    # Drop if overfitting
    if epoch != 1 and metrics_logs[epoch]['avg_loss_val'] > metrics_logs[epoch - 1]['avg_loss_val']:
        model = Complex()
        model.load_state_dict(torch.load('best_model.pth'))
        break


Epoch 1


Training:   6%|▋         | 12/188 [00:12<03:25,  1.17s/it]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.22it/s]
Training:   7%|▋         | 13/188 [00:14<04:10,  1.43s/it]


Iteration 12, Average Loss: 2.1350370248158774


Training:  13%|█▎        | 24/188 [00:26<03:02,  1.11s/it]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.49it/s]
Training:  13%|█▎        | 25/188 [00:28<03:38,  1.34s/it]


Iteration 24, Average Loss: 1.9108574042717617


Training:  19%|█▉        | 36/188 [00:40<02:46,  1.09s/it]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.61it/s]
Training:  20%|█▉        | 37/188 [00:42<03:18,  1.31s/it]


Iteration 36, Average Loss: 1.640454779068629


Training:  26%|██▌       | 48/188 [00:54<02:31,  1.08s/it]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.62it/s]
Training:  26%|██▌       | 49/188 [00:55<02:59,  1.29s/it]


Iteration 48, Average Loss: 1.389644316708048


Training:  32%|███▏      | 60/188 [01:08<02:41,  1.26s/it]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:01<00:00,  1.29s/it]
Training:  32%|███▏      | 61/188 [01:11<03:33,  1.68s/it]


Iteration 60, Average Loss: 1.2174542958537737


Training:  38%|███▊      | 72/188 [01:23<02:09,  1.12s/it]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.67it/s]
Training:  39%|███▉      | 73/188 [01:25<02:31,  1.32s/it]


Iteration 72, Average Loss: 1.071340525522828


Training:  45%|████▍     | 84/188 [01:37<01:56,  1.12s/it]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.65it/s]
Training:  45%|████▌     | 85/188 [01:39<02:17,  1.33s/it]


Iteration 84, Average Loss: 0.9586449852656751


Training:  51%|█████     | 96/188 [01:51<01:45,  1.15s/it]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.65it/s]
Training:  52%|█████▏    | 97/188 [01:53<02:02,  1.35s/it]


Iteration 96, Average Loss: 0.8680172259919345


Training:  57%|█████▋    | 108/188 [02:05<01:30,  1.13s/it]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.54it/s]
Training:  58%|█████▊    | 109/188 [02:07<01:49,  1.38s/it]


Iteration 108, Average Loss: 0.7950135308007399


Training:  64%|██████▍   | 120/188 [02:20<01:20,  1.18s/it]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.49it/s]
Training:  64%|██████▍   | 121/188 [02:22<01:35,  1.42s/it]


Iteration 120, Average Loss: 0.7325438984359304


Training:  70%|███████   | 132/188 [02:35<01:06,  1.18s/it]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.28it/s]
Training:  71%|███████   | 133/188 [02:37<01:20,  1.46s/it]


Iteration 132, Average Loss: 0.6806715620173649


Training:  77%|███████▋  | 144/188 [02:49<00:47,  1.09s/it]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.54it/s]
Training:  77%|███████▋  | 145/188 [02:51<00:56,  1.32s/it]


Iteration 144, Average Loss: 0.636732087781032


Training:  83%|████████▎ | 156/188 [03:03<00:35,  1.10s/it]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.65it/s]
Training:  84%|████████▎ | 157/188 [03:05<00:40,  1.32s/it]


Iteration 156, Average Loss: 0.5970531929379854


Training:  89%|████████▉ | 168/188 [03:17<00:22,  1.11s/it]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.63it/s]
Training:  90%|████████▉ | 169/188 [03:19<00:25,  1.33s/it]


Iteration 168, Average Loss: 0.5615845786274544


Training:  96%|█████████▌| 180/188 [03:31<00:08,  1.09s/it]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.55it/s]
Training:  96%|█████████▋| 181/188 [03:33<00:09,  1.33s/it]


Iteration 180, Average Loss: 0.5321570574616392


Training: 189it [03:41,  1.18s/it]
Evaluating: 100%|██████████| 24/24 [00:08<00:00,  2.85it/s]



Evaluation Results: Average Loss: 0.11277652019634843, Accuracy: 0.9667, F1-Micro: 0.9667, F1-Macro: 0.9670

Epoch 2


Training:   6%|▋         | 12/188 [00:05<01:22,  2.14it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.05it/s]
Training:   7%|▋         | 13/188 [00:06<02:18,  1.26it/s]


Iteration 12, Average Loss: 0.11048245554169019


Training:  13%|█▎        | 24/188 [00:12<01:20,  2.03it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.71it/s]
Training:  13%|█▎        | 25/188 [00:13<01:54,  1.43it/s]


Iteration 24, Average Loss: 0.10284857296695311


Training:  19%|█▉        | 36/188 [00:18<01:13,  2.07it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.64it/s]
Training:  20%|█▉        | 37/188 [00:20<01:45,  1.43it/s]


Iteration 36, Average Loss: 0.09724414948787954


Training:  26%|██▌       | 48/188 [00:25<01:08,  2.04it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.59it/s]
Training:  26%|██▌       | 49/188 [00:26<01:38,  1.41it/s]


Iteration 48, Average Loss: 0.08980713423807174


Training:  32%|███▏      | 60/188 [00:32<01:02,  2.06it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.55it/s]
Training:  32%|███▏      | 61/188 [00:33<01:30,  1.40it/s]


Iteration 60, Average Loss: 0.08901137290522457


Training:  38%|███▊      | 72/188 [00:38<00:56,  2.06it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.57it/s]
Training:  39%|███▉      | 73/188 [00:40<01:21,  1.41it/s]


Iteration 72, Average Loss: 0.08279013517312706


Training:  45%|████▍     | 84/188 [00:45<00:50,  2.07it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.31it/s]
Training:  45%|████▌     | 85/188 [00:46<01:17,  1.33it/s]


Iteration 84, Average Loss: 0.07887986778015536


Training:  51%|█████     | 96/188 [00:52<00:43,  2.11it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.68it/s]
Training:  52%|█████▏    | 97/188 [00:53<01:02,  1.46it/s]


Iteration 96, Average Loss: 0.0755113614043997


Training:  57%|█████▋    | 108/188 [00:58<00:38,  2.09it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.39it/s]
Training:  58%|█████▊    | 109/188 [00:59<00:58,  1.35it/s]


Iteration 108, Average Loss: 0.07220608036516717


Training:  64%|██████▍   | 120/188 [01:05<00:32,  2.09it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.68it/s]
Training:  64%|██████▍   | 121/188 [01:06<00:46,  1.45it/s]


Iteration 120, Average Loss: 0.07003542224410922


Training:  70%|███████   | 132/188 [01:11<00:28,  1.99it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.38it/s]
Training:  71%|███████   | 133/188 [01:13<00:42,  1.30it/s]


Iteration 132, Average Loss: 0.06749784441975255


Training:  77%|███████▋  | 144/188 [01:18<00:21,  2.09it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.59it/s]
Training:  77%|███████▋  | 145/188 [01:19<00:30,  1.43it/s]


Iteration 144, Average Loss: 0.06511638005678025


Training:  83%|████████▎ | 156/188 [01:25<00:16,  1.99it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.33it/s]
Training:  84%|████████▎ | 157/188 [01:26<00:23,  1.30it/s]


Iteration 156, Average Loss: 0.06210045509326916


Training:  89%|████████▉ | 168/188 [01:31<00:09,  2.04it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.60it/s]
Training:  90%|████████▉ | 169/188 [01:32<00:13,  1.40it/s]


Iteration 168, Average Loss: 0.05945617058092639


Training:  96%|█████████▌| 180/188 [01:38<00:04,  1.92it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.65it/s]
Training:  96%|█████████▋| 181/188 [01:39<00:05,  1.35it/s]


Iteration 180, Average Loss: 0.05727709991236528


Training: 189it [01:43,  1.82it/s]
Evaluating: 100%|██████████| 24/24 [00:10<00:00,  2.24it/s]



Evaluation Results: Average Loss: 0.057455620262771845, Accuracy: 0.9840, F1-Micro: 0.9840, F1-Macro: 0.9840

Epoch 3


Training:   6%|▋         | 12/188 [00:06<01:44,  1.69it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.66it/s]
Training:   7%|▋         | 13/188 [00:07<02:21,  1.23it/s]


Iteration 12, Average Loss: 0.03494974214117974


Training:  13%|█▎        | 24/188 [00:13<01:34,  1.73it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.64it/s]
Training:  13%|█▎        | 25/188 [00:15<02:09,  1.26it/s]


Iteration 24, Average Loss: 0.02999737433856353


Training:  19%|█▉        | 36/188 [00:21<01:28,  1.72it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.65it/s]
Training:  20%|█▉        | 37/188 [00:22<01:59,  1.26it/s]


Iteration 36, Average Loss: 0.029318796367281012


Training:  26%|██▌       | 48/188 [00:28<01:10,  1.98it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.31it/s]
Training:  26%|██▌       | 49/188 [00:30<01:46,  1.31it/s]


Iteration 48, Average Loss: 0.027854498747425776


Training:  32%|███▏      | 60/188 [00:35<01:01,  2.09it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.64it/s]
Training:  32%|███▏      | 61/188 [00:36<01:29,  1.42it/s]


Iteration 60, Average Loss: 0.028379089944064618


Training:  38%|███▊      | 72/188 [00:42<00:56,  2.04it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.34it/s]
Training:  39%|███▉      | 73/188 [00:43<01:27,  1.32it/s]


Iteration 72, Average Loss: 0.02737098566851475


Training:  45%|████▍     | 84/188 [00:48<00:50,  2.04it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.53it/s]
Training:  45%|████▌     | 85/188 [00:50<01:14,  1.39it/s]


Iteration 84, Average Loss: 0.026952806100737126


Training:  51%|█████     | 96/188 [00:55<00:46,  1.99it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.39it/s]
Training:  52%|█████▏    | 97/188 [00:56<01:08,  1.32it/s]


Iteration 96, Average Loss: 0.026789163564293023


Training:  57%|█████▋    | 108/188 [01:02<00:38,  2.05it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.63it/s]
Training:  58%|█████▊    | 109/188 [01:03<00:55,  1.43it/s]


Iteration 108, Average Loss: 0.026466138932543497


Training:  64%|██████▍   | 120/188 [01:08<00:34,  1.98it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.47it/s]
Training:  64%|██████▍   | 121/188 [01:10<00:50,  1.34it/s]


Iteration 120, Average Loss: 0.027067567404204358


Training:  70%|███████   | 132/188 [01:15<00:27,  2.06it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.64it/s]
Training:  71%|███████   | 133/188 [01:16<00:38,  1.43it/s]


Iteration 132, Average Loss: 0.0265423513377424


Training:  77%|███████▋  | 144/188 [01:21<00:22,  1.92it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.63it/s]
Training:  77%|███████▋  | 145/188 [01:23<00:31,  1.34it/s]


Iteration 144, Average Loss: 0.025298087430807453


Training:  83%|████████▎ | 156/188 [01:28<00:15,  2.09it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.59it/s]
Training:  84%|████████▎ | 157/188 [01:29<00:21,  1.43it/s]


Iteration 156, Average Loss: 0.024140635547108758


Training:  89%|████████▉ | 168/188 [01:35<00:10,  1.95it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.66it/s]
Training:  90%|████████▉ | 169/188 [01:36<00:13,  1.39it/s]


Iteration 168, Average Loss: 0.0230429026095884


Training:  96%|█████████▌| 180/188 [01:41<00:03,  2.03it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.63it/s]
Training:  96%|█████████▋| 181/188 [01:42<00:04,  1.42it/s]


Iteration 180, Average Loss: 0.022315445678153387


Training: 189it [01:46,  1.76it/s]
Evaluating: 100%|██████████| 24/24 [00:08<00:00,  2.79it/s]



Evaluation Results: Average Loss: 0.04391479461143414, Accuracy: 0.9873, F1-Micro: 0.9873, F1-Macro: 0.9873

Epoch 4


Training:   6%|▋         | 12/188 [00:05<01:23,  2.10it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.70it/s]
Training:   7%|▋         | 13/188 [00:06<02:00,  1.45it/s]


Iteration 12, Average Loss: 0.01283391008231168


Training:  13%|█▎        | 24/188 [00:12<01:20,  2.05it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.61it/s]
Training:  13%|█▎        | 25/188 [00:13<01:55,  1.41it/s]


Iteration 24, Average Loss: 0.012003667885437608


Training:  19%|█▉        | 36/188 [00:18<01:12,  2.09it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.41it/s]
Training:  20%|█▉        | 37/188 [00:19<01:50,  1.37it/s]


Iteration 36, Average Loss: 0.011768512692975087


Training:  26%|██▌       | 48/188 [00:25<01:16,  1.82it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.67it/s]
Training:  26%|██▌       | 49/188 [00:26<01:46,  1.31it/s]


Iteration 48, Average Loss: 0.0100122479974137


Training:  32%|███▏      | 60/188 [00:33<01:16,  1.68it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.45it/s]
Training:  32%|███▏      | 61/188 [00:34<01:47,  1.18it/s]


Iteration 60, Average Loss: 0.01017478978416572


Training:  38%|███▊      | 72/188 [00:40<01:07,  1.73it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.66it/s]
Training:  39%|███▉      | 73/188 [00:42<01:30,  1.27it/s]


Iteration 72, Average Loss: 0.010027940991373422


Training:  45%|████▍     | 84/188 [00:48<00:57,  1.82it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.64it/s]
Training:  45%|████▌     | 85/188 [00:49<01:16,  1.34it/s]


Iteration 84, Average Loss: 0.010022284086084082


Training:  51%|█████     | 96/188 [00:54<00:43,  2.10it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.58it/s]
Training:  52%|█████▏    | 97/188 [00:56<01:03,  1.42it/s]


Iteration 96, Average Loss: 0.010210671006158615


Training:  57%|█████▋    | 108/188 [01:01<00:40,  2.00it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.62it/s]
Training:  58%|█████▊    | 109/188 [01:02<00:55,  1.41it/s]


Iteration 108, Average Loss: 0.009850375787613707


Training:  64%|██████▍   | 120/188 [01:08<00:32,  2.09it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.69it/s]
Training:  64%|██████▍   | 121/188 [01:09<00:46,  1.45it/s]


Iteration 120, Average Loss: 0.01041361420454147


Training:  70%|███████   | 132/188 [01:14<00:27,  2.07it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.65it/s]
Training:  71%|███████   | 133/188 [01:15<00:38,  1.44it/s]


Iteration 132, Average Loss: 0.010995526397671325


Training:  77%|███████▋  | 144/188 [01:21<00:20,  2.10it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.44it/s]
Training:  77%|███████▋  | 145/188 [01:22<00:30,  1.40it/s]


Iteration 144, Average Loss: 0.010708128362441331


Training:  83%|████████▎ | 156/188 [01:27<00:15,  2.09it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.66it/s]
Training:  84%|████████▎ | 157/188 [01:28<00:21,  1.45it/s]


Iteration 156, Average Loss: 0.010460052695961144


Training:  89%|████████▉ | 168/188 [01:34<00:09,  2.11it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.33it/s]
Training:  90%|████████▉ | 169/188 [01:35<00:13,  1.36it/s]


Iteration 168, Average Loss: 0.010574371287865298


Training:  96%|█████████▌| 180/188 [01:40<00:03,  2.07it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.58it/s]
Training:  96%|█████████▋| 181/188 [01:42<00:04,  1.42it/s]


Iteration 180, Average Loss: 0.010337831419504558


Training: 189it [01:45,  1.78it/s]
Evaluating: 100%|██████████| 24/24 [00:08<00:00,  2.88it/s]



Evaluation Results: Average Loss: 0.03822938732143181, Accuracy: 0.9887, F1-Micro: 0.9887, F1-Macro: 0.9887

Epoch 5


Training:   6%|▋         | 12/188 [00:05<01:22,  2.14it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.66it/s]
Training:   7%|▋         | 13/188 [00:06<02:00,  1.45it/s]


Iteration 12, Average Loss: 0.005274776248067307


Training:  13%|█▎        | 24/188 [00:11<01:21,  2.02it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.38it/s]
Training:  13%|█▎        | 25/188 [00:13<02:03,  1.32it/s]


Iteration 24, Average Loss: 0.005206687126095251


Training:  19%|█▉        | 36/188 [00:18<01:13,  2.06it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.66it/s]
Training:  20%|█▉        | 37/188 [00:19<01:45,  1.44it/s]


Iteration 36, Average Loss: 0.004881543412921019


Training:  26%|██▌       | 48/188 [00:25<01:11,  1.96it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.27it/s]
Training:  26%|██▌       | 49/188 [00:26<01:51,  1.24it/s]


Iteration 48, Average Loss: 0.004282085151013841


Training:  32%|███▏      | 60/188 [00:31<01:02,  2.06it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.59it/s]
Training:  32%|███▏      | 61/188 [00:32<01:30,  1.41it/s]


Iteration 60, Average Loss: 0.004340895924057501


Training:  38%|███▊      | 72/188 [00:38<00:58,  1.97it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.62it/s]
Training:  39%|███▉      | 73/188 [00:39<01:24,  1.36it/s]


Iteration 72, Average Loss: 0.003921470897492125


Training:  45%|████▍     | 84/188 [00:44<00:52,  2.00it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.65it/s]
Training:  45%|████▌     | 85/188 [00:46<01:15,  1.37it/s]


Iteration 84, Average Loss: 0.0036885419159218493


Training:  51%|█████     | 96/188 [00:52<00:54,  1.68it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.59it/s]
Training:  52%|█████▏    | 97/188 [00:53<01:14,  1.22it/s]


Iteration 96, Average Loss: 0.003604007610192639


Training:  57%|█████▋    | 108/188 [01:00<00:45,  1.76it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.60it/s]
Training:  58%|█████▊    | 109/188 [01:01<01:03,  1.25it/s]


Iteration 108, Average Loss: 0.003417351145698275


Training:  64%|██████▍   | 120/188 [01:07<00:39,  1.73it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.24it/s]
Training:  64%|██████▍   | 121/188 [01:09<00:56,  1.18it/s]


Iteration 120, Average Loss: 0.003406300622979567


Training:  70%|███████   | 132/188 [01:15<00:33,  1.67it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.45it/s]
Training:  71%|███████   | 133/188 [01:16<00:46,  1.18it/s]


Iteration 132, Average Loss: 0.0036919037875398344


Training:  77%|███████▋  | 144/188 [01:23<00:24,  1.77it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.63it/s]
Training:  77%|███████▋  | 145/188 [01:24<00:33,  1.28it/s]


Iteration 144, Average Loss: 0.0036125098952955645


Training:  83%|████████▎ | 156/188 [01:30<00:18,  1.72it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.59it/s]
Training:  84%|████████▎ | 157/188 [01:32<00:24,  1.25it/s]


Iteration 156, Average Loss: 0.003543402226708937


Training:  89%|████████▉ | 168/188 [01:38<00:11,  1.77it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.49it/s]
Training:  90%|████████▉ | 169/188 [01:39<00:15,  1.25it/s]


Iteration 168, Average Loss: 0.0034618368377518243


Training:  96%|█████████▌| 180/188 [01:45<00:04,  1.82it/s]
Evaluationg batch:   0%|          | 0/1 [00:00<?, ?it/s][A
Evaluationg batch: 100%|██████████| 1/1 [00:00<00:00,  1.72it/s]
Training:  96%|█████████▋| 181/188 [01:47<00:05,  1.32it/s]


Iteration 180, Average Loss: 0.003508402625933134


Training: 189it [01:51,  1.69it/s]
Evaluating: 100%|██████████| 24/24 [00:10<00:00,  2.32it/s]



Evaluation Results: Average Loss: 0.03397199074970558, Accuracy: 0.9897, F1-Micro: 0.9897, F1-Macro: 0.9897


# Визуализация

## 2D

In [38]:
def get_graph_limits(features: np.ndarray) -> tuple:
    x_lim = (
        np.percentile(features[:, 0], 2.5),
        np.percentile(features[:, 0], 97.5)
    )
    y_lim = (
        np.percentile(features[:, 1], 2.5),
        np.percentile(features[:, 1], 97.5)
    )

    x_margin = (x_lim[1] - x_lim[0]) * 0.05
    y_margin = (y_lim[1] - y_lim[0]) * 0.05

    result = (
        (float(x_lim[0] - x_margin), float(x_lim[1] + x_margin)),
        (float(y_lim[0] - y_margin), float(y_lim[1] + y_margin)),
    )

    return result

In [None]:
all_pca_before_1: list[np.ndarray, ...] = []
all_pca_after_1: list[np.ndarray, ...] = []
all_pca_before_2: list[np.ndarray, ...] = []
all_pca_after_2: list[np.ndarray, ...] = []

all_tsne_before_1: list[np.ndarray, ...] = []
all_tsne_after_1: list[np.ndarray, ...] = []
all_tsne_before_2: list[np.ndarray, ...] = []
all_tsne_after_2: list[np.ndarray, ...] = []

all_umap_before_1: list[np.ndarray, ...] = []
all_umap_after_1: list[np.ndarray, ...] = []
all_umap_before_2: list[np.ndarray, ...] = []
all_umap_after_2: list[np.ndarray, ...] = []

for epoch, nn_logs_epoch in tqdm(nn_logs.items()):
    for iteration, nn_logs_iteration in nn_logs_epoch.items():
        nn_before_1, nn_before_2, nn_after_2, nn_after_1, labels = nn_logs_iteration['nn_output']

        scaler_before_1 = StandardScaler()
        scaler_after_1 = StandardScaler()
        nn_before_1 = scaler_before_1.fit_transform(nn_before_1)
        nn_after_1 = scaler_after_1.fit_transform(nn_after_1)

        scaler_before_2 = StandardScaler()
        scaler_after_2 = StandardScaler()
        nn_before_2 = scaler_before_2.fit_transform(nn_before_2)
        nn_after_2 = scaler_after_2.fit_transform(nn_after_2)

        # Transformers PCA
        pca_transformer_before_1 = PCA(n_components=2)
        pca_transformer_after_1 = PCA(n_components=2)
        pca_before_1 = pca_transformer_before_1.fit_transform(nn_before_1)
        pca_after_1 = pca_transformer_after_1.fit_transform(nn_after_1)

        pca_transformer_before_2 = PCA(n_components=2)
        pca_transformer_after_2 = PCA(n_components=2)
        pca_before_2 = pca_transformer_before_2.fit_transform(nn_before_2)
        pca_after_2 = pca_transformer_after_2.fit_transform(nn_after_2)

        # Transformers t-SNE
        tsne_transformer_before_1 = TSNE(n_components=2)
        tsne_transformer_after_1 = TSNE(n_components=2)
        tsne_before_1 = tsne_transformer_before_1.fit_transform(nn_before_1)
        tsne_after_1 = tsne_transformer_after_1.fit_transform(nn_after_1)

        tsne_transformer_before_2 = TSNE(n_components=2)
        tsne_transformer_after_2 = TSNE(n_components=2)
        tsne_before_2 = tsne_transformer_before_2.fit_transform(nn_before_2)
        tsne_after_2 = tsne_transformer_after_2.fit_transform(nn_after_2)

        # Transformers UMAP
        umap_transformer_before_1 = umap.UMAP(n_components=2)
        umap_transformer_after_1 = umap.UMAP(n_components=2)
        umap_before_1 = umap_transformer_before_1.fit_transform(nn_before_1)
        umap_after_1 = umap_transformer_after_1.fit_transform(nn_after_1)

        umap_transformer_before_2 = umap.UMAP(n_components=2)
        umap_transformer_after_2 = umap.UMAP(n_components=2)
        umap_before_2 = umap_transformer_before_2.fit_transform(nn_before_2)
        umap_after_2 = umap_transformer_after_2.fit_transform(nn_after_2)

        all_pca_before_1.append(pca_before_1)
        all_pca_after_1.append(pca_after_1)
        all_pca_before_2.append(pca_before_2)
        all_pca_after_2.append(pca_after_2)

        all_tsne_before_1.append(tsne_before_1)
        all_tsne_after_1.append(tsne_after_1)
        all_tsne_before_2.append(tsne_before_2)
        all_tsne_after_2.append(tsne_after_2)

        all_umap_before_1.append(umap_before_1)
        all_umap_after_1.append(umap_after_1)
        all_umap_before_2.append(umap_before_2)
        all_umap_after_2.append(umap_after_2)

all_pca_before_1 = np.array(all_pca_before_1)
all_pca_after_1 = np.array(all_pca_after_1)
all_pca_before_2 = np.array(all_pca_before_2)
all_pca_after_2 = np.array(all_pca_after_2)

all_tsne_before_1 = np.array(all_tsne_before_1)
all_tsne_after_1 = np.array(all_tsne_after_1)
all_tsne_before_2 = np.array(all_tsne_before_2)
all_tsne_after_2 = np.array(all_tsne_after_2)

all_umap_before_1 = np.array(all_umap_before_1)
all_umap_after_1 = np.array(all_umap_after_1)
all_umap_before_2 = np.array(all_umap_before_2)
all_umap_after_2 = np.array(all_umap_after_2)

In [None]:
all_before, all_after = all_pca_before_1, all_pca_after_1

In [None]:
global_xlim_before, global_ylim_before = get_graph_limits(all_before.reshape(-1, 2))
global_xlim_after, global_ylim_after = get_graph_limits(all_after.reshape(-1, 2))

In [None]:
figures: list[go.Figure, ...] = []
digits: list[int, ...] = list(map(str, range(0, 10)))
n_iterations: int = len(train_loader)  # number of iterations in epoch
iteration_step: int = 25  # step of iteration DON'T FORGET TO CHANGE THIS
labels: np.ndarray[int, ...] = BALANCED_SAMPLE[1].cpu().numpy()

for epoch, nn_logs_epoch in nn_logs.items():
    for iteration, nn_logs_iteration in nn_logs_epoch.items():
        # Get idx for pcas
        idx = (epoch - 1) * (n_iterations - n_iterations % iteration_step) + iteration
        idx //= iteration_step
        idx -= 1

        pca_before, pca_after = all_before[idx], all_after[idx]

        df_before = pd.DataFrame(pca_before, columns=['Компонента 1', 'Компонента 2'])
        df_before['Label'] = labels.astype(str)

        df_after = pd.DataFrame(pca_after, columns=['Компонента 1', 'Компонента 2'])
        df_after['Label'] = labels.astype(str)

        fig = make_subplots(
            rows=1,
            cols=2,
            subplot_titles=['Перед ДПФ (1)', 'После обратного ДПФ (4)']
            # subplot_titles=['Перед комплексной основой (2)', 'После комплексной основы (3)']
        )
        tab10_colors = px.colors.qualitative.Dark24

        for i, label in enumerate(digits):
            df_subset_before = df_before[df_before['Label'] == label]
            df_subset_after = df_after[df_after['Label'] == label]

            fig.add_trace(go.Scatter(
                x=df_subset_before['Компонента 1'],
                y=df_subset_before['Компонента 2'],
                mode='markers',
                marker=dict(
                    size=15,
                    color=tab10_colors[i % len(tab10_colors)],
                    opacity=1
                ),
                name=f'{label}',
                showlegend=True
            ), row=1, col=1)

            fig.add_trace(go.Scatter(
                x=df_subset_after['Компонента 1'],
                y=df_subset_after['Компонента 2'],
                mode='markers',
                marker=dict(
                    size=15,
                    color=tab10_colors[i % len(tab10_colors)],
                    opacity=1
                ),
                showlegend=False
            ), row=1, col=2)

        fig.update_layout(
            xaxis_title='Компонента 1',
            yaxis_title='Компонента 2',
            xaxis2_title='Компонента 1',
            yaxis2_title='Компонента 2',
            legend_title='Common Legend',
            legend=dict(
                title='Цифры',
                title_font_size=22,
                font=dict(
                    size=22,
                ),  # Font size for the legend
                traceorder='normal'
            ),
            width=1400,
            height=700,
        )

        fig.update_xaxes(
            range=global_xlim_before, title='Компонента 1',
            tickfont=dict(size=18),
            title_font=dict(size=20),
            row=1, col=1
        )
        fig.update_yaxes(
            range=global_ylim_before,
            title='Компонента 2',
            tickfont=dict(size=18),
            title_font=dict(size=20),
            row=1, col=1
        )
        fig.update_xaxes(
            range=global_xlim_after,
            title='Компонента 1',
            tickfont=dict(size=18),
            title_font=dict(size=20),
            row=1, col=2
        )
        fig.update_yaxes(
            range=global_ylim_after,
            title='Компонента 2',
            tickfont=dict(size=18),
            title_font=dict(size=20),
            row=1, col=2
        )

        fig.for_each_annotation(lambda a: a.update(font=dict(size=22)))

        figures.append(fig)

## 3D

In [None]:
def get_graph_features_limits_3d(featuress: np.ndarray) -> tuple:
    x_lim = (np.percentile(featuress[:, 0], 0.5), np.percentile(featuress[:, 0], 99.5))
    y_lim = (np.percentile(featuress[:, 1], 0.5), np.percentile(featuress[:, 1], 99.5))
    z_lim = (np.percentile(featuress[:, 2], 0.5), np.percentile(featuress[:, 2], 99.5))

    x_margin = (x_lim[1] - x_lim[0]) * 0.05
    y_margin = (y_lim[1] - y_lim[0]) * 0.05
    z_margin = (z_lim[1] - z_lim[0]) * 0.05

    result = (
        (float(x_lim[0] - x_margin), float(x_lim[1] + x_margin)),
        (float(y_lim[0] - y_margin), float(y_lim[1] + y_margin)),
        (float(z_lim[0] - z_margin), float(z_lim[1] + z_margin))
    )

    return result

In [None]:
all_pca_before_1: list[np.ndarray, ...] = []
all_pca_after_1: list[np.ndarray, ...] = []
all_pca_before_2: list[np.ndarray, ...] = []
all_pca_after_2: list[np.ndarray, ...] = []

all_tsne_before_1: list[np.ndarray, ...] = []
all_tsne_after_1: list[np.ndarray, ...] = []
all_tsne_before_2: list[np.ndarray, ...] = []
all_tsne_after_2: list[np.ndarray, ...] = []

all_umap_before_1: list[np.ndarray, ...] = []
all_umap_after_1: list[np.ndarray, ...] = []
all_umap_before_2: list[np.ndarray, ...] = []
all_umap_after_2: list[np.ndarray, ...] = []

for epoch, nn_logs_epoch in tqdm(nn_logs.items()):
    for iteration, nn_logs_iteration in nn_logs_epoch.items():
        # _, _, nn_before, nn_after, _ = nn_logs_iteration['nn_output']
        nn_before_1, nn_before_2, nn_after_2, nn_after_1, labels = nn_logs_iteration['nn_output']

        # 1
        scaler_before_1 = StandardScaler()
        scaler_after_1 = StandardScaler()
        nn_before_1 = scaler_before_1.fit_transform(nn_before_1)
        nn_after_1 = scaler_after_1.fit_transform(nn_after_1)

        # 2
        scaler_before_2 = StandardScaler()
        scaler_after_2 = StandardScaler()
        nn_before_2 = scaler_before_2.fit_transform(nn_before_2)
        nn_after_2 = scaler_after_2.fit_transform(nn_after_2)

        # Transformers PCA
        # 1
        pca_transformer_before_1 = PCA(n_components=3)
        pca_transformer_after_1 = PCA(n_components=3)
        pca_before_1 = pca_transformer_before_1.fit_transform(nn_before_1)
        pca_after_1 = pca_transformer_after_1.fit_transform(nn_after_1)

        # 2
        pca_transformer_before_2 = PCA(n_components=3)
        pca_transformer_after_2 = PCA(n_components=3)
        pca_before_2 = pca_transformer_before_2.fit_transform(nn_before_2)
        pca_after_2 = pca_transformer_after_2.fit_transform(nn_after_2)

        # Transformers t-SNE
        # 1
        tsne_transformer_before_1 = TSNE(n_components=3)
        tsne_transformer_after_1 = TSNE(n_components=3)
        tsne_before_1 = tsne_transformer_before_1.fit_transform(nn_before_1)
        tsne_after_1 = tsne_transformer_after_1.fit_transform(nn_after_1)

        # 2
        tsne_transformer_before_2 = TSNE(n_components=3)
        tsne_transformer_after_2 = TSNE(n_components=3)
        tsne_before_2 = tsne_transformer_before_2.fit_transform(nn_before_2)
        tsne_after_2 = tsne_transformer_after_2.fit_transform(nn_after_2)

        # Transformers UMAP
        # 1
        umap_transformer_before_1 = umap.UMAP(n_components=3)
        umap_transformer_after_1 = umap.UMAP(n_components=3)
        umap_before_1 = umap_transformer_before_1.fit_transform(nn_before_1)
        umap_after_1 = umap_transformer_after_1.fit_transform(nn_after_1)

        # 2
        umap_transformer_before_2 = umap.UMAP(n_components=3)
        umap_transformer_after_2 = umap.UMAP(n_components=3)
        umap_before_2 = umap_transformer_before_2.fit_transform(nn_before_2)
        umap_after_2 = umap_transformer_after_2.fit_transform(nn_after_2)

        all_pca_before_1.append(pca_before_1)
        all_pca_after_1.append(pca_after_1)
        all_pca_before_2.append(pca_before_2)
        all_pca_after_2.append(pca_after_2)

        all_tsne_before_1.append(tsne_before_1)
        all_tsne_after_1.append(tsne_after_1)
        all_tsne_before_2.append(tsne_before_2)
        all_tsne_after_2.append(tsne_after_2)

        all_umap_before_1.append(umap_before_1)
        all_umap_after_1.append(umap_after_1)
        all_umap_before_2.append(umap_before_2)
        all_umap_after_2.append(umap_after_2)

all_pca_before_1 = np.array(all_pca_before_1)
all_pca_after_1 = np.array(all_pca_after_1)
all_pca_before_2 = np.array(all_pca_before_2)
all_pca_after_2 = np.array(all_pca_after_2)

all_tsne_before_1 = np.array(all_tsne_before_1)
all_tsne_after_1 = np.array(all_tsne_after_1)
all_tsne_before_2 = np.array(all_tsne_before_2)
all_tsne_after_2 = np.array(all_tsne_after_2)

all_umap_before_1 = np.array(all_umap_before_1)
all_umap_after_1 = np.array(all_umap_after_1)
all_umap_before_2 = np.array(all_umap_before_2)
all_umap_after_2 = np.array(all_umap_after_2)

In [None]:
all_before, all_after = all_umap_before_2, all_umap_after_2

In [None]:
global_xlim_before, global_ylim_before, global_zlim_before = get_graph_features_limits_3d(all_before.reshape(-1, 3))
global_xlim_after, global_ylim_after, global_zlim_after = get_graph_features_limits_3d(all_after.reshape(-1, 3))

In [None]:
global_xlim_before

In [None]:
figures: list[go.Figure, ...] = []
digits: list[int, ...] = list(map(str, range(0, 10)))
n_iterations: int = len(train_loader)  # number of iterations in epoch
iteration_step: int = 25  # step of iteration DON'T FORGET TO CHANGE THIS
labels: np.ndarray[int, ...] = BALANCED_SAMPLE[1].cpu().numpy()


for epoch, nn_logs_epoch in nn_logs.items():
    for iteration, nn_logs_iteration in nn_logs_epoch.items():
        # Get idx for pcas
        idx = (epoch - 1) * (n_iterations - n_iterations % iteration_step) + iteration
        idx //= iteration_step
        idx -= 1

        pca_before, pca_after = all_before[idx], all_after[idx]

        df_before = pd.DataFrame(pca_before, columns=['Компонента 1', 'Компонента 2', 'Компонента 3'])
        df_before['Label'] = labels.astype(str)

        df_after = pd.DataFrame(pca_after, columns=['Компонента 1', 'Компонента 2', 'Компонента 3'])
        df_after['Label'] = labels.astype(str)

        fig = make_subplots(
            rows=1,
            cols=2,
            horizontal_spacing=0.01,
            specs=[[{'type': 'scatter3d'}, {'type': 'scatter3d'}]],
            # subplot_titles=['Перед ДПФ (1)', 'После обратного ДПФ (4)']
            subplot_titles=['Перед комплексной основой (2)', 'После комплексной основы (3)']
        )
        tab10_colors = px.colors.qualitative.Dark24

        for i, label in enumerate(digits):
            df_subset_before = df_before[df_before['Label'] == label]
            df_subset_after = df_after[df_after['Label'] == label]

            fig.add_trace(go.Scatter3d(
                x=df_subset_before['Компонента 1'],
                y=df_subset_before['Компонента 2'],
                z=df_subset_before['Компонента 3'],
                mode='markers',
                marker=dict(
                    size=12,
                    color=tab10_colors[i % len(tab10_colors)],
                    opacity=1
                ),
                name=f'{label}',
                showlegend=True
            ), row=1, col=1)

            # Add the second subplot (right)
            fig.add_trace(go.Scatter3d(
                x=df_subset_after['Компонента 1'],
                y=df_subset_after['Компонента 2'],
                z=df_subset_after['Компонента 3'],
                mode='markers',
                marker=dict(
                    size=12,
                    color=tab10_colors[i % len(tab10_colors)],
                    opacity=1
                ),
                showlegend=False
            ), row=1, col=2)

        # Set layout with common legend and subplot titles
        fig.update_layout(
            scene=dict(
                xaxis_title='Компонента 1',
                yaxis_title='Компонента 2',
                zaxis_title='Компонента 3',

                xaxis_tickfont=dict(size=15),
                yaxis_tickfont=dict(size=15),
                zaxis_tickfont=dict(size=15),

                xaxis_title_font=dict(size=20),
                yaxis_title_font=dict(size=20),
                zaxis_title_font=dict(size=20),

                aspectmode='cube',
            ),
            scene2=dict(
                aspectmode='cube',
                xaxis_title='Компонента 1',
                yaxis_title='Компонента 2',
                zaxis_title='Компонента 3',

                xaxis_tickfont=dict(size=15),
                yaxis_tickfont=dict(size=15),
                zaxis_tickfont=dict(size=15),

                xaxis_title_font=dict(size=20),
                yaxis_title_font=dict(size=20),
                zaxis_title_font=dict(size=20),

            ),
            legend_title='Common Legend',
            legend=dict(
                title='Цифры',  # Common legend title
                title_font_size=25,
                font=dict(
                    size=25,
                ),  # Font size for the legend
                traceorder='normal'
            ),
            width=1600,  # Width of the whole figure (larger to fit two plots)
            height=800,  # Height of the whole figure
        )

        fig.for_each_annotation(lambda a: a.update(font=dict(size=24)))
        figures.append(fig)

In [None]:
figures[-1]

# Тестовые метрики

In [40]:
metrics_logs

{1: {'avg_loss_train': 0.5143056427663628,
  'mean_intra_distance_val': np.float64(0.02304094787687063),
  'mean_inter_distance_val': np.float64(0.10502515695989131),
  'avg_loss_val': 0.11277652019634843,
  'accuracy_top1_val': 0.9666666666666667,
  'f1_micro_val': 0.9666666666666667,
  'f1_macro_val': 0.9669831070492554},
 2: {'avg_loss_train': 0.056208513895089324,
  'mean_intra_distance_val': np.float64(0.022554356418550015),
  'mean_inter_distance_val': np.float64(0.11311280238959524),
  'avg_loss_val': 0.057455620262771845,
  'accuracy_top1_val': 0.984,
  'f1_micro_val': 0.984,
  'f1_macro_val': 0.9839772748313866},
 3: {'avg_loss_train': 0.02187425231169711,
  'mean_intra_distance_val': np.float64(0.022661614790558816),
  'mean_inter_distance_val': np.float64(0.11949168261554506),
  'avg_loss_val': 0.04391479461143414,
  'accuracy_top1_val': 0.9873333333333333,
  'f1_micro_val': 0.9873333333333333,
  'f1_macro_val': 0.9873263876551015},
 4: {'avg_loss_train': 0.01004900734319272

In [41]:
model.to(device);

In [42]:
test_metrics, nn_test_logs, test_labels = evaluate_model(
    model,
    test_loader,
    criterion,
    device,
    phase='Testing',
)
test_metrics = {key + '_test': val for key, val in test_metrics.items()}

Testing: 100%|██████████| 24/24 [00:21<00:00,  1.10it/s]


Evaluation Results: Average Loss: 0.03360959488297036, Accuracy: 0.9880, F1-Micro: 0.9880, F1-Macro: 0.9880





In [43]:
inter_matrix, intra_matrix = compute_pairwise_distances(
    np.array(nn_test_logs[4]),
    np.array(test_labels),
    10
)
dist_metrics = {
    'mean_intra_distance_test': float(np.nanmean(intra_matrix)),
    'mean_inter_distance_test': float(np.nanmean(inter_matrix[inter_matrix != 0]))
}

In [44]:
test_metrics.update(dist_metrics)

In [45]:
test_metrics

{'avg_loss_test': 0.03360959488297036,
 'accuracy_top1_test': 0.988,
 'f1_micro_test': 0.988,
 'f1_macro_test': 0.9879891609910763,
 'mean_intra_distance_test': 0.023013068921864034,
 'mean_inter_distance_test': 0.12449582285351224}

In [46]:
wandb.log(test_metrics)

In [47]:
wandb.finish()

0,1
accuracy_top1_test,▁
accuracy_top1_val,▁▆▇██
avg_batch_loss,█▅▅▅▄▃▃▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_loss_test,▁
avg_loss_train,█▂▁▁▁
avg_loss_val,█▃▂▁▁
f1_macro_test,▁
f1_macro_val,▁▆▇██
f1_micro_test,▁
f1_micro_val,▁▆▇██

0,1
accuracy_top1_test,0.988
accuracy_top1_val,0.98967
avg_batch_loss,0.00351
avg_loss_test,0.03361
avg_loss_train,0.00344
avg_loss_val,0.03397
f1_macro_test,0.98799
f1_macro_val,0.98966
f1_micro_test,0.988
f1_micro_val,0.98967
