In [None]:
# !ln -sf /opt/bin/nvidia-smi /usr/bin/nvidia-smi

In [None]:
%reload_ext tensorboard

In [1]:
import torch
import torch.nn as nn
from torchaudio import load, transforms
from datasets import load_dataset, load_metric, load_from_disk
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor, logging as log_models
from datasets import Dataset, DatasetDict, set_caching_enabled, logging as log_data
from torch.utils.tensorboard import SummaryWriter

import json
import tempfile
import os
import gc
import re
import pickle
import pandas as pd
import numpy as np
from copy import deepcopy
import matplotlib.pyplot as plt
from  IPython.display import clear_output
from datetime import datetime
from tqdm.notebook import tqdm

from sklearn.linear_model import SGDClassifier, LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, accuracy_score, classification_report

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

torch.manual_seed(42)
torch.cuda.manual_seed(42)
np.random.seed(42)
# torch.backends.cudnn.enabled = False
torch.backends.cudnn.deterministic = True

import nltk
nltk.download('averaged_perceptron_tagger')

# from google.colab import drive
# drive.mount('/content/drive')

from profiler import ProbingProfiler
from constants import *

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


In [2]:
TODAY = str(datetime.now().strftime('%Y-%m-%d')) 
!mkdir cache
!mkdir checkpoints
!mkdir jsons
!mkdir tensorboards

!mkdir tensorboards/profiling
!mkdir checkpoints/$TODAY
!mkdir jsons/$TODAY

CACHE_DIR = './cache'

mkdir: cannot create directory ‘cache’: File exists
mkdir: cannot create directory ‘checkpoints’: File exists
mkdir: cannot create directory ‘jsons’: File exists
mkdir: cannot create directory ‘tensorboards’: File exists
mkdir: cannot create directory ‘tensorboards/profiling’: File exists
mkdir: cannot create directory ‘checkpoints/2022-02-01’: File exists
mkdir: cannot create directory ‘jsons/2022-02-01’: File exists


In [3]:
POOLING_TO = 4
def kl_divergence(z, mu_theta, p_theta):
    log_prior = torch.distributions.Normal(0, 1).log_prob(z) 
    log_p_q = torch.distributions.Normal(mu_theta, torch.log(1 + torch.exp(p_theta))).log_prob(z) 
    return (log_p_q - log_prior).mean()
 
class KL:
    accumulated_kl_div = 0

class Loss:
    def __init__(self, variational: bool = True):
        self.variational = variational
    def __call__(self, y_true, y_pred, model = None):
        if self.variational:
            reconstruction_error = torch.nn.CrossEntropyLoss()(y_pred, y_true)
            kl = model.accumulated_kl_div
            model.reset_kl_div()
            return reconstruction_error + kl
        else: return torch.nn.CrossEntropyLoss()(y_pred, y_true)

class LinearVariational(torch.nn.Module):
    def __init__(self, in_features: int, out_features: int, parent, bias: bool=True, device = device):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.device = device
        self.include_bias = bias        
        self.parent = parent
        
        if getattr(parent, 'accumulated_kl_div', None) is None:
            if getattr(parent.parent, 'accumulated_kl_div', None) is None: parent.accumulated_kl_div = 0
            else: parent.accumulated_kl_div = parent.parent.accumulated_kl_div
            
        self.w_mu = nn.Parameter(torch.FloatTensor(in_features, out_features).normal_(mean = 0, std = 0.001).to(self.device))
        self.w_p = nn.Parameter(torch.FloatTensor(in_features, out_features).normal_(mean = 0, std = 0.001).to(self.device))

        if self.include_bias:
            self.b_mu = nn.Parameter(torch.zeros(out_features))
            self.b_p = nn.Parameter(torch.zeros(out_features))

    def _reparameterize(self, mu, p):
        sigma = torch.log(1 + torch.exp(p))
        eps = torch.randn_like(sigma)
        return mu + (eps * sigma)

    def forward(self, x):
        w = self._reparameterize(self.w_mu, self.w_p)
        
        if self.include_bias: b = self._reparameterize(self.b_mu, self.b_p)
        else: b = 0
            
        z = torch.matmul(x,w) + b
        
        self.parent.accumulated_kl_div += kl_divergence(w, self.w_mu, self.w_p).item()
        if self.include_bias: self.parent.accumulated_kl_div += kl_divergence(b, self.b_mu, self.b_p).item()
        return z

class LinearModel(torch.nn.Module):
    def __init__(self, in_size: int, hidden_size: int, out_size: int, variational: bool = True, device = device):
        super().__init__()
        self.kl_loss = KL
        self.variational = variational
        if self.variational: self.layers = torch.nn.Sequential(LinearVariational(in_size, out_size, self.kl_loss, device))
        else: self.layers = torch.nn.Sequential(torch.nn.Linear(in_size, out_size))

    @property
    def accumulated_kl_div(self):
        # assert self.variational
        return self.kl_loss.accumulated_kl_div
    
    def reset_kl_div(self):
        # assert self.variational
        self.kl_loss.accumulated_kl_div = 0
            
    def forward(self, x):
        return self.layers(x)

class ProberModel(torch.nn.Module):
    def __init__(self, parent_model, clf, enable_grads: bool):
        super().__init__()

        self.parent_model = parent_model
        self.pooling_layer = torch.nn.AdaptiveAvgPool1d(output_size = POOLING_TO)
        self.clf = clf
        self.enable_grads = enable_grads
    def forward(self, inp, att):
        parent_out = lambda e_inp, e_att: self.parent_model(e_inp, attention_mask = e_att,
                                                            output_hidden_states = False, output_attentions = False)
        if self.enable_grads: out = parent_out(inp, att)
        else: 
            with torch.no_grad(): out = parent_out(inp, att)
        out = self.pooling_layer(out.last_hidden_state.transpose(1, 2)).transpose(1, 2)
        out = self.clf(out.reshape(out.size(0), -1))
        return out

In [4]:
BATCH_SIZE = 50
N_EPOCHS = 10
MAX_LEN = 13000 #for linguistic dataset
DEBUG = False 
PROFILING = False 

GRAPHS_PATH = f"/content/drive/MyDrive/Tasks/probing_datasets/graphs/"
TENSORS_PATH = f"/content/drive/MyDrive/Tasks/probing_datasets/data/"
DATASETS_PATH = {"common_voice": f"/content/drive/MyDrive/Tasks/probing_datasets/tensors/common_voice_", 
                 "timit_asr": f"/content/drive/MyDrive/Tasks/probing_datasets/tensors/timit_asr_"}


MODELS_PATH ={"common_voice": {"ru": "anton-l/wav2vec2-large-xlsr-53-russian",
                               "fr": "facebook/wav2vec2-large-xlsr-53-french",
                               "de": "facebook/wav2vec2-large-xlsr-53-german",
                               "es": "facebook/wav2vec2-large-xlsr-53-spanish"},
              "timit_asr": {"None": "elgeish/wav2vec2-large-lv60-timit-asr"}}
TIMIT_METADATA_PATH = f"./timit_features_proc.csv"

LOGGING_DIR = "./tensorboards/"
GRAPHS_PATH = "./jsons/"
CHECKPOINTING_DIR = "./"
PROFILING_DIR = "./tensorboards/profiling/"

print_if_debug = lambda x, flag: print(x) if flag else None

class NumpyEncoder(json.JSONEncoder):
    """ Special json encoder for numpy types """
    def default(self, obj):
        if isinstance(obj, np.integer): return int(obj)
        elif isinstance(obj, np.floating): return float(obj)
        elif isinstance(obj, np.ndarray): return obj.tolist()
        return json.JSONEncoder.default(self, obj)

class CheckPointer:
    def __init__(self, parent_dir: str): 
        """
        Class to save necessary model checkpoints and load them
        -> saving: full model, layer index, optimizer state dict 
        """
        assert isinstance(parent_dir, str) and parent_dir.endswith("/")
        self.parent_dir = os.path.join(parent_dir, "checkpoints", str(TODAY))

    def __call__(self, probing_model: torch.nn.Module, task_title: str,
                       params: dict, layer_idx: int, optimizer: torch.optim):
        """Classical torch checkpointing callable method
        Args:
            probing_model, torch.nn.Module: probing model to save
            params, dict: model configuration for inference

            layer_idx, int: index of hidden layer of the model
            optimizer, torch.optim: optional optimizer to save
        Returns:
            str, name of saved checkpoint
        checkpoint name format: CHKPNT_lyridx=int_DATETIME.pth
        """
        torch.save({
            'layer_index': layer_idx,
            'model_params': params,
            'model_state_dict': probing_model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict()}, 
            os.path.join(self.parent_dir, "probing_checkpoint_{}_lyridx={}.pth".format(task_title, layer_idx)))
        return self.parent_dir + "checkpoints/{}/probing_checkpoint_{}_lyridx={}.pth".format(datetime.now().strftime('%Y-%m-%d'),
                                                                                             task_title,
                                                                                             layer_idx)

class Prober:
    def __init__(self, model_path: str, writer: torch.utils.tensorboard.SummaryWriter, data: Dataset = None, device: torch.device = device, init_strategy: str = None) -> None:
        """ Probing tasks class.
        Args:
            model_path, str: path to model in Hugging Face repo
            data, Dataset: optional, Hugging Face Dataset class 
                           default = None
            device: torch.device, accelerator
            pretrained: str, flag (use randomly initialized model or downloaded from repo)
                             supported strategies: 
                                -- full 
                                -- (only) encoder
                                -- (only) feature_extractors
                                default = None
        """
        print_if_debug("downloading staff...", DEBUG)

        self.model = Wav2Vec2ForCTC.from_pretrained(model_path, cache_dir = CACHE_DIR).to(device)
        if init_strategy is not None:
            print_if_debug("reseting network parameters...", DEBUG)
            assert isinstance(init_strategy, str)
            if init_strategy == "full": self.model.init_weights()
            elif init_strategy == "encoder":
                for p in self.model.wav2vec2.encoder.parameters(): torch.nn.init.normal_(p)
            elif init_strategy == "feature_extractors": 
                for p in self.model.wav2vec2.feature_extractor.parameters(): torch.nn.init.normal_(p)
                for p in self.model.wav2vec2.feature_projection.parameters(): torch.nn.init.normal_(p)
            else: print("No init with {} strategy".format(init_strategy))

        self.model.freeze_feature_extractor()
        self.fixed_encoder = self.model.wav2vec2.encoder.layers.cpu()

        #debugging tools
        self.writer = writer
        
        self.checkpointer = CheckPointer(CHECKPOINTING_DIR)
        
        self.profiler = ProbingProfiler(PROFILING_DIR)
        self.profiler.on() if PROFILING else self.profiler.off() 
        self.profiler.profile()
    

        self.data = data
        self.device = device
    def get_resources(self, load_data: bool = False, data_path: str = None, batch_size: int = 100, 
                      poisoning_ratio: float = 0, poisoning_mapping: type(lambda x: None) = None, **kwargs):
        """
        Args:
          load_data, bool: optional flag, whether load data from external resources or not, ONLY DISK MODE SUPPORTED;
                           default = False
          data_path, str: optional, active only if load_data = True;
                          default = None
          batch_size, int: optional;
                           default = 100
          poisoning_ratio, float: the ratio of adding misleading labels to the data (0 -- None, 1 -- fully random);
                           default = 0.       
          poisoning_mapping, callable: the mapping of poisoned labels,
                                       default = None
        """
        print_if_debug("collecting data...", DEBUG)
        
        def poison_data(batch, n_classes: int, ratio: float = 0.01, mapping = None):
            """Adding misleading labels"""
            assert ratio > 0. and ratio <= 1.
            if np.random.random() < ratio:
                if mapping is not None: batch['label'] = mapping(batch['label']) 
                else: batch['label'] = np.random.randint(0, n_classes)
            return batch        
        
        if load_data: 
            assert isinstance(data_path, str)
            self.data =  load_from_disk(data_path, **kwargs)
        
        if poisoning_ratio > 0.: 
            self.data = self.data.map(poison_data, fn_kwargs = {"n_classes": np.max(self.data['label']),
                                                                                    'ratio': poisoning_ratio,
                                                                                     'mapping': poisoning_mapping})
            
        self.data.set_format(type = 'torch', columns = ['input_values', 'attention_mask', 'label'])

        splitted_dataset = self.data.train_test_split(test_size = 0.25, seed = 42)

        weights = 1. / np.bincount(splitted_dataset['train']['label'])
        self.class_weight = np.array([weights[l] for l in splitted_dataset['train']['label']])
        self.dataloader = torch.utils.data.DataLoader(splitted_dataset['train'], batch_size = batch_size,
                                                      sampler = torch.utils.data.WeightedRandomSampler(self.class_weight, len(self.class_weight)))
        
        test_weights = 1. / np.bincount(splitted_dataset['test']['label'])
        self.validloader = torch.utils.data.DataLoader(splitted_dataset['test'], batch_size = batch_size,
                                                      sampler = torch.utils.data.WeightedRandomSampler(np.array([test_weights[l] for l in splitted_dataset['test']['label']]),
                                                                                                       len(splitted_dataset['test']['label'])))

    def _clear_cache(self):
         if self.device.type == 'cuda':
            with torch.no_grad(): torch.cuda.empty_cache()
            gc.collect()

    def make_probe(self, prober: torch.nn.Module, enable_grads: bool = False, use_variational: bool = False, layers: list = [1], from_memory = None, save_outputs: bool = False, task_title: str = None) -> np.ndarray:
        """ Main method to do a probing task
            Args:
                prober, callable object: funtion with argumnents __data__ and __labels__ to make probing classification (returns metric value of the task)
                enable_grads, bool: optional flag, whether to propagate grads or not
                                    default = False
                use_variational, bool: optional flag, whether to use variational prober or not
                                      default = False
                layers, list: optional list layers indexes to probe (shoud be 0 < layers < #hiddenLayers)
                              default = [0]
                from_memory, str: optionally load probing data from memory (currently deprecated)
                save_outputs, bool: optional flag, whether to save probing data
                                    default = False
                task_tilte, str: optional way to save probing data, active only if save_outputs = True;
                                default = None
            Returns:
              result, np.ndarray: float array of [#layers, ] output metric results
        """
        assert np.alltrue([l > 0 and l < len(self.fixed_encoder) for l in layers])

        def make_hidden_states(example, model = self.model, device: torch.device = device) -> list:
            """Returns outputs of all model layers 
               Agrs:
                  enable_grads, bool: optional flag, whether to propagate grads or not
                                      default = False
            """
            with torch.no_grad(): 
                output = model(example[0].to(self.device), 
                               attention_mask = example[1].to(self.device),
                               output_hidden_states = True)
            return [hs.cpu().view((len(hs), -1)).numpy() for hs in output.hidden_states]
        
        def _prepare_data(batch):
            """Helper function
            """
            labels = batch['label'].to(self.device)
            inp_values, att_masks =  batch['input_values'][0].to(device), batch['attention_mask'][0].to(self.device)
            #getting ready tp encoder
            with torch.no_grad(): 
                extract_features = self.model.wav2vec2.feature_extractor(inp_values).transpose(1, 2)
                hidden_states, extract_features = self.model.wav2vec2.feature_projection(extract_features)
                att_masks = self.model.wav2vec2._get_feature_vector_attention_mask(extract_features.shape[1], att_masks)
                hidden_states = self.model.wav2vec2._mask_hidden_states(hidden_states, attention_mask = att_masks)

            return hidden_states, att_masks, labels

        if from_memory is not None:
            assert isinstance(from_memory, str)
            raise NotImplementedError("") 
        else:
            print_if_debug("stacking classifiers...", DEBUG)

            loss_fn = Loss(use_variational)

            probing_info = {'loss': [], 'metrics': []}

            inputs, attention_masks, _ = _prepare_data(iter(self.dataloader).next())

            
            model_config = {'in_size': self.model.config.hidden_size * POOLING_TO, 
                            'hidden_size': 100,
                            'out_size': len(set(self.data['label'])),
                            'variational': use_variational}

            for layer in tqdm(layers, total = len(layers)):

                self.model.wav2vec2.encoder.layers = deepcopy(self.fixed_encoder[:layer].to(self.device))     
                for module in self.model.wav2vec2.encoder.layers[:-1]:
                    for param in module.parameters(): param.requires_grad = False

                probing_model = prober(parent_model = self.model.wav2vec2.encoder,
                                       clf = LinearModel(**model_config),
                                       enable_grads = enable_grads
                                       ).to(self.device)
                optim = torch.optim.Adam(probing_model.parameters(), lr = 3. * 1e-3)
                scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optim, T_max = 10)
                probing_model.eval()
                self.writer.add_graph(probing_model, input_to_model = [inputs, attention_masks])
                
                probing_model.train()
                with self.profiler.profile('train') as prof:
                    for epoch in range(N_EPOCHS):
                        train_loss = []
                        for step, batch in tqdm(enumerate(self.dataloader), total = len(self.dataloader)):
                            inputs, attention_masks, labels = _prepare_data(batch)
                            optim.zero_grad()

                            logits = probing_model(inputs, attention_masks)
                            loss = loss_fn(labels, logits, probing_model.clf)
                            train_loss.append(loss.cpu().item()) 

                            loss.backward()
                            optim.step()
                            prof.step()
                            self._clear_cache()
                        self.writer.add_scalar("training loss of layer {}".format(layer), np.mean(train_loss), epoch * len(self.dataloader))
                        scheduler.step()    
                print_if_debug("validating...", DEBUG)

                if save_outputs:
                    chkpnt = self.checkpointer(probing_model = probing_model.cpu(),
                                               task_title = "" if task_title is None else task_title,
                                               params = model_config, layer_idx = layer, optimizer = optim)
                    print_if_debug("checkpoint {} saved...".format(chkpnt), DEBUG)
                
                self._clear_cache()

                probing_model = probing_model.to(self.device)

                metrics_per_layer, losses_per_layer = [], [] 
                probing_model.eval()
                with self.profiler.profile('validation') as prof:
                    for step, batch in tqdm(enumerate(self.validloader), total = len(self.validloader)):
                        inputs, attention_masks, labels = _prepare_data(batch)
                        with torch.no_grad(): 
                            logits = probing_model(inputs, attention_masks)
                        loss = loss_fn(labels, logits, probing_model.clf)
                        f1 = f1_score(torch.argmax(logits.detach().cpu(), dim = -1).numpy(), labels.detach().cpu().numpy(), average = 'weighted')
                        metrics_per_layer.append(f1)
                        losses_per_layer.append(loss.cpu().item())
                        prof.step()
                        self._clear_cache()
                            
                probing_info['loss'].append(np.mean(losses_per_layer))
                probing_info['metrics'].append(np.mean(metrics_per_layer))

                self.writer.add_scalar("test loss", np.mean(losses_per_layer), layer)
                self.writer.add_scalar("test f1", np.mean(metrics_per_layer), layer)    

               
                probing_model = probing_model.cpu()
                del optim
                del probing_model
                print("\n", self.profiler.monitor_resources(), "\n")

        print_if_debug('running probes...', DEBUG)
        return probing_info

class Probing_pipeline:
    def __init__(self, writer: torch.utils.tensorboard.SummaryWriter,
                 feature: str, model_path: str, data: Dataset = None, lang: str = None, split: str = None) -> None:
        """Hugging Face Dataset wrapper for ASR probing
        Args:
            writer, SummaryWriter: tensorboard writer to debug and visualize all probing process
            feature, str: name of dataset's feature column to make probing onto
            model_path, str: path to model in Hugging Face repo
            custom_feature, bool: optional flag, a possibility to add feature not from original set
            data, Dataset: optional, Hugging Face Dataset class;
                           default = None
            (lang, split), str: optional features to this class to determine necessary dataset options
                                default = None

        """
        log_data.set_verbosity(40 if not DEBUG else 20)
        log_models.set_verbosity(40 if not DEBUG else 20)

        self.writer = writer
        self.lang = lang
        self.split = split
        self.feature = feature
        self.model_path = model_path
        self.dataset = data

    def load_data(self, from_disk: bool, data_path: str = None, own_feature_set: dict = None, only_custom_features: bool = True, **kwargs) -> None:
        """Custom dataloader
        Args:
            from_disk: bool, flag if load data from disk checkpoint or from the Internet
                             default = False
            data_path, str: optional, active only if from_disk = True;
                            default = None      
            own_feature_set, dict (format {feat: int}): optional own mapping to probing labels
                                                    default = None
            only_custom_features, bool:optional flag whether to use only custom features or add "other" ground class,
                                      active only with own_feature_set, default = True
        """
        print_if_debug("loading data...", DEBUG)
        if from_disk:
            assert isinstance(data_path, str) 
            self.dataset =  load_from_disk(data_path, **kwargs)
        elif data_path is not None: 
            assert isinstance(data_path, str) 
            self.dataset = load_dataset(data_path, name = self.lang, split = self.split,
                                        **kwargs)         
        else: assert self.dataset is not None
        self.own_feature_set = own_feature_set; self.only_custom_features = only_custom_features

    def _filter_data(self, own_feature_set: dict, only_custom_features: bool) -> None:
        """
        Args: 
          own_feature_set, dict (format {feat: int}): own mapping to probing labels
                                                    default = None
          only_custom_features, bool: flag whether to use only custom features or add "other" ground class,
                                      active only with own_feature_set, default = True

        """
        self.dataset = self.dataset.filter(lambda example: len(example[self.feature].strip()) > 0)
        if own_feature_set is None: self.f_set = {v: k for k, v in enumerate(list(set(self.dataset[self.feature])))}
        else: 
            assert isinstance(own_feature_set, dict)
            self.f_set = own_feature_set            
            if not only_custom_features:
                self.f_set["other"] = np.max(list(self.f_set.values())) + 1
                def foo(batch):
                    if batch[self.feature] not in self.f_set.keys(): batch[self.feature] = "other"
                    return batch                    
                self.dataset = self.dataset.map(foo)
            else: self.dataset = self.dataset.filter(lambda example: example[self.feature] in self.f_set)


    def get_dataset(self): return self.dataset
    def get_feature_set(self): 
        """All labels """
        return self.f_set

    def preprocess_data(self, preprocessinf_fn: type(lambda x: None), save_path: str = None, drop_columns: list = None, target_processing: type(lambda x: None) = None):
        """
        Args:
            preprocessinf_fn, callable object: prerpocessing dataset function to load all audio, should return the same self.dataset
                                               but with 'speech', 'len_speech', 'sampling_rate' columns
            save_path, str: optional path to save preprocessed data
                            default = None
            drop_columns, list: optional list of string-like columns to drop from the dataset
                                default = None
            target_processing, callable object: prerpocessing dataset function to speech transcript. Shouldn't change the dataset structure
                                                default = None
        """
        print_if_debug("downloading necessary staff...", DEBUG)
        processor =  Wav2Vec2Processor.from_pretrained(self.model_path, cache_dir = CACHE_DIR)
        def encode_labels(example, feature_column: str):
            """Label Encoder
            """
            example["label"] = self.f_set[example[feature_column]]
            return example

        def add_new_features(batch, max_len: int):
            """Preprocessing audio features with padding to maximum lenght"""
            inputs = processor(batch["speech"], sampling_rate = batch["sampling_rate"], return_tensors = "pt", 
                               padding = 'max_length', truncation = 'max_length', max_length = max_len)
            batch['input_values'] = inputs.input_values
            batch['attention_mask'] = inputs.attention_mask
            return batch

        print_if_debug('reading files...', DEBUG)
        if preprocessinf_fn is not None:
            self.dataset = self.dataset.map(preprocessinf_fn, fn_kwargs = {'feature_column': self.feature}, disable_nullable = False)
        print_if_debug('encoding features...', DEBUG)
        self._filter_data(self.own_feature_set, self.only_custom_features)
        self.dataset = self.dataset.map(encode_labels, fn_kwargs = {'feature_column': self.feature})

        print_if_debug('processing features...', DEBUG)
        self.dataset = self.dataset.map(add_new_features, fn_kwargs = {'max_len': np.max(self.dataset['len_speech'])})

        if drop_columns is not None:
            print_if_debug('removing user-picked columns...', DEBUG)
            assert isinstance(drop_columns, list) or isinstance(drop_columns, str)
            if isinstance(drop_columns, str): self.dataset = self.dataset.remove_columns([drop_columns])
            elif isinstance(drop_columns, list): self.dataset = self.dataset.remove_columns(drop_columns)
        self.dataset = self.dataset.remove_columns([self.feature, 'speech', 'len_speech', 'sampling_rate'])

        if target_processing is not None:
            print_if_debug('target processing... (is ON)', DEBUG)
            assert isinstance(target_processing, dict)
            assert ['fn', 'kwargs'] == list(target_processing.keys()) 
            assert isinstance(target_processing['fn'], type(lambda x: None)) and\
                   isinstance(target_processing['kwargs'], dict)

            self.dataset = self.dataset.map(target_processing['fn'], fn_kwargs = target_processing['kwargs'])

        if save_path is not None:
            assert isinstance(save_path, str) 
            print_if_debug('saving files...', DEBUG)
            if self.lang is None: self.lang = "en"
            self.dataset.save_to_disk(save_path + self.feature + "_" + self.lang + "_dataset")
        print('done')
        return self
    
    def run_probing(self, probing_fn, layers: list, enable_grads = False, use_variational: bool = False, init_strategy: str = None, plotting_fn: type(lambda x: None) = None, 
                    save_checkpoints: bool = False, plotting_config: dict = None, **kwargs):
        """Main probing runner
        Args:
           probing_fn, init_strategy -- look at Prober docs
           use_variational, bool: optional flag, whether to use variational prober or not
                                  default = False
           enable_grads, bool: optional flag, whether to propagate grads or not
                               default = False
           plotting_fn: callable, optional way to plot results
                       default = None
           save_checkpoints, bool: an optional flag, whether to save checkpoints
                                   defalult = False
           plotting_config, dict ({"title": str, "metrics": list of used in pro bing fn metrics, "save_path": str}), default = None
        """
        probing_task = Prober(self.model_path, self.writer, data = self.dataset, init_strategy = init_strategy)
        probing_task.get_resources(load_data = False, batch_size = BATCH_SIZE, **kwargs)
       

        probing_results = probing_task.make_probe(probing_fn, use_variational = use_variational, enable_grads = enable_grads, layers = layers, 
                                                  save_outputs = save_checkpoints, task_title = plotting_config['title'])
        
        json.dump({"data": probing_results, "config": plotting_config}, 
                  open(os.path.join(plotting_config['save_path'], plotting_config['title'] + ".json"), 'w' ), cls = NumpyEncoder)
        
        if plotting_fn is not None:
            assert isinstance(plotting_fn,  type(lambda x: None))
            assert isinstance(plotting_config, dict)
            plotting_fn(probing_results, plotting_config)
            plt.show()

        return probing_results

    def cleanup(self):
        """Erasing all cache
        """
        if self.dataset.cleanup_cache_files(): return "succeed"
        else: return "nothing has been deleted"
        
    def disable_cache(self): return set_caching_enabled(False)
    def enable_cache(self): return set_caching_enabled(True)

    def __repr__(self):
        return "Used data: {} \n ".format(self.dataset) +\
                "Used feature {} with set of values = {} \n".format(self.feature, self.f_set) +\
                "Used model: {}".format(self.model_path)

In [5]:
_lang = lambda l: "en" if l is None else l

def make_probing(data, labels):
    X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size = 0.33, shuffle = True, random_state=42)
    clf = SGDClassifier(max_iter = 1000, tol = 1e-2, random_state = 42)
    clf.fit(X_train, y_train)
    # acc = accuracy_score(clf.predict(X_test), y_test)
    f1 = f1_score(clf.predict(X_test), y_test, average = 'weighted')
    del clf
    return f1

##common_voice
def prepare_probing_task(batch, feature_column: str):
    frame_offset, num_frames = 16000, 16000
    sp, sr = load(batch["path"], frame_offset = frame_offset, num_frames = num_frames)
    resampler = transforms.Resample(sr, 16000)
    batch['speech'] = resampler(sp).squeeze().numpy()
    batch["sampling_rate"] = 16000
    batch['len_speech'] = len(batch['speech'])
    return batch


##timit
def prepare_probing_task_timit(batch, feature_column: str):
    frame_offset, num_frames = 16000, 16000
    sp, sr = load(batch["file"], frame_offset = frame_offset, num_frames = num_frames)
    resampler = transforms.Resample(sr, 16000)
    batch['speech'] = resampler(sp).squeeze().numpy()
    batch["sampling_rate"] = 16000
    batch['len_speech'] = len(batch['speech'])
    return batch

metadata = pd.read_csv(TIMIT_METADATA_PATH)
def prepare_probing_task_timit_2(batch, feature_column: str):
    """ Adding new features from  dataset's metadata files
    """
    frame_offset, num_frames = 16000, 16000
    sp, sr = load(batch["file"], frame_offset = frame_offset, num_frames = num_frames)
    resampler = transforms.Resample(sr, 16000)
    batch['speech'] = resampler(sp).squeeze().numpy()
    batch["sampling_rate"] = 16000
    batch['len_speech'] = len(batch['speech'])
    val = metadata[metadata["id"] == batch["speaker_id"]][feature_column].values
    batch[feature_column] = str(val[0]) if len(val) else "other" 
    return batch

def remove_special_characters(batch):
    chars_to_ignore_regex = '[\,\?\.\!\-\;\:\"]'
    batch["text"] = re.sub(chars_to_ignore_regex, '', batch["text"]).lower().strip()
    return batch

def plotting_fn(data, config: dict):
    if not DEBUG: clear_output(wait = True)
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.set_title(config['title'])
    used_metrics = config['metrics']
    if len(used_metrics) == 1: data = np.array(data)[..., np.newaxis]
    assert len(used_metrics) == data.shape[1]
    for ind in range(data.shape[1]): 
        ax.plot(np.arange(0, len(data), 1), data[:, ind], marker = '.', color = 'red', lw = 2, label = used_metrics[ind])
    ax.legend(loc = 'best')
    ax.set_ylabel('metrics'); ax.set_xlabel('#layer')
    ax.grid(True)
    if config['save_path'] is not None:
        assert isinstance(config['save_path'], str)
        pickle.dump(fig, open(config['save_path'] + config['title'] + '.pickle', 'wb'))

In [6]:
class TaskTester:
    def __init__(self, dataset_name: str,
                       features: list,
                       layers: list,
                       dataset_language: list = [None],
                       dataset_split: str = "test",
                       from_disk: bool = False,
                       prefix_data_path: str = None,
                       model_init_strategies: list = [None] + ["full"], 
                       use_variational: bool = False,
                       preprocessing_fn: type(lambda x: None) = prepare_probing_task,
                       probing_fn: torch.nn.Module = ProberModel,
                       enable_grads = False,
                       plotting_fn: type(lambda x: None) = None,
                       save_checkpoints: bool = False, 
                       save_preprocessed_data: str = None,
                       own_feature_set: dict = None,
                       poisoning_ratio: float = 0,
                       poisoning_mapping = None,
                       return_results: bool = False,
                       **kwargs):
        """A wrapper for the whole pipeline
        Agrs:
            dataset_name, str: name of used dataset, if `from_disk`== False, supported only "common_voice" and "timit_asr"
            layers, list: list of layers to probe on (l >= 1 and l <= 24 for l in layers)
            dataset_language, list: a list of supported dataset's languages
                                    deafult (for timit_asr) = [None]
            dataset_split, str: split of the used dataset
                                default = "test"
            from_disk, bool: an optional flag where to take a data for probing (if False, data will be dowlnoaded 
                             from HuggingFace hub)
                             default = False
            prefix_data_path, str: an optinal helpful string to find a dataset on the disk
            model_init_strategies, list: list of probing initializers; supported regims are: 
                                                                                            [None (only pretrained model),
                                                                                             "full" (whole random init.),
                                                                                             "encoder" (only random encoder)]
                                         default = [None, "full"]
            
            use_variational, bool: an optional flag, whether use MDL (True) or on ordinary logistic regression 
                                    default = False
            preprocessing_fn, a callable object: function for extracting audio from dataset
                                                 default = None
            probing_fn, torch.nn.Module class: probing model
                                               defalut = ProberModel
            
            enable_grads, bool: an optional flag, if backprop through any used model or not
                                default = False
            own_feature_set: dict, an optional dict of hand-designed labels for probing,
                                   default = None
            
            return_results: bool: a flag
            save_checkpoints, bool: an optional flag, whether to save checkpoints
                                   defalult = False
            
            other arguments are temporarily deprecated.
                                   
        """
        self.results = []
        layers = list(sorted(layers))
        
        for lang in dataset_language:
            for feature in features:
                for init_strategy in model_init_strategies:
                    title = dataset_name + "_" + _lang(lang) + "_" + feature + "_task_random=" + str(init_strategy) +\
                            "_grads="  +str(enable_grads) + "_variational=" + str(use_variational) + "_poisoned=" + str(poisoning_ratio)
                    writer = SummaryWriter(LOGGING_DIR + title + "/layers={}-{}".format(layers[0], layers[-1]))
                    pipe = Probing_pipeline(writer = writer,
                                            feature = feature, model_path = MODELS_PATH[dataset_name][str(lang)], 
                                            lang = lang, split = dataset_split)
                    pipe.disable_cache()
                    if from_disk: assert isinstance(prefix_data_path, str)
                    pipe.load_data(from_disk = from_disk, data_path = dataset_name if not from_disk else prefix_data_path,
                                   own_feature_set = own_feature_set, only_custom_features = False)
                    print(pipe.preprocess_data(preprocessing_fn, save_path = save_preprocessed_data,
                                               target_processing = None, **kwargs))
                    print("The task title:", title)
                    res = pipe.run_probing(probing_fn, layers = layers, enable_grads = enable_grads, 
                                           use_variational = use_variational, 
                                           plotting_fn = plotting_fn, 
                                           save_checkpoints = save_checkpoints, 
                                           init_strategy = init_strategy, 
                                           plotting_config = {"title": title,
                                                              "custom_features": list(own_feature_set.keys()) if own_feature_set is not None else "None",
                                                              "metrics": ['f1'], 'save_path': os.path.join(GRAPHS_PATH, str(TODAY))},
                                          poisoning_ratio = poisoning_ratio,
                                          poisoning_mapping = poisoning_mapping)
                    pipe.cleanup()
                    if return_results: self.results.append(res)
        if not DEBUG: clear_output(wait = True) 
        !rm /root/.cache/huggingface/* -r
        !rm ./cache/ -r
    def get_testing_results(self):
        """ A format:
        {
        "config": {dict of plotting config including "title"}
        "data": {"loss": [], "metrics": {}}
        }
        """
        return self.results
    def __repr__(self): return "ez4ence"

In [7]:
layers = np.arange(1, 24, 1)
# TaskTester(dataset_name = "timit_asr",
#            features = ['sex', 'age_bin'],
#            layers = layers,
#            preprocessing_fn = prepare_probing_task_timit_2,
#            use_variational = False,
#            enable_grads = False,
#            probing_fn = ProberModel,
#            save_checkpoints = False,
#            poisoning_ratio = 0,
#            drop_columns = ['word_detail', 'phonetic_detail'])

In [None]:
TaskTester(dataset_name = "timit_asr",
           features = ['sex', 'age_bin'],
           layers = layers,
           preprocessing_fn = prepare_probing_task_timit_2,
           use_variational = True,
           enable_grads = False,
           probing_fn = ProberModel,
           save_checkpoints = False,
           poisoning_ratio = 0,
           drop_columns = ['word_detail', 'phonetic_detail'])

In [None]:
layers = [1, 2, 3, 5, 7, 11, 17]
TaskTester(dataset_name = "timit_asr",
           features = ['sex'],
           layers = layers,
           preprocessing_fn = prepare_probing_task_timit_2,
           use_variational = True,
           enable_grads = False,
           probing_fn = ProberModel,
           save_checkpoints = False,
           model_init_strategies = [None],
           poisoning_ratio = .67,
           drop_columns = ['word_detail', 'phonetic_detail'])

TaskTester(dataset_name = "timit_asr",
           features = ['sex'],
           layers = layers,
           preprocessing_fn = prepare_probing_task_timit_2,
           use_variational = True,
           enable_grads = False,
           probing_fn = ProberModel,
           save_checkpoints = False,
           model_init_strategies = [None],
           poisoning_ratio = .0,
           drop_columns = ['word_detail', 'phonetic_detail'])

TaskTester(dataset_name = "timit_asr",
           features = ['pos_tag'],
           layers = layers,
           prefix_data_path='./pos_tag_set/',
           preprocessing_fn = None,
           use_variational = True,
           enable_grads = False,
           save_checkpoints = False,
           model_init_strategies = [None],
           poisoning_ratio = .67,
           probing_fn = ProberModel,
           from_disk=True)

In [None]:
# TaskTester(dataset_name = "timit_asr",
#            features = ['sex', 'age_bin'],
#            layers = [4] + [12] + [18],
#            preprocessing_fn = prepare_probing_task_timit_2,
#            use_variational = False,
#            enable_grads = True,
#            probing_fn = ProberModel,
#            save_checkpoints = True,
#            drop_columns = ['word_detail', 'phonetic_detail'])
# TaskTester(dataset_name = "timit_asr",
#            features = ['sex', 'age_bin'],
#            layers = [4] + [12] + [18],
#            preprocessing_fn = prepare_probing_task_timit_2,
#            use_variational = False,
#            enable_grads = False,
#            probing_fn = ProberModel,
#            save_checkpoints = True,
#            drop_columns = ['word_detail', 'phonetic_detail'])

# TaskTester(dataset_name = "timit_asr",
#            features = ['sex', 'age_bin'],
#            layers = [4] + [12] + [18],
#            preprocessing_fn = prepare_probing_task_timit_2,
#            use_variational = True,
#            enable_grads = True,
#            save_checkpoints = True,
#            probing_fn = ProberModel,
#            drop_columns = ['word_detail', 'phonetic_detail'])
# TaskTester(dataset_name = "timit_asr",
#            features = ['sex', 'age_bin'],
#            layers = [4] + [12] + [18],
#            preprocessing_fn = prepare_probing_task_timit_2,
#            use_variational = True,
#            enable_grads = False,
#            save_checkpoints = True,
#            probing_fn = ProberModel,
#            drop_columns = ['word_detail', 'phonetic_detail'])

# По информации о фонетических|лингвистических признаках


In [None]:
modes = ['word_detail', 'phonetic_detail']
class LinguisticDataset:
    def __init__(self, mode: str, save_path: str = None):
        """Linguistic dataset wrapper for TIMIT_ASR dataset
        Args:
             mode, str: 'word_detail' or 'phonetic_detail'
             save_path, str: optional way to save probing data, active only if save_outputs = True;
                            default = None
        """
        assert mode in ['word_detail', 'phonetic_detail']
        self.mode = mode
        self.save_path = save_path
        self.D = load_dataset('timit_asr', split = 'test')
        self.data = []
    def format_dataset(self):
        """Making JSON for TIMIT_ASR dataset
        """
        def process_fn(batch):
            data = {'long': []}

            for i in range(len(batch[self.mode]['start'])): data['long'].append((batch[self.mode]['start'][i], 
                                                                                  batch[self.mode]['stop'][i]))
            data['path'] = batch['file']
            data['utterance'] = batch[self.mode]['utterance']
            return data
        print_if_debug('making json...',DEBUG)
        for batch in self.D: self.data.append(process_fn(batch)) 
        if self.save_path is not None: assert isinstance(self.save_path, str)
        else: self.save_path = '.'   
        with open(self.save_path + 'timit_'+ self.mode +'.json', 'w') as fout: json.dump(self.data, fout)

    def __call__(self, additional_preprocessing: type(lambda x: None) = None, debug: bool = False, take_n: int = None) -> Dataset:
        """
        Args:
            additional_preprocessing, callable object: function to make new labels from phonemes, eg. POS-tags
                                                      default  = None, 
            debug, bool: flag, default = False
            take_n, int: how many samples from original set to take
                        default   = 10000 (None)
        Returns: new Hugging Face dataset
        """
        self.format_dataset()
        dataset = Dataset.from_pandas(pd.read_json(self.save_path + 'timit_'+ self.mode +'.json'))
        def mapping_fn(batch):
            """Phonemes mapping
            """
            speech_array, sampling_rate = load(batch["path"])
            resampler = transforms.Resample(sampling_rate, 16000)
            batch["speech"] = speech_array.squeeze().numpy()
            batch['atoms'] = []
            for l in batch['long']: 
                batch['atoms'].append(batch['speech'][l[0]:l[1]])
            return batch

        dataset = dataset.map(mapping_fn)
        new_data = []
        for example in dataset:
            for ind in range(len(example['atoms'])):
              if  len(example['atoms'][ind]) > 0 and \
                  len(example['atoms'][ind]) <= MAX_LEN if self.mode == 'word_detail' else int(0.01 * MAX_LEN): 

                  new_data.append([example['atoms'][ind], 16000,
                                   len(example['atoms'][ind]),
                                   example['utterance'][ind]])

              else: print_if_debug(example['utterance'][ind] + " " + "{}".format(len(example['atoms'][ind])), debug)
        new_dataset = pd.DataFrame(new_data, columns = ['speech', 'sampling_rate', 'len_speech', 'utterance'])

        if take_n is not None: 
            assert isinstance(take_n, int)
            print_if_debug("taking a slice of {} elements".format(take_n), debug)
            new_dataset = new_dataset.sample(n = take_n, random_state = 42)

        new_dataset =  Dataset.from_pandas(new_dataset)
        if additional_preprocessing is not None:
            assert isinstance(additional_preprocessing, type(lambda x: None))
            print_if_debug("adding new features...", debug)
            new_dataset = new_dataset.map(additional_preprocessing, batched = False)
        return new_dataset

In [None]:
def pos_tagging_fn(batch):
    batch['pos_tag'] = nltk.pos_tag([batch['utterance']])[0][1]
    return batch

def random_labeling_fn(batch, label: str = 'pos_tag', n_classes: int = 10):
    batch[label + "_random"] = str(np.random.randint(0, n_classes))
    return batch

def label_batch(batch):
    batch = pos_tagging_fn(batch)
    batch = random_labeling_fn(batch)
    return batch


In [None]:
# lingusitic_dataset = DatasetDict()
# for mode in modes:
#     lingusitic_dataset[mode] = LinguisticDataset(mode, 
#                                        save_path = ".")(
#                                            label_batch if mode == 'word_detail' else None,
#                                            debug = DEBUG,
#                                            take_n = None if mode == 'word_detail' else 15000 )

# lingusitic_dataset['word_detail'].save_to_disk("./pos_tag_set/")
# lingusitic_dataset['phonetic_detail'].save_to_disk("./phonetic_set/")
# del lingusitic_dataset


## POS tag 

In [None]:
# TaskTester(dataset_name = "timit_asr",
#            features = ['pos_tag'],
#            layers = [4] + [12] + [18],
#            prefix_data_path='./pos_tag_set/',
#            preprocessing_fn = None,
#            use_variational = False,
#            enable_grads = True,
#            save_checkpoints = True,
#            probing_fn = ProberModel,
#            from_disk=True)

# TaskTester(dataset_name = "timit_asr",
#            features = ['pos_tag'],
#            layers = [4] + [12] + [18],
#            prefix_data_path='./pos_tag_set/',
#            preprocessing_fn = None,
#            use_variational = True,
#            enable_grads = True,
#            save_checkpoints = True,
#            probing_fn = ProberModel,
#            from_disk=True)

# TaskTester(dataset_name = "timit_asr",
#            features = ['pos_tag'],
#            layers = [4] + [12] + [18],
#            prefix_data_path='./pos_tag_set/',
#            preprocessing_fn = None,
#            use_variational = False,
#            enable_grads = False,
#            save_checkpoints = True,
#            probing_fn = ProberModel,
#            from_disk=True)

# TaskTester(dataset_name = "timit_asr",
#            features = ['pos_tag'],
#            layers = [4] + [12] + [18],
#            prefix_data_path='./pos_tag_set/',
#            preprocessing_fn = None,
#            use_variational = True,
#            enable_grads = False,
#            save_checkpoints = True,
#            probing_fn = ProberModel,
#            from_disk=True)

## Простые фонемы

In [8]:
stops =  ["b", "d", "g", "p", "t", "k", "dx", "q"]  
closed_stops = ["bcl", "dcl", "gcl", "pcl", "tck", "kcl"]   
####################################################################################
vowels = ["iy", "ih", "eh", "ey", "ae", "aa", "aw", "ay", "ah", "ao", "oy", "ow",
          "uh", "uw", "ux", "er", "ax", "ix", "axr", "ax-h"]
####################################################################################
others = ["pau", "h#"]

####################################################################################
####################################################################################
f_set = lambda col: {v: k for k, v in enumerate(list(set(col)))}

In [9]:
# TaskTester(dataset_name = "timit_asr",
#            features = ['utterance'],
#            layers = [4] + [12] + [18],
#            prefix_data_path='./phonetic_set/',
#            preprocessing_fn = None,
#            use_variational = False,
#            enable_grads = True,
#            save_checkpoints = True,
#            probing_fn = ProberModel,
#            own_feature_set = f_set(vowels), #f_set(vowels) for only vowels classification 
#            from_disk=True)

# TaskTester(dataset_name = "timit_asr",
#            features = ['utterance'],
#            layers = [4] + [12] + [18],
#            prefix_data_path='./phonetic_set/',
#            preprocessing_fn = None,
#            use_variational = True,
#            enable_grads = True,
#            save_checkpoints = True,
#            probing_fn = ProberModel,
#            own_feature_set = f_set(vowels), 
#            from_disk=True)

# TaskTester(dataset_name = "timit_asr",
#            features = ['utterance'],
#            layers = [4] + [12] + [18],
#            prefix_data_path='./phonetic_set/',
#            preprocessing_fn = None,
#            use_variational = False,
#            enable_grads = False,
#            save_checkpoints = True,
#            probing_fn = ProberModel,
#            own_feature_set = f_set(vowels), #f_set(vowels) for only vowels classification 
#            from_disk=True)

# TaskTester(dataset_name = "timit_asr",
#            features = ['utterance'],
#            layers = [4] + [12] + [18],
#            prefix_data_path='./phonetic_set/',
#            preprocessing_fn = None,
#            use_variational = True,
#            enable_grads = False,
#            save_checkpoints = True,
#            probing_fn = ProberModel,
#            own_feature_set = f_set(vowels), 
#            from_disk=True)

In [None]:
# TaskTester(dataset_name = "timit_asr",
#            features = ['utterance'],
#            layers = np.arange(1, 24, 1),
#            prefix_data_path='./phonetic_set/',
#            preprocessing_fn = None,
#            use_variational = True,
#            enable_grads = False,
#            save_checkpoints = False,
#            poisoning_ratio = 0.0,
#            probing_fn = ProberModel,
#            own_feature_set = f_set(vowels), 
#            from_disk=True)


TaskTester(dataset_name = "timit_asr",
           features = ['utterance'],
           layers = np.arange(1, 24, 1),
           prefix_data_path='./phonetic_set/',
           preprocessing_fn = None,
           use_variational = False,
           enable_grads = False,
           save_checkpoints = False,
           poisoning_ratio = 0.0,
           probing_fn = ProberModel,
           own_feature_set = f_set(vowels), 
           from_disk=True)

  0%|          | 0/15 [00:00<?, ?ba/s]

  0%|          | 0/15000 [00:00<?, ?ex/s]

  0%|          | 0/15000 [00:00<?, ?ex/s]

  0%|          | 0/15000 [00:00<?, ?ex/s]

done
Used data: Dataset({
    features: ['__index_level_0__', 'label', 'input_values', 'attention_mask'],
    num_rows: 15000
}) 
 Used feature utterance with set of values = {'ey': 0, 'uw': 1, 'iy': 2, 'aw': 3, 'ax': 4, 'ae': 5, 'aa': 6, 'ux': 7, 'oy': 8, 'ay': 9, 'eh': 10, 'ow': 11, 'ix': 12, 'ah': 13, 'uh': 14, 'ao': 15, 'axr': 16, 'er': 17, 'ih': 18, 'ax-h': 19, 'other': 20} 
Used model: elgeish/wav2vec2-large-lv60-timit-asr
The task title: timit_asr_en_utterance_task_random=None_grads=False_variational=False_poisoned=0.0


To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at  /pytorch/aten/src/ATen/native/BinaryOps.cpp:467.)
  return torch.floor_divide(self, other)


  0%|          | 0/23 [00:00<?, ?it/s]

  if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len):
  if attention_mask.size() != (bsz, 1, tgt_len, src_len):
  if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim):


  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


virual memory usage: svmem(total=270353252352, available=191897927680, percent=29.0, used=75780665344, free=80565415936, active=58871140352, inactive=122703962112, buffers=82454175744, cached=31552995328, shared=187916288, slab=5983727616) 

RAM Free: 80565415936MB | Used: 75780665344MB | Util  29% | Total 270353252352MB 
GPU RAM Free: 9053MB 	|	 Used: 2125MB 	|	 Util  19% 	|	 Total 11178MB
GPU RAM Free: 8MB 	|	 Used: 11170MB 	|	 Util 100% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 8823MB 	|	 Used: 2355MB 	|	 Util  21% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 

  if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len):
  if attention_mask.size() != (bsz, 1, tgt_len, src_len):
  if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim):


  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


virual memory usage: svmem(total=270353252352, available=194835013632, percent=27.9, used=72853876736, free=81555386368, active=58922332160, inactive=121769623552, buffers=82492162048, cached=33451827200, shared=177598464, slab=5975715840) 

RAM Free: 81555386368MB | Used: 72853876736MB | Util  28% | Total 270353252352MB 
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 8MB 	|	 Used: 11170MB 	|	 Util 100% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 8823MB 	|	 Used: 2355MB 	|	 Util  21% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11

  if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len):
  if attention_mask.size() != (bsz, 1, tgt_len, src_len):
  if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim):


  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


virual memory usage: svmem(total=270353252352, available=194754707456, percent=28.0, used=72934035456, free=76759547904, active=58939707392, inactive=126529249280, buffers=82521772032, cached=38137896960, shared=177762304, slab=5976711168) 

RAM Free: 76759547904MB | Used: 72934035456MB | Util  28% | Total 270353252352MB 
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 8MB 	|	 Used: 11170MB 	|	 Util 100% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 8823MB 	|	 Used: 2355MB 	|	 Util  21% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11

  if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len):
  if attention_mask.size() != (bsz, 1, tgt_len, src_len):
  if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim):


  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)


virual memory usage: svmem(total=270353252352, available=194806611968, percent=27.9, used=72881954816, free=75498414080, active=60453478400, inactive=126321106944, buffers=82573770752, cached=39399112704, shared=177938432, slab=5975789568) 

RAM Free: 75498414080MB | Used: 72881954816MB | Util  28% | Total 270353252352MB 
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 8MB 	|	 Used: 11170MB 	|	 Util 100% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 8477MB 	|	 Used: 2701MB 	|	 Util  24% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11

  if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len):
  if attention_mask.size() != (bsz, 1, tgt_len, src_len):
  if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim):


  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


virual memory usage: svmem(total=270353252352, available=194501722112, percent=28.1, used=73186668544, free=75157274624, active=61877993472, inactive=125218455552, buffers=82601496576, cached=39407812608, shared=178118656, slab=5975482368) 

RAM Free: 75157274624MB | Used: 73186668544MB | Util  28% | Total 270353252352MB 
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 8MB 	|	 Used: 11170MB 	|	 Util 100% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 8477MB 	|	 Used: 2701MB 	|	 Util  24% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11

  if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len):
  if attention_mask.size() != (bsz, 1, tgt_len, src_len):
  if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim):


  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)


virual memory usage: svmem(total=270353252352, available=194509299712, percent=28.1, used=73178894336, free=75149144064, active=61883904000, inactive=125234085888, buffers=82621878272, cached=39403335680, shared=178307072, slab=5975920640) 

RAM Free: 75149144064MB | Used: 73178894336MB | Util  28% | Total 270353252352MB 
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 8MB 	|	 Used: 11170MB 	|	 Util 100% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 8477MB 	|	 Used: 2701MB 	|	 Util  24% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11

  if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len):
  if attention_mask.size() != (bsz, 1, tgt_len, src_len):
  if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim):


  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


virual memory usage: svmem(total=270353252352, available=194810986496, percent=27.9, used=72877015040, free=72978649088, active=61940985856, inactive=127337771008, buffers=82681896960, cached=41815691264, shared=178507776, slab=5975531520) 

RAM Free: 72978649088MB | Used: 72877015040MB | Util  28% | Total 270353252352MB 
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 8MB 	|	 Used: 11170MB 	|	 Util 100% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 8131MB 	|	 Used: 3047MB 	|	 Util  27% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11

  if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len):
  if attention_mask.size() != (bsz, 1, tgt_len, src_len):
  if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim):


  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


virual memory usage: svmem(total=270353252352, available=194813194240, percent=27.9, used=72874606592, free=72870584320, active=61940928512, inactive=127456669696, buffers=82796048384, cached=41812013056, shared=178708480, slab=5975138304) 

RAM Free: 72870584320MB | Used: 72874606592MB | Util  28% | Total 270353252352MB 
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 8MB 	|	 Used: 11170MB 	|	 Util 100% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 8131MB 	|	 Used: 3047MB 	|	 Util  27% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11

  if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len):
  if attention_mask.size() != (bsz, 1, tgt_len, src_len):
  if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim):


  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


virual memory usage: svmem(total=270353252352, available=194690867200, percent=28.0, used=72996909056, free=72676028416, active=61954527232, inactive=127638413312, buffers=82851823616, cached=41828491264, shared=178733056, slab=5975642112) 

RAM Free: 72676028416MB | Used: 72996909056MB | Util  28% | Total 270353252352MB 
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 8MB 	|	 Used: 11170MB 	|	 Util 100% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 8131MB 	|	 Used: 3047MB 	|	 Util  27% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11

  if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len):
  if attention_mask.size() != (bsz, 1, tgt_len, src_len):
  if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim):


  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)


virual memory usage: svmem(total=270353252352, available=194665201664, percent=28.0, used=73022582784, free=72601550848, active=61954609152, inactive=127717163008, buffers=82897645568, cached=41831473152, shared=178724864, slab=5975187456) 

RAM Free: 72601550848MB | Used: 73022582784MB | Util  28% | Total 270353252352MB 
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 8MB 	|	 Used: 11170MB 	|	 Util 100% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 8131MB 	|	 Used: 3047MB 	|	 Util  27% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11

  if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len):
  if attention_mask.size() != (bsz, 1, tgt_len, src_len):
  if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim):


  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


virual memory usage: svmem(total=270353252352, available=194742902784, percent=28.0, used=72944873472, free=71617265664, active=62021263360, inactive=128523534336, buffers=83445809152, cached=42345304064, shared=178733056, slab=6080827392) 

RAM Free: 71617265664MB | Used: 72944873472MB | Util  28% | Total 270353252352MB 
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 8MB 	|	 Used: 11170MB 	|	 Util 100% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 8131MB 	|	 Used: 3047MB 	|	 Util  27% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11

  if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len):
  if attention_mask.size() != (bsz, 1, tgt_len, src_len):
  if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim):


  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


virual memory usage: svmem(total=270353252352, available=194622054400, percent=28.0, used=73065721856, free=71467036672, active=62026280960, inactive=128670072832, buffers=83484446720, cached=42336047104, shared=178733056, slab=6081036288) 

RAM Free: 71467036672MB | Used: 73065721856MB | Util  28% | Total 270353252352MB 
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 8MB 	|	 Used: 11170MB 	|	 Util 100% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 7785MB 	|	 Used: 3393MB 	|	 Util  30% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11

  if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len):
  if attention_mask.size() != (bsz, 1, tgt_len, src_len):
  if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim):


  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


virual memory usage: svmem(total=270353252352, available=194284011520, percent=28.1, used=73403756544, free=71035584512, active=62055280640, inactive=129073057792, buffers=83572256768, cached=42341654528, shared=178741248, slab=6081998848) 

RAM Free: 71035584512MB | Used: 73403756544MB | Util  28% | Total 270353252352MB 
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 8MB 	|	 Used: 11170MB 	|	 Util 100% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 7785MB 	|	 Used: 3393MB 	|	 Util  30% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11

  if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len):
  if attention_mask.size() != (bsz, 1, tgt_len, src_len):
  if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim):


  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


virual memory usage: svmem(total=270353252352, available=194274611200, percent=28.1, used=73413156864, free=84538781696, active=56693248000, inactive=120935571456, buffers=83625156608, cached=28776157184, shared=178741248, slab=6078038016) 

RAM Free: 84538781696MB | Used: 73413156864MB | Util  28% | Total 270353252352MB 
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 8MB 	|	 Used: 11170MB 	|	 Util 100% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 7785MB 	|	 Used: 3393MB 	|	 Util  30% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11

  if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len):
  if attention_mask.size() != (bsz, 1, tgt_len, src_len):
  if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim):


  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


virual memory usage: svmem(total=270353252352, available=194175549440, percent=28.2, used=73512218624, free=84580274176, active=56792186880, inactive=120808738816, buffers=83698454528, cached=28562305024, shared=178741248, slab=6057263104) 

RAM Free: 84580274176MB | Used: 73512218624MB | Util  28% | Total 270353252352MB 
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 8MB 	|	 Used: 11170MB 	|	 Util 100% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 7439MB 	|	 Used: 3739MB 	|	 Util  33% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11

  if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len):
  if attention_mask.size() != (bsz, 1, tgt_len, src_len):
  if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim):


  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


virual memory usage: svmem(total=270353252352, available=194270363648, percent=28.1, used=73417404416, free=88612417536, active=56328237056, inactive=117290147840, buffers=83911077888, cached=24412352512, shared=178733056, slab=6016245760) 

RAM Free: 88612417536MB | Used: 73417404416MB | Util  28% | Total 270353252352MB 
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 8MB 	|	 Used: 11170MB 	|	 Util 100% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 7437MB 	|	 Used: 3741MB 	|	 Util  33% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11

  if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len):
  if attention_mask.size() != (bsz, 1, tgt_len, src_len):
  if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim):


  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)


virual memory usage: svmem(total=270353252352, available=194402328576, percent=28.1, used=73285447680, free=88645513216, active=56353202176, inactive=117208772608, buffers=84008603648, cached=24413687808, shared=178733056, slab=6024843264) 

RAM Free: 88645513216MB | Used: 73285447680MB | Util  28% | Total 270353252352MB 
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 8MB 	|	 Used: 11170MB 	|	 Util 100% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 7437MB 	|	 Used: 3741MB 	|	 Util  33% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11

  if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len):
  if attention_mask.size() != (bsz, 1, tgt_len, src_len):
  if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim):


  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)


virual memory usage: svmem(total=270353252352, available=194302193664, percent=28.1, used=73385582592, free=88467976192, active=56356925440, inactive=117390151680, buffers=84078202880, cached=24421490688, shared=178733056, slab=6027952128) 

RAM Free: 88467976192MB | Used: 73385582592MB | Util  28% | Total 270353252352MB 
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 8MB 	|	 Used: 11170MB 	|	 Util 100% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 7091MB 	|	 Used: 4087MB 	|	 Util  37% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11

  if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len):
  if attention_mask.size() != (bsz, 1, tgt_len, src_len):
  if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim):


  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


virual memory usage: svmem(total=270353252352, available=194109661184, percent=28.2, used=73578102784, free=88082485248, active=56378609664, inactive=117745930240, buffers=84173582336, cached=24519081984, shared=178745344, slab=6032338944) 

RAM Free: 88082485248MB | Used: 73578102784MB | Util  28% | Total 270353252352MB 
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 8MB 	|	 Used: 11170MB 	|	 Util 100% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 7091MB 	|	 Used: 4087MB 	|	 Util  37% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11

  if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len):
  if attention_mask.size() != (bsz, 1, tgt_len, src_len):
  if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim):


  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


virual memory usage: svmem(total=270353252352, available=194379231232, percent=28.1, used=73308540928, free=87981170688, active=56483725312, inactive=117699354624, buffers=84501233664, cached=24562307072, shared=178737152, slab=6067175424) 

RAM Free: 87981170688MB | Used: 73308540928MB | Util  28% | Total 270353252352MB 
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 8MB 	|	 Used: 11170MB 	|	 Util 100% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 7091MB 	|	 Used: 4087MB 	|	 Util  37% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11

  if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len):
  if attention_mask.size() != (bsz, 1, tgt_len, src_len):
  if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim):


  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


virual memory usage: svmem(total=270353252352, available=194250887168, percent=28.1, used=73436884992, free=85328855040, active=56510119936, inactive=120334614528, buffers=84621692928, cached=26965819392, shared=178737152, slab=6071422976) 

RAM Free: 85328855040MB | Used: 73436884992MB | Util  28% | Total 270353252352MB 
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 8MB 	|	 Used: 11170MB 	|	 Util 100% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 7091MB 	|	 Used: 4087MB 	|	 Util  37% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11

  if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len):
  if attention_mask.size() != (bsz, 1, tgt_len, src_len):
  if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim):


  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


virual memory usage: svmem(total=270353252352, available=194935472128, percent=27.9, used=72752287744, free=85877440512, active=56518418432, inactive=119770628096, buffers=84695027712, cached=27028496384, shared=178749440, slab=6074855424) 

RAM Free: 85877440512MB | Used: 72752287744MB | Util  28% | Total 270353252352MB 
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 8MB 	|	 Used: 11170MB 	|	 Util 100% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 6745MB 	|	 Used: 4433MB 	|	 Util  40% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11

  if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len):
  if attention_mask.size() != (bsz, 1, tgt_len, src_len):
  if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim):


  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



  0%|          | 0/15000 [00:00<?, ?ex/s]

done
Used data: Dataset({
    features: ['__index_level_0__', 'label', 'input_values', 'attention_mask'],
    num_rows: 15000
}) 
 Used feature utterance with set of values = {'ey': 0, 'uw': 1, 'iy': 2, 'aw': 3, 'ax': 4, 'ae': 5, 'aa': 6, 'ux': 7, 'oy': 8, 'ay': 9, 'eh': 10, 'ow': 11, 'ix': 12, 'ah': 13, 'uh': 14, 'ao': 15, 'axr': 16, 'er': 17, 'ih': 18, 'ax-h': 19, 'other': 21} 
Used model: elgeish/wav2vec2-large-lv60-timit-asr
The task title: timit_asr_en_utterance_task_random=full_grads=False_variational=False_poisoned=0.0




  0%|          | 0/23 [00:00<?, ?it/s]

  if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len):
  if attention_mask.size() != (bsz, 1, tgt_len, src_len):
  if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim):


  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


virual memory usage: svmem(total=270353252352, available=193004089344, percent=28.6, used=74683719680, free=83786043392, active=56468299776, inactive=121867108352, buffers=85085466624, cached=26798022656, shared=178753536, slab=6121148416) 

RAM Free: 83786043392MB | Used: 74683719680MB | Util  29% | Total 270353252352MB 
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 8MB 	|	 Used: 11170MB 	|	 Util 100% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 8477MB 	|	 Used: 2701MB 	|	 Util  24% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11

  if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len):
  if attention_mask.size() != (bsz, 1, tgt_len, src_len):
  if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim):


  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


virual memory usage: svmem(total=270353252352, available=192944234496, percent=28.6, used=74743582720, free=83694694400, active=56528564224, inactive=121900396544, buffers=85120831488, cached=26794143744, shared=178745344, slab=6122459136) 

RAM Free: 83694694400MB | Used: 74743582720MB | Util  29% | Total 270353252352MB 
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 8MB 	|	 Used: 11170MB 	|	 Util 100% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 8477MB 	|	 Used: 2701MB 	|	 Util  24% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11

  if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len):
  if attention_mask.size() != (bsz, 1, tgt_len, src_len):
  if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim):


  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


virual memory usage: svmem(total=270353252352, available=192872136704, percent=28.7, used=74815680512, free=81161609216, active=56531206144, inactive=124421619712, buffers=85167939584, cached=29208023040, shared=178745344, slab=6125293568) 

RAM Free: 81161609216MB | Used: 74815680512MB | Util  29% | Total 270353252352MB 
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 8MB 	|	 Used: 11170MB 	|	 Util 100% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 8477MB 	|	 Used: 2701MB 	|	 Util  24% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11

  if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len):
  if attention_mask.size() != (bsz, 1, tgt_len, src_len):
  if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim):


  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


virual memory usage: svmem(total=270353252352, available=193158864896, percent=28.6, used=74528944128, free=80077565952, active=56622551040, inactive=125356982272, buffers=86496833536, cached=29249908736, shared=178753536, slab=6161367040) 

RAM Free: 80077565952MB | Used: 74528944128MB | Util  29% | Total 270353252352MB 
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 8MB 	|	 Used: 11170MB 	|	 Util 100% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 8131MB 	|	 Used: 3047MB 	|	 Util  27% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11178MB
GPU RAM Free: 11176MB 	|	 Used: 2MB 	|	 Util   0% 	|	 Total 11

  if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len):
  if attention_mask.size() != (bsz, 1, tgt_len, src_len):
  if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim):


  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)


virual memory usage: svmem(total=270353252352, available=165575843840, percent=38.8, used=101934850048, free=46417313792, active=58660204544, inactive=155719172096, buffers=87999549440, cached=34001539072, shared=355917824, slab=6345191424) 

RAM Free: 46417313792MB | Used: 101934850048MB | Util  39% | Total 270353252352MB 
GPU RAM Free: 2153MB 	|	 Used: 9025MB 	|	 Util  81% 	|	 Total 11178MB
GPU RAM Free: 8MB 	|	 Used: 11170MB 	|	 Util 100% 	|	 Total 11178MB
GPU RAM Free: 1559MB 	|	 Used: 9619MB 	|	 Util  86% 	|	 Total 11178MB
GPU RAM Free: 8131MB 	|	 Used: 3047MB 	|	 Util  27% 	|	 Total 11178MB
GPU RAM Free: 2797MB 	|	 Used: 8381MB 	|	 Util  75% 	|	 Total 11178MB
GPU RAM Free: 1627MB 	|	 Used: 9551MB 	|	 Util  85% 	|	 Total 11178MB
GPU RAM Free: 739MB 	|	 Used: 10439MB 	|	 Util  93% 	|	 Total 11178MB
GPU RAM Free: 751MB 	|	 Used: 10427MB 	|	 Util  93% 	|	 Total 11178MB
GPU RAM Free: 457MB 	|	 Used: 10721MB 	|	 Util  96% 	|	 Total 11178MB
GPU RAM Free: 2517MB 	|	 Used: 8661MB 	|	 Util

  if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len):
  if attention_mask.size() != (bsz, 1, tgt_len, src_len):
  if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim):


  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)


virual memory usage: svmem(total=270353252352, available=165372473344, percent=38.8, used=102138245120, free=46227554304, active=58661298176, inactive=155896135680, buffers=87982678016, cached=34004774912, shared=355893248, slab=6346027008) 

RAM Free: 46227554304MB | Used: 102138245120MB | Util  39% | Total 270353252352MB 
GPU RAM Free: 51MB 	|	 Used: 11127MB 	|	 Util 100% 	|	 Total 11178MB
GPU RAM Free: 8MB 	|	 Used: 11170MB 	|	 Util 100% 	|	 Total 11178MB
GPU RAM Free: 1557MB 	|	 Used: 9621MB 	|	 Util  86% 	|	 Total 11178MB
GPU RAM Free: 8131MB 	|	 Used: 3047MB 	|	 Util  27% 	|	 Total 11178MB
GPU RAM Free: 463MB 	|	 Used: 10715MB 	|	 Util  96% 	|	 Total 11178MB
GPU RAM Free: 1325MB 	|	 Used: 9853MB 	|	 Util  88% 	|	 Total 11178MB
GPU RAM Free: 245MB 	|	 Used: 10933MB 	|	 Util  98% 	|	 Total 11178MB
GPU RAM Free: 1735MB 	|	 Used: 9443MB 	|	 Util  84% 	|	 Total 11178MB
GPU RAM Free: 449MB 	|	 Used: 10729MB 	|	 Util  96% 	|	 Total 11178MB
GPU RAM Free: 1369MB 	|	 Used: 9809MB 	|	 Util 

  if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len):
  if attention_mask.size() != (bsz, 1, tgt_len, src_len):
  if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim):


  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


virual memory usage: svmem(total=270353252352, available=165182689280, percent=38.9, used=102328029184, free=46029905920, active=58644443136, inactive=156091232256, buffers=87986790400, cached=34008526848, shared=355893248, slab=6348910592) 

RAM Free: 46029905920MB | Used: 102328029184MB | Util  39% | Total 270353252352MB 
GPU RAM Free: 1773MB 	|	 Used: 9405MB 	|	 Util  84% 	|	 Total 11178MB
GPU RAM Free: 8MB 	|	 Used: 11170MB 	|	 Util 100% 	|	 Total 11178MB
GPU RAM Free: 1195MB 	|	 Used: 9983MB 	|	 Util  89% 	|	 Total 11178MB
GPU RAM Free: 8131MB 	|	 Used: 3047MB 	|	 Util  27% 	|	 Total 11178MB
GPU RAM Free: 407MB 	|	 Used: 10771MB 	|	 Util  96% 	|	 Total 11178MB
GPU RAM Free: 1243MB 	|	 Used: 9935MB 	|	 Util  89% 	|	 Total 11178MB
GPU RAM Free: 179MB 	|	 Used: 10999MB 	|	 Util  98% 	|	 Total 11178MB
GPU RAM Free: 961MB 	|	 Used: 10217MB 	|	 Util  91% 	|	 Total 11178MB
GPU RAM Free: 3MB 	|	 Used: 11175MB 	|	 Util 100% 	|	 Total 11178MB
GPU RAM Free: 1307MB 	|	 Used: 9871MB 	|	 Util  

  if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len):
  if attention_mask.size() != (bsz, 1, tgt_len, src_len):
  if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim):


  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


virual memory usage: svmem(total=270353252352, available=165002559488, percent=39.0, used=102508158976, free=49142784000, active=57836396544, inactive=153813241856, buffers=87990243328, cached=30712066048, shared=355893248, slab=6337298432) 

RAM Free: 49142784000MB | Used: 102508158976MB | Util  39% | Total 270353252352MB 
GPU RAM Free: 1345MB 	|	 Used: 9833MB 	|	 Util  88% 	|	 Total 11178MB
GPU RAM Free: 8MB 	|	 Used: 11170MB 	|	 Util 100% 	|	 Total 11178MB
GPU RAM Free: 1195MB 	|	 Used: 9983MB 	|	 Util  89% 	|	 Total 11178MB
GPU RAM Free: 8131MB 	|	 Used: 3047MB 	|	 Util  27% 	|	 Total 11178MB
GPU RAM Free: 407MB 	|	 Used: 10771MB 	|	 Util  96% 	|	 Total 11178MB
GPU RAM Free: 1243MB 	|	 Used: 9935MB 	|	 Util  89% 	|	 Total 11178MB
GPU RAM Free: 179MB 	|	 Used: 10999MB 	|	 Util  98% 	|	 Total 11178MB
GPU RAM Free: 1689MB 	|	 Used: 9489MB 	|	 Util  85% 	|	 Total 11178MB
GPU RAM Free: 1261MB 	|	 Used: 9917MB 	|	 Util  89% 	|	 Total 11178MB
GPU RAM Free: 607MB 	|	 Used: 10571MB 	|	 Util

  if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len):
  if attention_mask.size() != (bsz, 1, tgt_len, src_len):
  if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim):


  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)


virual memory usage: svmem(total=270353252352, available=164850221056, percent=39.0, used=102660497408, free=48978595840, active=57831866368, inactive=153968353280, buffers=88024530944, cached=30689628160, shared=355893248, slab=6340517888) 

RAM Free: 48978595840MB | Used: 102660497408MB | Util  39% | Total 270353252352MB 
GPU RAM Free: 1345MB 	|	 Used: 9833MB 	|	 Util  88% 	|	 Total 11178MB
GPU RAM Free: 8MB 	|	 Used: 11170MB 	|	 Util 100% 	|	 Total 11178MB
GPU RAM Free: 1195MB 	|	 Used: 9983MB 	|	 Util  89% 	|	 Total 11178MB
GPU RAM Free: 7785MB 	|	 Used: 3393MB 	|	 Util  30% 	|	 Total 11178MB
GPU RAM Free: 407MB 	|	 Used: 10771MB 	|	 Util  96% 	|	 Total 11178MB
GPU RAM Free: 1243MB 	|	 Used: 9935MB 	|	 Util  89% 	|	 Total 11178MB
GPU RAM Free: 179MB 	|	 Used: 10999MB 	|	 Util  98% 	|	 Total 11178MB
GPU RAM Free: 961MB 	|	 Used: 10217MB 	|	 Util  91% 	|	 Total 11178MB
GPU RAM Free: 467MB 	|	 Used: 10711MB 	|	 Util  96% 	|	 Total 11178MB
GPU RAM Free: 607MB 	|	 Used: 10571MB 	|	 Util

  if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len):
  if attention_mask.size() != (bsz, 1, tgt_len, src_len):
  if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim):


  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


virual memory usage: svmem(total=270353252352, available=167813447680, percent=37.9, used=99697270784, free=51896840192, active=57826693120, inactive=151070441472, buffers=88065454080, cached=30693687296, shared=355893248, slab=6342864896) 

RAM Free: 51896840192MB | Used: 99697270784MB | Util  38% | Total 270353252352MB 
GPU RAM Free: 151MB 	|	 Used: 11027MB 	|	 Util  99% 	|	 Total 11178MB
GPU RAM Free: 8MB 	|	 Used: 11170MB 	|	 Util 100% 	|	 Total 11178MB
GPU RAM Free: 1195MB 	|	 Used: 9983MB 	|	 Util  89% 	|	 Total 11178MB
GPU RAM Free: 7785MB 	|	 Used: 3393MB 	|	 Util  30% 	|	 Total 11178MB
GPU RAM Free: 407MB 	|	 Used: 10771MB 	|	 Util  96% 	|	 Total 11178MB
GPU RAM Free: 1243MB 	|	 Used: 9935MB 	|	 Util  89% 	|	 Total 11178MB
GPU RAM Free: 177MB 	|	 Used: 11001MB 	|	 Util  98% 	|	 Total 11178MB
GPU RAM Free: 589MB 	|	 Used: 10589MB 	|	 Util  95% 	|	 Total 11178MB
GPU RAM Free: 423MB 	|	 Used: 10755MB 	|	 Util  96% 	|	 Total 11178MB
GPU RAM Free: 607MB 	|	 Used: 10571MB 	|	 Util  

  if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len):
  if attention_mask.size() != (bsz, 1, tgt_len, src_len):
  if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim):


  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/75 [00:00<?, ?it/s]

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


virual memory usage: svmem(total=270353252352, available=167511785472, percent=38.0, used=99998896128, free=51539054592, active=57842049024, inactive=151415463936, buffers=88095334400, cached=30719967232, shared=355893248, slab=6345658368) 

RAM Free: 51539054592MB | Used: 99998896128MB | Util  38% | Total 270353252352MB 
GPU RAM Free: 225MB 	|	 Used: 10953MB 	|	 Util  98% 	|	 Total 11178MB
GPU RAM Free: 8MB 	|	 Used: 11170MB 	|	 Util 100% 	|	 Total 11178MB
GPU RAM Free: 1193MB 	|	 Used: 9985MB 	|	 Util  89% 	|	 Total 11178MB
GPU RAM Free: 7439MB 	|	 Used: 3739MB 	|	 Util  33% 	|	 Total 11178MB
GPU RAM Free: 407MB 	|	 Used: 10771MB 	|	 Util  96% 	|	 Total 11178MB
GPU RAM Free: 1243MB 	|	 Used: 9935MB 	|	 Util  89% 	|	 Total 11178MB
GPU RAM Free: 177MB 	|	 Used: 11001MB 	|	 Util  98% 	|	 Total 11178MB
GPU RAM Free: 589MB 	|	 Used: 10589MB 	|	 Util  95% 	|	 Total 11178MB
GPU RAM Free: 423MB 	|	 Used: 10755MB 	|	 Util  96% 	|	 Total 11178MB
GPU RAM Free: 607MB 	|	 Used: 10571MB 	|	 Util  

  if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len):
  if attention_mask.size() != (bsz, 1, tgt_len, src_len):
  if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim):


  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

  0%|          | 0/225 [00:00<?, ?it/s]

In [None]:
#plotting cell!
def get_data(fl):
    with open(fl) as json_file:
        data = json.load(json_file)
    return data

dataset = 'timit_asr'
feature = 'sex'
lang = 'en'

rnd = get_data(GRAPHS_PATH + '2022-01-28/' + f"{dataset}_{lang}_{feature}_task_random=full_grads=False_variational={str(False)}_poisoned=0.json")
d = get_data(GRAPHS_PATH + '2022-01-28/' + f"{dataset}_{lang}_{feature}_task_random={str(None)}_grads=False_variational={str(False)}_poisoned=0.json")
var_rnd = get_data(GRAPHS_PATH + '/2022-01-28/' + f"{dataset}_{lang}_{feature}_task_random=full_grads=False_variational={str(True)}_poisoned=0.json")
var_d = get_data(GRAPHS_PATH  + '2022-01-28/' + f"{dataset}_{lang}_{feature}_task_random={str(None)}_grads=False_variational={str(True)}_poisoned=0.json")

# x =  [1, 2, 3] + [8, 11, 13] + [21, 22, 23] 
#on linguistic features:
#x = [1, 2, 3] + [8, 11, 13] + [21, 22]

def plot(x, rnd, d, var_rnd, var_d):
    print(x)
    print("var. ord. / data. ord.", np.array(var_d['data']['loss']) / np.array(d['data']['loss']))
    print("data. ord. / data. rand.", np.array(var_rnd['data']['loss']) / np.array(rnd['data']['loss']))

    print("data. rand. / data. ord.", np.array(rnd['data']['loss']) / np.array(d['data']['loss']))
    print("var. rnd. / var. ord.", np.array(var_rnd['data']['loss']) / np.array(var_d['data']['loss']))

    fig, axs = plt.subplots(2, 2, sharex=True)
    plt.rcParams.update({'font.size': 14})
    plt.rcParams["figure.figsize"] = (16, 12)

    axs[0, 0].bar(x, var_d['data']['loss'], color = 'r', label = 'variational')
    axs[0, 0].bar(x, d['data']['loss'], color = 'k', label = 'data')
    axs[0, 0].set_title("Codelength on pretrained network")
    axs[0, 0].set_xticks(x)
    axs[0, 0].legend()
    axs[0, 0].grid(True)

    axs[1, 0].bar(x, var_rnd['data']['loss'], color = 'r', label = 'variational')
    axs[1, 0].bar(x, var_d['data']['loss'], color = 'y', label = 'ord. variational')
    axs[1, 0].bar(x, rnd['data']['loss'], color = 'k', label = 'data')
    axs[1, 0].set_title("Codelength on rand. init. network")
    axs[1, 0].legend()
    axs[1, 0].grid(True)

    axs[0, 1].plot(x, d['data']['metrics'],  lw = 2, label = 'pretrained')
    axs[0, 1].plot(x, rnd['data']['metrics'],  lw = 2, label = 'rand. init.')
    axs[0, 1].legend()
    axs[0, 1].grid(True)
    axs[0, 1].set_title("Metrics [F1]")

    axs[1, 1].plot(x, np.abs(np.array(d['data']['metrics']) - np.array(rnd['data']['metrics'])), c = 'k', lw = 2, label = 'data')
    axs[1, 1].plot(x, np.abs(np.array(var_d['data']['metrics']) - np.array(var_rnd['data']['metrics'])), c = 'r', lw = 2, label = 'variational')
    axs[1, 1].set_title("Selectivity " + r"$\sigma(m, r, \hat{y}, y) \triangleq |r(m, \hat{y}, y) - r(m_{rand}, \hat{y}, y)|$")
    axs[1, 1].legend()
    axs[1, 1].grid(True)
    fig.tight_layout()

    plt.savefig(f'{feature}.png', bbox_inches='tight')

plot(np.arange(1, 24, 1), rnd, d, var_rnd, var_d)

In [None]:
dataset = 'timit_asr'
feature = 'pos_tag'
lang = 'en'

rnd = get_data(GRAPHS_PATH + '2022-01-28/' + f"{dataset}_{lang}_{feature}_task_random=full_grads=False_variational={str(False)}_poisoned=0.json")
d = get_data(GRAPHS_PATH + '2022-01-28/' + f"{dataset}_{lang}_{feature}_task_random={str(None)}_grads=False_variational={str(False)}_poisoned=0.json")
var_rnd = get_data(GRAPHS_PATH + '/2022-01-28/' + f"{dataset}_{lang}_{feature}_task_random=full_grads=False_variational={str(True)}_poisoned=0.json")
var_d = get_data(GRAPHS_PATH  + '2022-01-28/' + f"{dataset}_{lang}_{feature}_task_random={str(None)}_grads=False_variational={str(True)}_poisoned=0.json")
plot(np.arange(1, 24, 1), rnd, d, var_rnd, var_d)

In [None]:
dataset = 'timit_asr'
feature = 'age_bin'
lang = 'en'

rnd = get_data(GRAPHS_PATH + '2022-01-28/' + f"{dataset}_{lang}_{feature}_task_random=full_grads=False_variational={str(False)}_poisoned=0.json")
d = get_data(GRAPHS_PATH + '2022-01-28/' + f"{dataset}_{lang}_{feature}_task_random={str(None)}_grads=False_variational={str(False)}_poisoned=0.json")
var_rnd = get_data(GRAPHS_PATH + '/2022-01-28/' + f"{dataset}_{lang}_{feature}_task_random=full_grads=False_variational={str(True)}_poisoned=0.json")
var_d = get_data(GRAPHS_PATH  + '2022-01-28/' + f"{dataset}_{lang}_{feature}_task_random={str(None)}_grads=False_variational={str(True)}_poisoned=0.json")
plot(np.arange(1, 24, 1), rnd, d, var_rnd, var_d)

In [None]:
dataset = 'timit_asr'
feature = 'utterance'
lang = 'en'

rnd = get_data(GRAPHS_PATH + '2022-01-28/' + f"{dataset}_{lang}_{feature}_task_random=full_grads=False_variational={str(False)}_poisoned=0.json")
d = get_data(GRAPHS_PATH + '2022-01-28/' + f"{dataset}_{lang}_{feature}_task_random={str(None)}_grads=False_variational={str(False)}_poisoned=0.json")
var_rnd = get_data(GRAPHS_PATH + '/2022-01-28/' + f"{dataset}_{lang}_{feature}_task_random=full_grads=False_variational={str(True)}_poisoned=0.json")
var_d = get_data(GRAPHS_PATH  + '2022-01-28/' + f"{dataset}_{lang}_{feature}_task_random={str(None)}_grads=False_variational={str(True)}_poisoned=0.json")
plot(np.arange(1, 24, 1), rnd, d, var_rnd, var_d)

## Inference class (TBD)

In [None]:
class InferenceR:
    def __init__(self, probing_model_checkpoint, model_path: str,
                       on_logits: bool = False,
                       on_weights: bool = False,
                       on_attentions: bool = True):
        """An inference class
        """
        self.checkpoint = torch.load(probing_model_checkpoint)
        self.model = Wav2Vec2ForCTC.from_pretrained(model_path, cache_dir = CACHE_DIR)
        self.probe = Wav2Vec2ForCTC.from_pretrained(model_path, cache_dir = CACHE_DIR)

        #flags
        self.on_l = on_logits
        self.on_w = on_weights
        self.on_att = on_attentions

    def _reconstruct_model(self, prober):
        layer_idx = self.checkpoint['layer_index']
        self.probe.wav2vec2.encoder.layers = self.probe.wav2vec2.encoder.layers[:layer_idx]
        self.probing_model = prober(parent_model = self.probe.wav2vec2.encoder,
                                    clf = LinearModel(**self.checkpoint['model_params']),
                                    enable_grads = False)
        self.probing_model.load_state_dict(self.checkpoint['model_state_dict'])
        self.probe.wav2vec2.encoder.layers = self.model.wav2vec2.encoder.layers[layer_idx:]
        self.clf = self.probing_model.clf
    def _compute_attention_flow(self, dataloader: torch.utils.data.DataLoader):
        pass
    def _compute_shap_cam_on_logits(self, on_target: bool = True, on_probing: bool = False): 
        pass
    def visualize_attention_flow(self, data):
        pass
    def visualize_shap_cam(self, data, tgt: str):
        pass