In [1]:
%cd ../

/mnt/kireev/pycharm-deploy/vtb


In [2]:
from glob import glob

In [3]:
import numpy as np
import pandas as pd

In [4]:
import torch

In [5]:
import pytorch_lightning as pl

In [6]:
from pyhocon import ConfigFactory

In [7]:
# Index of the data fold held out for testing; the validation and training
# folds in the cells below are derived from this value.
FOLD_ID = 1

In [8]:
# The configured fold is the test fold.
fold_id_test = FOLD_ID

In [9]:
# Number of folds = number of matching files found on disk.
# NOTE(review): the cells below assume the files are numbered 0..folds_count-1 — confirm.
folds_count = len(glob('data/train_matching_*.csv'))
folds_count

6

In [10]:
# The validation fold is the one right after the test fold (wrapping around),
# which keeps the split deterministic; a random pick among the non-test folds
# was used here earlier.
fold_id_valid = (fold_id_test + 1) % folds_count
fold_id_valid

2

In [11]:
# Matching tables per split: the training table is the concatenation of every
# fold that is neither the test fold nor the validation fold.
df_matching_train = pd.concat([
    pd.read_csv(f'data/train_matching_{i}.csv')
    for i in range(folds_count)
    if i != fold_id_test and i != fold_id_valid
])
df_matching_valid = pd.read_csv(f'data/train_matching_{fold_id_valid}.csv')
df_matching_test = pd.read_csv(f'data/train_matching_{fold_id_test}.csv')

In [12]:
# Row counts of the train / valid / test matching tables.
list(map(len, (df_matching_train, df_matching_valid, df_matching_test)))

[11721, 2930, 2930]

In [13]:
def trx_types(df):
    """Normalise a raw transactions frame into the columns used downstream.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain `user_id`, `transaction_dttm`, `mcc_code`, `currency_rk`
        and `transaction_amt`. The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        Columns `user_id`, `event_time`, `mcc_code`, `currency_rk`,
        `transaction_amt`, where `event_time` is seconds since the Unix epoch
        (float) and the two code columns are cast to `str`.
    """
    timestamps = pd.to_datetime(df['transaction_dttm'])
    # Subtracting the epoch and dividing by one second does not depend on the
    # platform's default int width nor on the datetime resolution pandas picked,
    # unlike `astype(int) / 1e9`.
    epoch = pd.Timestamp(0, tz=timestamps.dt.tz)
    event_time = (timestamps - epoch) / pd.Timedelta(seconds=1)

    # `assign` builds a new frame, so the caller's frame keeps its original dtypes.
    typed = df.assign(
        mcc_code=df['mcc_code'].astype(str),
        currency_rk=df['currency_rk'].astype(str),
        event_time=event_time,
    )
    return typed[['user_id', 'event_time', 'mcc_code', 'currency_rk', 'transaction_amt']]

In [14]:
# Transactions per split, each file normalised by `trx_types`; the training
# frame concatenates every fold that is neither the test nor the validation fold.
df_trx_train = pd.concat([
    trx_types(pd.read_csv(f'data/transactions_{i}.csv'))
    for i in range(folds_count)
    if i != fold_id_test and i != fold_id_valid
])
df_trx_valid = trx_types(pd.read_csv(f'data/transactions_{fold_id_valid}.csv'))
df_trx_test = trx_types(pd.read_csv(f'data/transactions_{fold_id_test}.csv'))

In [15]:
def click_types(df, categories=None):
    """Normalise a raw clickstream frame and attach the category hierarchy.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain `user_id`, `timestamp` and `cat_id`. The frame is not modified.
    categories : pandas.DataFrame, optional
        Category table with `cat_id`, `level_0`, `level_1`, `level_2`. When
        omitted it is read from 'data/click_categories.csv'; pass it explicitly
        to avoid re-reading the file for every fold.

    Returns
    -------
    pandas.DataFrame
        Columns `user_id`, `event_time`, `cat_id`, `level_0`, `level_1`,
        `level_2`. `event_time` is seconds since the Unix epoch (float) and
        `cat_id` is cast to `str`. The join is an inner join, so clicks whose
        `cat_id` is missing from the category table are dropped.
    """
    if categories is None:
        categories = pd.read_csv('data/click_categories.csv')

    timestamps = pd.to_datetime(df['timestamp'])
    # Epoch subtraction is independent of platform int width and of the
    # datetime resolution, unlike `astype(int) / 1e9`.
    epoch = pd.Timestamp(0, tz=timestamps.dt.tz)
    event_time = (timestamps - epoch) / pd.Timedelta(seconds=1)

    # Merge before casting `cat_id` to str so the join keys keep their raw dtype.
    merged = pd.merge(df.assign(event_time=event_time), categories, on='cat_id')
    merged['cat_id'] = merged['cat_id'].astype(str)
    return merged[['user_id', 'event_time', 'cat_id', 'level_0', 'level_1', 'level_2']]

In [16]:
# Clickstream per split, each file normalised by `click_types`; the training
# frame concatenates every fold that is neither the test nor the validation fold.
df_click_train = pd.concat([
    click_types(pd.read_csv(f'data/clickstream_{i}.csv'))
    for i in range(folds_count)
    if i != fold_id_test and i != fold_id_valid
])
df_click_valid = click_types(pd.read_csv(f'data/clickstream_{fold_id_valid}.csv'))
df_click_test = click_types(pd.read_csv(f'data/clickstream_{fold_id_test}.csv'))

In [17]:
import logging
import numpy as np
import pandas as pd

from typing import List

from dltranz.data_preprocessing.base import DataPreprocessor
from dltranz.data_preprocessing.util import pd_hist

logger = logging.getLogger(__name__)


class PandasDataPreprocessor(DataPreprocessor):
    """Data preprocessor based on pandas.DataFrame.

    During preprocessing it
        * transforms the `cols_event_time` column into a numeric `event_time` column;
        * encodes category columns `cols_category` into ints;
        * applies a signed logarithm transformation to `cols_log_norm` columns;
        * groups flat data by `col_id`;
        * arranges data into a list of dicts with features.

    Parameters
    ----------
    col_id : str
        name of column with ids
    cols_event_time : str,
        name of column with time and date
    cols_category : list[str],
        list of category columns
    cols_log_norm : list[str],
        list of columns to be logarithmed
    time_transformation: str. Default: 'default'.
        type of transformation to be applied to time column; one of
        'none', 'default', 'float', 'gender'. With 'none' no column is created,
        so the input must already contain a column named 'event_time'.
    print_dataset_info : bool. Default: False.
        If True, print dataset stats during preprocessor fitting and data transformation
    """

    def __init__(self,
                 col_id: str,
                 cols_event_time: str,
                 cols_category: List[str],
                 cols_log_norm: List[str],
                 time_transformation: str = 'default',
                 print_dataset_info: bool = False):

        super().__init__(col_id, cols_event_time, cols_category, cols_log_norm)
        self.print_dataset_info = print_dataset_info
        self.time_transformation = time_transformation

    def fit(self, dt, **params):
        """Learn the category -> integer code mapping for every category column.

        Codes start at 1 and follow descending frequency (the most frequent
        value gets code 1); code 0 is never assigned here.

        Parameters
        ----------
        dt : pandas.DataFrame with flat data

        Returns
        -------
        self : object
            Fitted preprocessor.
        """
        # Reset internal state before fitting
        self._reset()

        for col in self.cols_category:
            pd_col = dt[col].astype(str)
            mapping = {k: i + 1 for i, k in enumerate(pd_col.value_counts().index)}
            self.cols_category_mapping[col] = mapping

            if self.print_dataset_info:
                logger.info(f'Encoder stat for "{col}":\ncodes | trx_count\n{pd_hist(dt[col], col)}')

        return self

    def transform(self, df, copy=True):
        """Perform preprocessing.

        Parameters
        ----------
        df : pandas.DataFrame with flat data
        copy : bool, default=True
            Work on a copy of `df`; when False the input frame is modified in place.

        Returns
        -------
        features : List of dicts grouped by col_id.
            Each dict holds the id under `col_id` plus one numpy array per
            feature column, ordered by `event_time` within the id.

        Raises
        ------
        NotImplementedError
            If `time_transformation` is not one of the supported values.
        KeyError
            If a category column was not seen during `fit`.
        """
        self.check_is_fitted()
        df_data = df.copy() if copy else df

        if self.print_dataset_info:
            logger.info(f'Found {df_data[self.col_id].nunique()} unique ids')

        # event_time mapping
        if self.time_transformation == 'none':
            pass
        elif self.time_transformation == 'default':
            df_data = self._td_default(df_data, self.cols_event_time)
        elif self.time_transformation == 'float':
            df_data = self._td_float(df_data, self.cols_event_time)
        elif self.time_transformation == 'gender':
            df_data = self._td_gender(df_data, self.cols_event_time)
        else:
            raise NotImplementedError(f'Unknown type of data transformation: "{self.time_transformation}"')

        for col in self.cols_category:
            if col not in self.cols_category_mapping:
                raise KeyError(f"column {col} isn't in fitted category columns")
            pd_col = df_data[col].astype(str)
            # Values unseen during fit get the largest fitted code, i.e. they
            # share the code of the least frequent training value.
            df_data[col] = pd_col.map(self.cols_category_mapping[col]) \
                .fillna(max(self.cols_category_mapping[col].values()))
            if self.print_dataset_info:
                logger.info(f'Encoder stat for "{col}":\ncodes | trx_count\n{pd_hist(df_data[col], col)}')

        for col in self.cols_log_norm:
            signed_log = np.log1p(abs(df_data[col])) * np.sign(df_data[col])
            # NOTE(review): the scale is the max of the frame being transformed,
            # not a value stored at fit time, so each split is scaled on its own.
            max_abs = abs(signed_log).max()
            # Skip the division when the column is all zeros (or empty / all NaN):
            # dividing by 0 would turn every value into NaN.
            if max_abs > 0:
                signed_log = signed_log / max_abs
            df_data[col] = signed_log
            if self.print_dataset_info:
                logger.info(f'Encoder stat for "{col}":\ncodes | trx_count\n{pd_hist(df_data[col], col)}')

        if self.print_dataset_info:
            trx_counts = df_data.groupby(self.col_id)['event_time'].count()
            logger.info(f'Trx count per clients:\nlen(trx_list) | client_count\n{pd_hist(trx_counts, "trx_count")}')

        # column filter
        used_columns = [col for col in df_data.columns
                        if col in self.cols_category + self.cols_log_norm + ['event_time', self.col_id]]

        logger.info('Feature collection in progress ...')
        # `et_index` duplicates event_time into the index so that sort_index orders
        # the rows by (id, time) while event_time itself stays a feature column.
        features = df_data[used_columns] \
            .assign(et_index=lambda x: x['event_time']) \
            .set_index([self.col_id, 'et_index']).sort_index() \
            .groupby(self.col_id).apply(lambda x: {k: np.array(v) for k, v in x.to_dict(orient='list').items()}) \
            .rename('feature_arrays').reset_index().to_dict(orient='records')

        # Flatten {'<col_id>': id, 'feature_arrays': {...}} into a single dict per id.
        features = [{self.col_id: rec[self.col_id], **rec['feature_arrays']} for rec in features]

        if self.print_dataset_info:
            feature_names = list(features[0].keys())
            logger.info(f'Feature names: {feature_names}')

        logger.info(f'Prepared features for {len(features)} clients')
        return features

    @staticmethod
    def _td_default(df, cols_event_time):
        """Replace timestamps by their rank among the distinct sorted values.

        The rank is stored in an `event_time` column joined back onto `df`.
        """
        df_event_time = df[cols_event_time].drop_duplicates()
        df_event_time = df_event_time.sort_values(cols_event_time)
        df_event_time['event_time'] = np.arange(len(df_event_time))
        df = pd.merge(df, df_event_time, on=cols_event_time)
        logger.info('Default time transformation')
        return df

    @staticmethod
    def _td_float(df, col_event_time):
        """Store `col_event_time` cast to float in an `event_time` column (modifies `df`)."""
        df['event_time'] = df[col_event_time].astype(float)
        logger.info('To-float time transformation')
        return df

    @staticmethod
    def _td_gender(df, col_event_time):
        """Gender-dataset-like transformation

        'd hh:mm:ss' -> float where integer part is day number and fractional part
        is the fraction of the day elapsed
        '1 00:00:00' -> 1.0
        '1 12:00:00' -> 1.5
        '1 01:00:00' -> 1 + 1 / 24
        '2 23:59:59' -> ~2.99999
        '432 12:00:00' -> 432.5

        :param df: frame holding the raw time strings; gets an `event_time` column added
        :param col_event_time: name of the column with 'd hh:mm:ss' strings
        :return: `df` with the `event_time` column
        """
        padded_time = df[col_event_time].str.pad(15, 'left', '0')
        day_part = padded_time.str[:6].astype(float)
        time_of_day = pd.to_datetime(padded_time.str[7:], format='%H:%M:%S')
        # Whole seconds via an explicit datetime64[s] / int64 cast: independent of
        # the platform's default int width and of the parsed datetime resolution.
        seconds = time_of_day.values.astype('datetime64[s]').astype('int64')
        # The parsed values carry a dummy date; the modulo keeps only the time of day.
        time_part = seconds % (24 * 60 * 60) / (24 * 60 * 60)
        df['event_time'] = day_part + time_part
        logger.info('Gender-dataset-like time transformation')
        return df


In [18]:
# One preprocessor per modality. `time_transformation='none'` is used because
# `trx_types` / `click_types` already produce a numeric `event_time` column.
preprocessor_trx = PandasDataPreprocessor(
    col_id='user_id',
    cols_event_time='event_time',
    time_transformation='none',
    cols_category=["mcc_code", "currency_rk"],
    cols_log_norm=["transaction_amt"],
    print_dataset_info=False,
)

# The clickstream has categorical features only, hence no log-normalised columns.
preprocessor_click = PandasDataPreprocessor(
    col_id='user_id',
    cols_event_time='event_time',
    time_transformation='none',
    cols_category=['cat_id', 'level_0', 'level_1', 'level_2'],
    cols_log_norm=[],
    print_dataset_info=False,
)

In [19]:
from dltranz.data_load.iterable_processing.category_size_clip import CategorySizeClip

In [20]:
# Per-column size limits handed to CategorySizeClip in `trx_to_torch`.
# NOTE(review): the model config declares the `currency_rk` embedding with
# `in: 10` while the limit here is 5 — confirm the mismatch is intended.
category_max_size_trx = {
    'mcc_code': 350,
    'currency_rk': 5,
}
# Per-column size limits handed to CategorySizeClip in `click_to_torch`.
category_max_size_click = {
    'cat_id': 400,
    'level_0': 400,
    'level_1': 400,
    'level_2': 400,
}

In [21]:
def trx_to_torch(seq):
    """Yield `(user_id, tensors)` pairs for preprocessed transaction records.

    Records are first passed through `CategorySizeClip(category_max_size_trx)`.
    `event_time` and `transaction_amt` become float tensors, `mcc_code` and
    `currency_rk` become int tensors.
    """
    clipped_records = CategorySizeClip(category_max_size_trx)(seq)
    for record in clipped_records:
        user_id = record['user_id']
        tensors = {'event_time': torch.from_numpy(record['event_time']).float()}
        for column in ('mcc_code', 'currency_rk'):
            tensors[column] = torch.from_numpy(record[column]).int()
        tensors['transaction_amt'] = torch.from_numpy(record['transaction_amt']).float()
        yield user_id, tensors

def click_to_torch(seq):
    """Yield `(user_id, tensors)` pairs for preprocessed clickstream records.

    Records are first passed through `CategorySizeClip(category_max_size_click)`.
    `event_time` becomes a float tensor; `cat_id` and the three `level_*`
    columns become int tensors.
    """
    clipped_records = CategorySizeClip(category_max_size_click)(seq)
    for record in clipped_records:
        user_id = record['user_id']
        tensors = {'event_time': torch.from_numpy(record['event_time']).float()}
        for column in ('cat_id', 'level_0', 'level_1', 'level_2'):
            tensors[column] = torch.from_numpy(record[column]).int()
        yield user_id, tensors

In [22]:
# The category mapping is fitted on the training transactions only and then
# reused for the validation and test splits. Each result is a dict keyed by
# user_id whose values are dicts of tensors (see `trx_to_torch`).
features_trx_train = dict(trx_to_torch(preprocessor_trx.fit_transform(df_trx_train)))
features_trx_valid = dict(trx_to_torch(preprocessor_trx.transform(df_trx_valid)))
features_trx_test = dict(trx_to_torch(preprocessor_trx.transform(df_trx_test)))

In [23]:
# Same scheme for the clickstream: fit on train, transform valid and test.
# Each result is a dict keyed by user_id (see `click_to_torch`).
features_click_train = dict(click_to_torch(preprocessor_click.fit_transform(df_click_train)))
features_click_valid = dict(click_to_torch(preprocessor_click.transform(df_click_valid)))
features_click_test = dict(click_to_torch(preprocessor_click.transform(df_click_test)))

In [24]:
import pickle

In [25]:
# Persist the fitted preprocessors next to the notebook's working directory
# (set by the `%cd ../` in the first cell).
# NOTE(review): the class is defined inside this notebook, so unpickling
# presumably needs the same class to be importable/defined first — confirm.
with open('preprocessor_trx.p', 'wb') as f:
    pickle.dump(preprocessor_trx, f)
with open('preprocessor_click.p', 'wb') as f:
    pickle.dump(preprocessor_click, f)

In [26]:
from vtb_code.data import PairedDataset, paired_collate_fn

In [27]:
from dltranz.data_load import augmentation_chain
from dltranz.data_load.augmentations.seq_len_limit import SeqLenLimit
from dltranz.data_load.augmentations.random_slice import RandomSlice

In [28]:
import random

class RandomSample:
    """Augmentation that keeps a random, order-preserving subset of a sequence.

    `x` is a dict of equally long array-likes; the same randomly chosen
    positions are taken from every value. The subset size is drawn uniformly
    between the bounds returned by `get_min_max`.
    """

    def __init__(self, min_len, max_len, rate_for_min=1.0):
        super().__init__()

        self.min_len, self.max_len = min_len, max_len
        self.rate_for_min = rate_for_min

    def __call__(self, x):
        # The sequence length is taken from the first value of the dict.
        first_column = next(iter(x.values()))
        picked = self.get_idx(len(first_column))
        return {name: values[picked] for name, values in x.items()}

    def get_idx(self, seq_len):
        """Return sorted positions to keep; all positions when the bounds are inverted."""
        all_positions = np.arange(seq_len)

        lower, upper = self.get_min_max(seq_len)
        if lower > upper:
            return all_positions

        sample_size = random.randint(lower, upper)
        chosen = np.random.choice(all_positions, size=sample_size, replace=False)
        return np.sort(chosen)

    def get_min_max(self, seq_len):
        """Return `(min_len, max_len)` for a sequence of `seq_len` items.

        The upper bound is capped by `seq_len`; the lower bound is capped by
        `seq_len * rate_for_min` and is never below 1.
        """
        upper = int(min(self.max_len, seq_len))
        lower = max(1, int(min(self.min_len, seq_len * self.rate_for_min)))
        return lower, upper


In [53]:
class DropDuplicate:
    """Augmentation that collapses runs of equal consecutive values.

    For every run of identical neighbouring values in `x[col_check]` only the
    first element is kept, and the same positions are taken from every other
    value of `x`.

    Parameters
    ----------
    col_check : str
        key of the sequence whose consecutive duplicates are dropped
    col_new_cnt : str, optional
        when given, the length of each collapsed run is stored under this key
        (as a numpy array)
    keep : str
        only 'first' is supported

    Raises
    ------
    NotImplementedError
        If `keep` is anything other than 'first'.
    """

    def __init__(self, col_check, col_new_cnt=None, keep='first'):
        super().__init__()

        if keep != 'first':
            raise NotImplementedError(f'keep={keep!r} is not supported, only "first"')
        self.col_check = col_check
        self.col_new_cnt = col_new_cnt
        self.keep = keep

    def __call__(self, x):
        idx, new_cnt = self.get_idx(x[self.col_check])
        new_x = {k: v[idx] for k, v in x.items()}
        if self.col_new_cnt is not None:
            new_x[self.col_new_cnt] = new_cnt
        return new_x

    def get_idx(self, x):
        """Return `(run_start_positions, run_lengths)` for the 1-d sequence `x`.

        Neighbours are compared with `!=` instead of subtracting, so empty
        sequences and non-numeric dtypes are handled as well.
        """
        values = np.asarray(x)
        # The first element always starts a run; every other element starts
        # one when it differs from its predecessor.
        is_run_start = np.ones(len(values), dtype=bool)
        is_run_start[1:] = values[1:] != values[:-1]
        new_ix = np.flatnonzero(is_run_start)
        # Distance to the next run start (or to the end) is the run length.
        new_cnt = np.diff(new_ix, append=len(values))
        return new_ix, new_cnt


In [31]:
from dltranz.seq_encoder import create_encoder
from dltranz.metric_learn.sampling_strategies import get_sampling_strategy
from dltranz.metric_learn.losses import get_loss

In [32]:
from vtb_code.metrics import PrecisionK, MeanReciprocalRankK

In [33]:
class PairedModule(pl.LightningModule):
    """Two-tower model matching transaction sequences with clickstream sequences.

    Each modality has its own sequence encoder; embeddings are L2-normalised
    and trained with a contrastive objective on Euclidean distances: squared
    distance for matched (trx, click) pairs plus a squared hinge on the
    closest non-matching pairs inside the batch.

    Parameters
    ----------
    params : config
        must provide `trx_seq` and `click_seq` encoder configs for `create_encoder`
    neg_count : int
        number of closest in-batch negatives used per row
    k : int
        cut-off passed to the PrecisionK / MeanReciprocalRankK metrics
    lr, weight_decay : float
        Adam settings
    step_size, gamma
        StepLR settings; when `step_size` is None a CyclicLR schedule is used instead
    base_lr, max_lr, step_size_up, step_size_down
        CyclicLR settings (only used when `step_size` is None)
    """

    # Hinge margin: negatives farther apart than this contribute no loss.
    NEG_MARGIN = 0.5
    # Distance written on the diagonal so that a row's own pair is never picked
    # as a negative; embeddings are L2-normalised before distances are taken,
    # so real distances cannot exceed 2.
    SELF_PAIR_FILL = 10

    def __init__(self, params, neg_count, k,
                 lr, weight_decay,
                 step_size, gamma,
                 base_lr, max_lr, step_size_up, step_size_down,
                ):
        super().__init__()
        # `params` is a config object and is kept out of the saved hyper-parameters.
        self.save_hyperparameters(ignore=['params'])

        # Wrapped in Sequential so that a projection head can be appended later
        # without renaming the sub-modules.
        self.seq_encoder_trx = torch.nn.Sequential(
            create_encoder(params['trx_seq'], is_reduce_sequence=True),
        )
        self.seq_encoder_click = torch.nn.Sequential(
            create_encoder(params['click_seq'], is_reduce_sequence=True),
        )

        self.train_precision = PrecisionK(k=k, compute_on_step=False)
        self.train_mrr = MeanReciprocalRankK(k=k, compute_on_step=False)
        self.valid_precision = PrecisionK(k=k, compute_on_step=False)
        self.valid_mrr = MeanReciprocalRankK(k=k, compute_on_step=False)

    def configure_optimizers(self):
        """Adam with either a StepLR schedule or, if `step_size` is None, a per-step CyclicLR."""
        optim = torch.optim.Adam(self.parameters(), lr=self.hparams.lr, weight_decay=self.hparams.weight_decay)
        if self.hparams.step_size is not None:
            scheduler = torch.optim.lr_scheduler.StepLR(
                optim, step_size=self.hparams.step_size, gamma=self.hparams.gamma)
        else:
            cyclic = torch.optim.lr_scheduler.CyclicLR(
                optim,
                base_lr=self.hparams.base_lr, max_lr=self.hparams.max_lr,
                step_size_up=self.hparams.step_size_up,
                step_size_down=self.hparams.step_size_down,
                cycle_momentum=False,
            )
            scheduler = {'scheduler': cyclic, 'interval': 'step'}
        return [optim], [scheduler]

    def pos_pairs(self, a, b):
        """Row-wise Euclidean distance between matched embeddings, shape (B,)."""
        return torch.nn.functional.pairwise_distance(a, b)

    def neg_pairs(self, a, b):
        """Distances to the `neg_count` closest non-matching rows, shape (B, k).

        The diagonal (row i of `a` against row i of `b`) is masked out; `k` is
        capped at `B - 1`.
        """
        l2 = torch.nn.functional.pairwise_distance(a.unsqueeze(1), b.unsqueeze(0))  # B(a) * B(b)
        batch_size = l2.size(0)
        self_pairs = torch.eye(batch_size, dtype=torch.bool, device=l2.device)
        l2 = l2.masked_fill(self_pairs, self.SELF_PAIR_FILL)
        k = min(self.hparams.neg_count, batch_size - 1)
        return torch.topk(l2, k=k, dim=1, largest=False).values

    def _embed(self, batch):
        """Encode both modalities of a batch and L2-normalise the embeddings (B, H each)."""
        x_trx, x_click = batch
        z_trx = torch.nn.functional.normalize(self.seq_encoder_trx(x_trx), dim=1)
        z_click = torch.nn.functional.normalize(self.seq_encoder_click(x_click), dim=1)
        return z_trx, z_click

    @staticmethod
    def _update_metrics(z_trx, z_click, precision, mrr):
        """Feed the negated trx-vs-click distance matrix into the retrieval metrics."""
        with torch.no_grad():
            l2 = torch.nn.functional.pairwise_distance(z_trx.unsqueeze(1), z_click.unsqueeze(0))
            # B(trx) * B(click); negated so that a larger score means a closer pair
            precision(-l2)
            mrr(-l2)

    def training_step(self, batch, batch_idx):
        z_trx, z_click = self._embed(batch)
        self._update_metrics(z_trx, z_click, self.train_precision, self.train_mrr)

        # Matched pairs are pulled together: squared distance.
        pos = self.pos_pairs(z_trx, z_click)
        self.log('logits_metrics/l2_pos_trx_click', pos.mean())
        loss_sum = pos.pow(2).sum()
        cnt = pos.size(0)

        # Hard negatives are pushed apart up to the margin, within and across modalities.
        for name, left, right in (
            ('trx_trx', z_trx, z_trx),
            ('trx_click', z_trx, z_click),
            ('click_click', z_click, z_click),
        ):
            neg = self.neg_pairs(left, right)
            self.log(f'logits_metrics/l2_neg_{name}', neg.mean())
            loss_sum = loss_sum + torch.relu(self.NEG_MARGIN - neg).pow(2).sum()
            cnt += neg.size(0) * neg.size(1)

        loss = loss_sum / cnt
        # Log the optimised loss itself (the mean negative distance was logged here before).
        self.log('loss/l2', loss)
        return loss

    def validation_step(self, batch, batch_idx):
        z_trx, z_click = self._embed(batch)
        self._update_metrics(z_trx, z_click, self.valid_precision, self.valid_mrr)

    def training_epoch_end(self, _):
        self.log('train_metrics/precision', self.train_precision, prog_bar=True)
        self.log('train_metrics/mrr', self.train_mrr, prog_bar=True)

    def validation_epoch_end(self, _):
        self.log('valid_metrics/precision', self.valid_precision, prog_bar=True)
        self.log('valid_metrics/mrr', self.valid_mrr, prog_bar=True)
    

In [59]:
# Model instance: one GRU encoder per modality, both with hidden_size 64.
# The embedding `in` sizes are expected to cover the category limits used in
# `trx_to_torch` / `click_to_torch`.
# `step_size` is set, so the StepLR branch of `configure_optimizers` is taken
# and the CyclicLR arguments on the last line are not used.
sup_model = PairedModule(
    ConfigFactory.parse_string('''
    trx_seq: {
        trx_encoder: {
          use_batch_norm_with_lens: false
          norm_embeddings: false,
          embeddings_noise: 0.000,
          embeddings: {
            mcc_code: {in: 350, out: 64},
            currency_rk: {in: 10, out: 4}
          },
          numeric_values: {
            transaction_amt: identity
          }
        },
        encoder_type: rnn,
        rnn: {
          type: gru,
          hidden_size: 64,
          bidir: false,
          trainable_starter: static
        }
    }
    click_seq: {
        trx_encoder: {
          use_batch_norm_with_lens: false
          norm_embeddings: false,
          embeddings_noise: 0.000,
          embeddings: {
            cat_id: {in: 400, out: 64},
            level_0: {in: 400, out: 16}
            level_1: {in: 400, out: 8}
            level_2: {in: 400, out: 4}
          },
          numeric_values: {
          }
        },
        encoder_type: rnn,
        rnn: {
          type: gru,
          hidden_size: 64,
          bidir: false,
          trainable_starter: static
        }    
    }
'''),                     
    neg_count=8, k=17,
    lr=0.004, weight_decay=0,
    step_size=500, gamma=0.3,
    base_lr=0.0005, max_lr=0.004, step_size_up=300, step_size_down=900,
)


In [60]:
# Paired datasets: rows of the matching table whose `rtk` is not '0', together
# with the per-user feature dicts of both modalities.
# `np.concatenate` over a single array along axis=1 returns the same values.
# Transactions get a random slice; clicks first have consecutive duplicate
# `cat_id`s collapsed and are then sliced.
dataset_train = PairedDataset(
    np.concatenate([
        df_matching_train[lambda x: x['rtk'].ne('0')].values,
    ], axis=1), 
    data=[
        features_trx_train,
        features_click_train,
    ],
    augmentations=[
        augmentation_chain(RandomSlice(256, 512)),  # 2000
        augmentation_chain(DropDuplicate('cat_id'), RandomSlice(256, 512)),  # 5000
    ],
)

# NOTE(review): validation uses the same random slicing as training, so the
# validation metrics presumably vary between runs/epochs — confirm this is intended.
dataset_valid = PairedDataset(
    np.concatenate([
        df_matching_valid[lambda x: x['rtk'].ne('0')].values,
    ], axis=1), 
    data=[
        features_trx_valid,
        features_click_valid,
    ],
    augmentations=[
        augmentation_chain(RandomSlice(256, 512)),  # 2000
        augmentation_chain(DropDuplicate('cat_id'), RandomSlice(256, 512)),  # 5000
    ],
)

In [61]:
# Data loaders over the paired datasets; only the training loader shuffles.
# NOTE(review): the "can only test a child process" AssertionError in the
# training output below is raised inside `_MultiProcessingDataLoaderIter.__del__`
# while worker processes are shut down and is reported as "Exception ignored".
train_dl = torch.utils.data.DataLoader(
    dataset_train,
    collate_fn=paired_collate_fn,
    shuffle=True,
    num_workers=12,
    batch_size=512,
    persistent_workers=True,
)
valid_dl = torch.utils.data.DataLoader(
    dataset_valid,
    collate_fn=paired_collate_fn,
    shuffle=False,
    num_workers=4,
    batch_size=512,
    persistent_workers=True,
)

In [62]:
# Trainer limited by step count rather than epochs.
# NOTE(review): the GPU index 3 is hard-coded for the machine this was run on.
# A checkpoint is written every 1000 training steps; `save_top_k=-1` is
# presumably meant to keep all of them — confirm against the Lightning docs.
trainer = pl.Trainer(
    gpus=[3],
    max_steps=30000,
    callbacks=[
        pl.callbacks.LearningRateMonitor(),
        pl.callbacks.ModelCheckpoint(
            every_n_train_steps=1000, save_top_k=-1,
        ),
    ]
)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


In [None]:
# Train the paired model, running validation on `valid_dl`.
trainer.fit(sup_model, train_dl, valid_dl)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]

  | Name              | Type                | Params
----------------------------------------------------------
0 | seq_encoder_trx   | Sequential          | 48.4 K
1 | seq_encoder_click | Sequential          | 67.2 K
2 | train_precision   | PrecisionK          | 0     
3 | train_mrr         | MeanReciprocalRankK | 0     
4 | valid_precision   | PrecisionK          | 0     
5 | valid_mrr         | MeanReciprocalRankK | 0     
----------------------------------------------------------
115 K     Trainable params
0         Non-trainable params
115 K     Total params
0.463     Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f1db74f0ca0>
Traceback (most recent call last):
  File "/mnt2/kireev/pipenv_envs/vtb-6O4wd6SN/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/mnt2/kireev/pipenv_envs/vtb-6O4wd6SN/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f1db74f0ca0><function _MultiProcessingDataLoaderIter.__del__ at 0x7f1db74f0ca0>

Traceback (most recent call last):
Traceback (most recent call last):
  File "/mnt2/kireev/pipenv_envs/vtb-6O4wd6SN/lib/python3.8/site-packages/torch/utils/data/dataloader.py", 

Validating: 0it [00:00, ?it/s]

Exception ignored in: Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f1db74f0ca0><function _MultiProcessingDataLoaderIter.__del__ at 0x7f1db74f0ca0>

Traceback (most recent call last):
Traceback (most recent call last):
  File "/mnt2/kireev/pipenv_envs/vtb-6O4wd6SN/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
  File "/mnt2/kireev/pipenv_envs/vtb-6O4wd6SN/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
Exception ignored in:     <function _MultiProcessingDataLoaderIter.__del__ at 0x7f1db74f0ca0>self._shutdown_workers()

Traceback (most recent call last):
  File "/mnt2/kireev/pipenv_envs/vtb-6O4wd6SN/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
  File "/mnt2/kireev/pipenv_envs/vtb-6O4wd6SN/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
        if w.is_alive():    self._shutdown_workers()
self._shutdown_wo

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f1db74f0ca0>
Traceback (most recent call last):
  File "/mnt2/kireev/pipenv_envs/vtb-6O4wd6SN/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/mnt2/kireev/pipenv_envs/vtb-6O4wd6SN/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f1db74f0ca0>
Traceback (most recent call last):
  File "/mnt2/kireev/pipenv_envs/vtb-6O4wd6SN/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/mnt2/kireev/pipenv_envs/vtb-6O4wd6SN/lib/python3.8/site-packages/

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f1db74f0ca0>
Traceback (most recent call last):
  File "/mnt2/kireev/pipenv_envs/vtb-6O4wd6SN/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/mnt2/kireev/pipenv_envs/vtb-6O4wd6SN/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f1db74f0ca0>
Traceback (most recent call last):
  File "/mnt2/kireev/pipenv_envs/vtb-6O4wd6SN/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/mnt2/kireev/pipenv_envs/vtb-6O4wd6SN/lib/python3.8/site-packages/

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]