#### Environment initialization

In [1]:
# Best-effort Kaggle bootstrap: any failure (most commonly the missing
# `kaggle_secrets` module) is treated as "not running on Kaggle".
try:
    import os
    from kaggle_secrets import UserSecretsClient  # type: ignore

    secrets_client = UserSecretsClient()
    # Expose each Kaggle secret as an environment variable of the same name.
    for secret_name in ('GITHUB_TOKEN', 'WANDB_TOKEN'):
        os.environ[secret_name] = secrets_client.get_secret(secret_name)

    # Flags read by later cells: whether to pip-install the requirements and
    # which environment-specific parameters to use.
    os.environ.update({
        '__KGLTBX_INSTALL_FROM_GITHUB': '1',
        '__KGLTBX_ENVIRONMENT': 'kaggle',
    })
except Exception:
    print('Kaggle initialization failed, probably not running on Kaggle...')

Kaggle initialization failed, probably not running on Kaggle...


In [2]:
# Best-effort Colab bootstrap: any failure (most commonly the missing
# `google.colab` module) is treated as "not running on Colab".
try:
    import os
    from google.colab import drive  # type: ignore

    drive.mount('/content/drive')

    # Load KEY=VALUE pairs from the credentials file into the environment.
    with open('/content/drive/MyDrive/credentials/.env') as f:
        for line in f:
            line = line.strip()
            # Blank lines and `#` comments carry no credentials; skipping them
            # keeps one stray line from aborting the whole Colab setup.
            if not line or line.startswith('#'):
                continue
            # Split on the FIRST '=' only, so values that themselves contain
            # '=' (e.g. base64-padded tokens) are kept intact.
            k, sep, v = line.partition('=')
            k = k.strip()
            if not sep or not k:
                continue
            os.environ[k] = v.strip()

    # Flags read by later cells: whether to pip-install the requirements and
    # which environment-specific parameters to use.
    os.environ['__KGLTBX_INSTALL_FROM_GITHUB'] = '1'
    os.environ['__KGLTBX_ENVIRONMENT'] = 'colab'
except Exception:
    print('Colab initialization failed, probably not running on Colab...')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


#### Requirements

In [3]:
%%writefile requirements.txt

iterative-stratification==0.1.7
textstat

git+https://${GITHUB_TOKEN}@github.com/andrei-papou/kaggle-toolbox.git@rc-v0.1.9#egg=kaggle_toolbox[remote,wandb]

Writing requirements.txt


In [4]:
# Install requirements only when an initialization cell above set __KGLTBX_INSTALL_FROM_GITHUB=1 (Kaggle or Colab).
!if [ "$__KGLTBX_INSTALL_FROM_GITHUB" == "1" ]; then pip install -r requirements.txt; fi
!rm requirements.txt

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting kaggle_toolbox[remote,wandb]
  Cloning https://****@github.com/andrei-papou/kaggle-toolbox.git (to revision rc-v0.1.9) to /tmp/pip-install-nsj2fmqk/kaggle-toolbox_a4c20df82fe84c4f81e39716f3ea43e1
  Running command git clone -q 'https://****@github.com/andrei-papou/kaggle-toolbox.git' /tmp/pip-install-nsj2fmqk/kaggle-toolbox_a4c20df82fe84c4f81e39716f3ea43e1
  Running command git checkout -b rc-v0.1.9 --track origin/rc-v0.1.9
  Switched to a new branch 'rc-v0.1.9'
  Branch 'rc-v0.1.9' set up to track remote branch 'rc-v0.1.9' from 'origin'.
Collecting textstat
  Downloading textstat-0.7.3-py3-none-any.whl (105 kB)
[K     |████████████████████████████████| 105 kB 28.7 MB/s 
Collecting pyphen
  Downloading pyphen-0.13.1-py3-none-any.whl (2.0 MB)
[K     |████████████████████████████████| 2.0 MB 54.4 MB/s 
Installing collected packages: pyphen, textstat
Successfully installed pyphe

#### Imports

In [5]:
import itertools
import os
import typing as t
from pathlib import Path

import pandas as pd
import torch
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold
from kaggle_toolbox import device
from kaggle_toolbox.environment import Environment
from kaggle_toolbox.data import LabeledDatasetItem, DatasetItemCollator
from kaggle_toolbox.device import CUDADevice
from kaggle_toolbox.features.transform import contiguous_to_categorical
from kaggle_toolbox.iter import FixedSubsetIterPlannerBuilder, FracSubsetSize, Index
from kaggle_toolbox.logging.stdout import StdOutLogger
from kaggle_toolbox.logging.wandb import WAndBLogger
from kaggle_toolbox.loss.regression import SmoothL1Loss, MSELoss
from kaggle_toolbox.lr_scheduling import create_cosine_scheduler_with_warmup
from kaggle_toolbox.metrics.regression import MCRMSEMetric
from kaggle_toolbox.model import Model
from kaggle_toolbox.nlp.transformer import Backbone, StandardModel, Squeezer, \
    MeanPooler, AttentionHeadPooler, TakeNthSqueezer, ConcatSqueezer, create_nakama_optimizer, \
    get_tokenizer_for_backbone, Tokenizer, TokenizerResult, TokenizerResultCollator, \
    seed_everything, standard_init_linear, standard_init_layer_norm, standard_init_module
from kaggle_toolbox.path import format_path
from kaggle_toolbox.prediction import PredDict
from kaggle_toolbox.progress import NotebookProgressBar
from kaggle_toolbox.trainer import StandardIterationTrainer, FullCycleTrainer, train_kfold_model
from kaggle_toolbox.typing import DynamicDict, filter_maybe_list
from kaggle_toolbox.validation import analyze_val_strategy, build_fold_result_df
from textstat import textstat
from torch.optim import Optimizer
from torch.utils.data import Dataset as TorchDataset
from transformers.data.data_collator import DataCollatorWithPadding
from transformers.optimization import AdamW
from transformers.utils.generic import PaddingStrategy
from transformers.utils.logging import set_verbosity_error as set_transformers_verbosity_error


# Presumably registers the `progress_apply` method that `Dataset` calls on its DataFrame — TODO confirm.
NotebookProgressBar.attach_to_pandas()
# Alias of `transformers.utils.logging.set_verbosity_error` (see imports): only errors are logged.
set_transformers_verbosity_error()

#### Dataset

In [6]:
class Dataset(TorchDataset[LabeledDatasetItem[TokenizerResult]]):
    """Torch dataset that turns essay rows into tokenized, labeled items.

    The text fed to the tokenizer is the essay (``full_text`` column) prefixed
    with twelve textstat readability scores rendered as integers.
    """

    def __init__(
            self,
            df: pd.DataFrame,
            tokenizer: Tokenizer,
            max_len: int,
            target_list: t.List[str]):
        """
        Args:
            df: Rows to serve; must contain ``full_text``, ``text_id`` and
                every column named in ``target_list``.
            tokenizer: Tokenizer wrapper used to encode each item.
            max_len: Forwarded to ``tokenizer.tokenize`` as ``max_len``.
            target_list: Names of the target columns, in output order.
        """
        # Private copy with a fresh 0..n-1 index, so the caller's frame is
        # never mutated by `sort_by_tokenizer_input_len`.
        self._df = df.copy().reset_index(drop=True)
        self._tokenizer = tokenizer
        self._max_len = max_len
        self._target_list = target_list

    def _get_tokenizer_input(self, row: DynamicDict) -> str:
        """Return the readability scores followed by the essay text for one row."""
        full_text = row.get_typed_or_raise('full_text', str)

        # Each score is scaled by 100 and truncated to an int, so it is
        # rendered without a decimal point.
        score_str = ' '.join([
            str(int(x * 100)) for x in [
                textstat.flesch_reading_ease(full_text),
                textstat.flesch_kincaid_grade(full_text),
                textstat.gunning_fog(full_text),
                textstat.smog_index(full_text),
                textstat.automated_readability_index(full_text),
                textstat.coleman_liau_index(full_text),
                textstat.linsear_write_formula(full_text),
                textstat.dale_chall_readability_score(full_text),
                textstat.text_standard(full_text, float_output=True),
                textstat.spache_readability(full_text),
                textstat.mcalpine_eflaw(full_text),
                textstat.reading_time(full_text, ms_per_char=14.69),
            ]
        ])

        return score_str + ' ' + full_text

    def sort_by_tokenizer_input_len(self):
        """Reorder the rows by ascending character length of their tokenizer input.

        The length is stored in the ``_tok_input_len`` column. Previously the
        input *string* was stored there, so rows ended up sorted
        lexicographically rather than by length.
        """
        self._df['_tok_input_len'] = self._df.progress_apply(
            lambda row: len(self._get_tokenizer_input(DynamicDict(t.cast(t.Dict[str, t.Any], row)))), axis=1)
        # Stable sort: rows of equal length keep their original relative order.
        self._df = self._df.sort_values('_tok_input_len', kind='mergesort')

    def __len__(self) -> int:
        """Number of rows served by this dataset."""
        return len(self._df)

    def __getitem__(self, idx: int) -> LabeledDatasetItem[TokenizerResult]:
        """Build the item at positional index ``idx`` (respects any prior sort)."""
        # Positional lookup: after sorting, the index labels are no longer 0..n-1.
        row = self._df.iloc[idx]

        tokenizer_input = self._get_tokenizer_input(DynamicDict(t.cast(t.Dict[str, t.Any], row)))
        text_id = str(row['text_id'])

        tokenizer_result = self._tokenizer.tokenize(
            tokenizer_input, max_len=self._max_len)
        target_tensor = torch.tensor(
            [float(row[target]) for target in self._target_list],
            dtype=torch.float32)

        # The id is wrapped in a list so a batch of ids can be merged by list concatenation.
        return LabeledDatasetItem(
            id=[text_id],
            x=tokenizer_result,
            y=target_tensor)

#### Optimizer

In [7]:
def create_llrd_optimizer(
        model: StandardModel[t.Any],
        layerwise_lr: float,
        layerwise_weight_decay: float,
        layerwise_lr_decay: float,
        eps: float) -> torch.optim.Optimizer:
    """Build an AdamW optimizer with layer-wise learning-rate decay.

    The head parameters and the last encoder layer train at ``layerwise_lr``;
    each earlier backbone layer (ending with the embeddings) gets the previous
    layer's rate multiplied by ``layerwise_lr_decay``.

    Args:
        model: Model exposing ``head_named_parameters`` and a backbone whose
            ``_inner`` module has ``embeddings`` and ``encoder.layer``.
        layerwise_lr: Learning rate of the head and of the top encoder layer.
        layerwise_weight_decay: Weight decay for backbone parameters that are
            neither biases nor LayerNorm weights.
        layerwise_lr_decay: Multiplicative factor applied per layer, top to bottom.
        eps: AdamW epsilon.

    Returns:
        The configured AdamW optimizer.
    """
    no_decay_markers = ("bias", "LayerNorm.weight")

    def _skips_weight_decay(param_name: str) -> bool:
        return any(marker in param_name for marker in no_decay_markers)

    # Task-specific head: full learning rate, never weight-decayed.
    head_params = [param for name, param in model.head_named_parameters if "model" not in name]
    param_groups = [
        {"params": head_params, "weight_decay": 0.0, "lr": layerwise_lr},
    ]

    # Walk the backbone from the last encoder layer down to the embeddings,
    # shrinking the learning rate after every layer.
    inner = model.backbone._inner
    top_down_layers = [inner.embeddings, *inner.encoder.layer][::-1]
    current_lr = layerwise_lr
    for layer in top_down_layers:
        decayed_params, undecayed_params = [], []
        for name, param in layer.named_parameters():
            bucket = undecayed_params if _skips_weight_decay(name) else decayed_params
            bucket.append(param)
        param_groups.append(
            {"params": decayed_params, "weight_decay": layerwise_weight_decay, "lr": current_lr})
        param_groups.append(
            {"params": undecayed_params, "weight_decay": 0.0, "lr": current_lr})
        current_lr *= layerwise_lr_decay

    return AdamW(param_groups, lr=layerwise_lr, eps=eps, correct_bias=True)

#### FGM

In [8]:
class FGM:
    """Gradient-direction adversarial perturbation of embedding weights.

    Usage: after a backward pass call ``attack()`` to nudge the embedding
    weights along their gradient, run the adversarial forward/backward pass,
    then call ``restore()`` to put the original weights back.
    """

    def __init__(self, model: Model[t.Any]):
        """Wrap ``model``; the weight backup starts out empty."""
        self._model = model
        self._backup = {}

    def attack(self, epsilon: float = 1., emb_name: str = 'word_embeddings'):
        """Perturb every trainable parameter whose name contains ``emb_name``.

        Each matching parameter is backed up and then shifted in place by
        ``epsilon * grad / ||grad||``.

        Args:
            epsilon: Norm of the perturbation added to each matching parameter.
            emb_name: Substring identifying the parameters to perturb.

        Raises:
            AssertionError: If a matching parameter has no gradient yet.
        """
        for name, param in self._model.named_parameters():
            if param.requires_grad and emb_name in name:
                assert param.grad is not None
                self._backup[name] = param.data.clone()
                norm = torch.norm(param.grad)
                # A zero norm would divide by zero and a NaN norm would poison
                # the weights; in both cases leave the parameter untouched.
                if norm != 0 and not torch.isnan(norm):
                    r_at = epsilon * param.grad / norm
                    param.data.add_(r_at)

    def restore(self, emb_name: str = 'word_embeddings'):
        """Restore every parameter perturbed by ``attack`` and clear the backup.

        Args:
            emb_name: Same substring that was passed to ``attack``.

        Raises:
            AssertionError: If a matching parameter was never backed up.
        """
        for name, param in self._model.named_parameters():
            if param.requires_grad and emb_name in name:
                assert name in self._backup
                param.data = self._backup[name]
        # Clear the backup only AFTER all parameters were visited. Clearing it
        # inside the loop wiped it after the first parameter, so any matching
        # parameter that was not first failed the assertion above.
        self._backup = {}

#### Iteration Trainer

In [9]:
class IterationTrainer(StandardIterationTrainer[TokenizerResult]):
    """Iteration trainer for this notebook; currently adds nothing to ``StandardIterationTrainer``.

    The subclass exists as the attachment point for the disabled hook kept
    in the comments below.
    """
    pass

    # Disabled experiment: an adversarial step using the `FGM` helper. It would
    # perturb the embeddings, backpropagate the loss of a second forward pass,
    # and then restore the weights.
    # def __init__(
    #     self, *args, **kwargs):
    #     super().__init__(*args, **kwargs)
    #     self._fgm = FGM(self._model)

    # def _after_forward_pass(self, idx: Index, x: TokenizerResult, y: torch.Tensor):
    #     self._fgm.attack()
    #     y_preds = self._model(x)
    #     loss_adv = self._criterion(y_preds, y)
    #     loss_adv.backward()
    #     self._fgm.restore()

#### Parameters

In [10]:
# Target columns; the model head emits one regression output per entry.
TARGET_LIST = [
    'cohesion',
    'syntax',
    'vocabulary',
    'phraseology',
    'grammar',
    'conventions',
]
# Set by the initialization cells ('kaggle' / 'colab'); falls back to 'laptop'.
ENVIRONMENT = os.getenv('__KGLTBX_ENVIRONMENT', 'laptop')
# `_env.param(kaggle=..., colab=..., laptop=...)` presumably returns the value
# matching ENVIRONMENT — TODO confirm against kaggle_toolbox.
_env = Environment(ENVIRONMENT)

# NOTE(review): IS_KAGGLE is not referenced anywhere else in this notebook.
IS_KAGGLE = 'KAGGLE_URL_BASE' in os.environ
# Pre-run mode: trains fold 0 only, disables the W&B logger, and (on Colab)
# enables saving the model.
IS_PRERUN = False
SEED = 42
NUM_FOLDS = 5
FOLD_LIST = [0, 1, 2, 3, 4] if not IS_PRERUN else [0]
DEVICE = CUDADevice()
BACKBONE = 'microsoft/deberta-v3-base'
# Presumably 1024 for the essay plus a budget of 12 for the twelve readability
# scores that `Dataset` prepends — TODO confirm the intended split.
MAX_LEN = 1024 + 12
# Learning rates passed to `create_nakama_optimizer`.
ENCODER_LR = 1e-5
DECODER_LR = 1e-4

# Only consumed by the `create_llrd_optimizer` call that is currently
# commented out in `_train_model`.
LAYERWISE_LR = 5e-5
LAYERWISE_LR_DECAY = 0.9
LAYERWISE_WEIGHT_DECAY = 0.01
LAYERWISE_ADAM_EPS = 1e-6
# Passed to the iteration trainer as `max_grad_norm`.
MAX_GRAD_NORM = 1000.0

# BATCH_SIZE * ACCUMULATE_GRADIENT_STEPS is 8 in every environment.
BATCH_SIZE = _env.param(kaggle=2, colab=2, laptop=1)
ACCUMULATE_GRADIENT_STEPS = _env.param(kaggle=4, colab=4, laptop=8)
NUM_EPOCHS = 5
# Passed to `FracSubsetSize`; presumably validation runs after every quarter
# of an epoch — TODO confirm.
VAL_FREQ = 0.25
NUM_WORKERS = _env.param(kaggle=2, colab=2, laptop=4)

# Directory layout per environment.
ROOT_DIR = _env.param(
    kaggle=Path('/kaggle'),
    colab=Path('/content/drive/MyDrive'),
    laptop=Path('/kaggle'))
DATA_DIR = _env.param(
    kaggle=ROOT_DIR / 'input',
    colab=ROOT_DIR / 'data',
    laptop=ROOT_DIR / 'data')
FP_ELL_DATASET_DIR = _env.param(
    kaggle=DATA_DIR / 'feedback-prize-english-language-learning',
    colab=DATA_DIR / 'fp-ell',
    laptop=DATA_DIR / 'fp-ell')
MODEL_DIR = _env.param(
    kaggle=ROOT_DIR / 'working',
    colab=ROOT_DIR / 'models/fp-ell',
    laptop=ROOT_DIR / 'models')
OOF_DIR = _env.param(
    kaggle=ROOT_DIR / 'working',
    colab=ROOT_DIR / 'oof/fp-ell',
    laptop=ROOT_DIR / 'oof')

# Used as the W&B run id and as the stem of the model / OOF file names.
RUN_ID = 'multi-v1-1024-layer_norm-gradclip-textstat'
# The doubled braces leave a literal `{fold}` placeholder that `_train_model`
# fills in via `format_path`. A value of None means no model path is passed
# to the trainer.
MODEL_PATH_TEMPLATE = _env.param(
    kaggle=MODEL_DIR / f'{RUN_ID}-fold_{{fold}}.pt',
    colab=MODEL_DIR / f'{RUN_ID}-fold_{{fold}}.pt' if IS_PRERUN else None,
    laptop=None)
# Destination of the out-of-fold predictions CSV.
OOF_PATH = OOF_DIR / f'{RUN_ID}.csv'

In [11]:
# Record which GPU this run executes on alongside the training logs.
print(f'GPU model: {DEVICE.get_name()}')

GPU model: tesla_t4


#### Pinning the seed

In [12]:
# Seed once, before the data split and any model construction in the cells below.
seed_everything(seed=SEED)

#### Data loading

In [13]:
def _read_data(dataset_dir_path: Path, target_list: t.List[str], num_folds: int, seed: int) -> pd.DataFrame:
    """Load ``train.csv`` and tag every row with a validation fold.

    Args:
        dataset_dir_path: Directory containing ``train.csv``.
        target_list: Target columns used to stratify the split.
        num_folds: Number of folds to create.
        seed: Random state of the shuffled stratified split.

    Returns:
        The training frame with an added ``fold`` column.
    """
    train_csv_path = dataset_dir_path / 'train.csv'
    all_df = pd.read_csv(train_csv_path)
    stratification_labels = contiguous_to_categorical(all_df[target_list].values)

    splitter = MultilabelStratifiedKFold(n_splits=num_folds, shuffle=True, random_state=seed)
    fold_splits = splitter.split(X=all_df, y=stratification_labels)
    # Only the validation half of each split is needed: a row's fold is the
    # one in which it is held out. The split yields positions, which match
    # the index labels because the freshly read frame has a default index.
    for fold_number, (_, valid_positions) in enumerate(fold_splits):
        all_df.loc[valid_positions, 'fold'] = fold_number

    return all_df

# Module-level frame with a `fold` column; `_train_model` reads it as a global.
all_df = _read_data(
    dataset_dir_path=FP_ELL_DATASET_DIR,
    target_list=TARGET_LIST,
    num_folds=NUM_FOLDS,
    seed=SEED)

# Last expression of the cell: displays per-fold sample counts and target means
# as a sanity check of the split.
analyze_val_strategy(all_df, target_list=TARGET_LIST, num_folds=NUM_FOLDS)

Unnamed: 0,fold,num_samples,cohesion_mean,syntax_mean,vocabulary_mean,phraseology_mean,grammar_mean,conventions_mean
0,0,782,3.077366,2.971867,3.205243,3.065857,2.959719,3.035166
1,1,783,3.12516,3.007024,3.226054,3.111111,3.015964,3.079183
2,2,782,3.140665,3.068414,3.258312,3.138747,3.069693,3.116368
3,3,782,3.131074,3.048593,3.245524,3.125959,3.042839,3.074169
4,4,782,3.161125,3.045396,3.243606,3.142583,3.076087,3.100384


#### Entrypoint

In [14]:
def _train_model(fold: int) -> t.Tuple[float, PredDict]:
    """Train and validate a fresh model on one fold.

    Rows of the global ``all_df`` whose ``fold`` equals ``fold`` form the
    validation set; all other rows form the training set.

    Args:
        fold: Index of the fold held out for validation.

    Returns:
        The result of ``FullCycleTrainer.do_full_cycle`` — per the annotation,
        a score and the validation predictions.
    """
    # ----- Model and optimizer -----
    backbone = Backbone.from_huggingface_checkpoint(BACKBONE, zero_out_dropout=True)
    # standard_init_module(backbone.inner.encoder.layer[-1])
    # Padding is left to the batch collator (DataCollatorWithPadding) below.
    tokenizer = get_tokenizer_for_backbone(backbone=BACKBONE, padding_strategy=PaddingStrategy.DO_NOT_PAD)
    squeezer = TakeNthSqueezer()
    pooler = MeanPooler()
    # Disabled alternative: wrap the two layers in standard_init_layer_norm(...) and
    # standard_init_linear(..., std=backbone.initializer_range or 0.02 when it is None).
    head = torch.nn.Sequential(
        torch.nn.LayerNorm(backbone.out_dim_size),
        torch.nn.Linear(backbone.out_dim_size, len(TARGET_LIST)))
    model: StandardModel[TokenizerResult] = StandardModel(
        backbone=backbone,
        squeezer=squeezer,
        pooler=pooler,
        dnn=head)
    optimizer = create_nakama_optimizer(model=model, encoder_lr=ENCODER_LR, decoder_lr=DECODER_LR)
    # Disabled alternative (layer-wise LR decay):
    #     optimizer = create_llrd_optimizer(
    #         model=model,
    #         layerwise_lr=LAYERWISE_LR,
    #         layerwise_lr_decay=LAYERWISE_LR_DECAY,
    #         layerwise_weight_decay=LAYERWISE_WEIGHT_DECAY,
    #         eps=LAYERWISE_ADAM_EPS)

    # ----- Data -----
    train_df = all_df[all_df['fold'] != fold]
    valid_df = all_df[all_df['fold'] == fold]

    shared_dataset_kwargs = dict(tokenizer=tokenizer, max_len=MAX_LEN, target_list=TARGET_LIST)
    train_dataset = Dataset(df=train_df, **shared_dataset_kwargs)
    valid_dataset = Dataset(df=valid_df, **shared_dataset_kwargs)
    # Only the validation rows are reordered; the training set is left as is.
    valid_dataset.sort_by_tokenizer_input_len()

    # One optimizer step consumes BATCH_SIZE * ACCUMULATE_GRADIENT_STEPS samples.
    samples_per_optimizer_step = BATCH_SIZE * ACCUMULATE_GRADIENT_STEPS
    num_training_steps = (len(train_dataset) * NUM_EPOCHS) // samples_per_optimizer_step

    # ----- Trainer (pieces are built in the order the trainer receives them) -----
    criterion = MSELoss()
    scheduler = create_cosine_scheduler_with_warmup(
        optimizer=optimizer,
        num_training_steps=num_training_steps,
        warmup_steps_ratio=0.0,
        num_cycles=0.5)
    iteration_trainer = IterationTrainer(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=scheduler,
        pred_quality_metric_list=[MCRMSEMetric()],
        device=DEVICE,
        max_grad_norm=MAX_GRAD_NORM,
        accumulate_gradient_steps=ACCUMULATE_GRADIENT_STEPS,
        progress_bar=NotebookProgressBar())
    iter_planner_builder = FixedSubsetIterPlannerBuilder(FracSubsetSize(VAL_FREQ))
    collator = DatasetItemCollator(
        # Every item carries its id in a one-element list; concatenate them per batch.
        id_collate_fn=lambda id_lists: sum(id_lists, []),
        x_collate_fn=TokenizerResultCollator(DataCollatorWithPadding(tokenizer.tokenizer)))

    # No template means the trainer gets no path to save the model to.
    if MODEL_PATH_TEMPLATE is None:
        save_model_to_path = None
    else:
        save_model_to_path = format_path(MODEL_PATH_TEMPLATE, fold=str(fold))

    stdout_logger = StdOutLogger()
    # W&B logging is skipped in pre-run mode.
    if IS_PRERUN:
        wandb_logger = None
    else:
        wandb_logger = WAndBLogger(
            user_name='andrei-papou',
            project='fp-ell',
            run_id=RUN_ID,
            metric_prefix=f'f{fold}')

    trainer: FullCycleTrainer[TokenizerResult] = FullCycleTrainer(
        iteration_trainer=iteration_trainer,
        train_iter_planner_builder=iter_planner_builder,
        batch_size=BATCH_SIZE,
        collator=collator,
        num_epochs=NUM_EPOCHS,
        num_workers=NUM_WORKERS,
        model_comparison_metric=MCRMSEMetric.valid_name(),
        model_comparison_metric_criteria=MCRMSEMetric.criteria,
        save_model_to_path=save_model_to_path,
        logger_list=filter_maybe_list([stdout_logger, wandb_logger]))

    return trainer.do_full_cycle(train_dataset, valid_dataset)

# Train one model per entry of FOLD_LIST, collecting each fold's score and the
# out-of-fold predictions.
score_list, oof_pred_dict = train_kfold_model(
    train_model_fn=_train_model,
    fold_list=FOLD_LIST)
# Persist the out-of-fold predictions with one `<target>_score` column per target.
oof_pred_dict.save_to_csv(
    OOF_PATH,
    score_col_name_list=[f'{target}_score' for target in TARGET_LIST])
# Last expression of the cell: displays the per-fold score table.
build_fold_result_df(fold_list=FOLD_LIST, score_list=score_list)

Downloading:   0%|          | 0.00/579 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/371M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/2.46M [00:00<?, ?B/s]

  "The sentencepiece tokenizer that you are converting to a fast tokenizer uses the byte fallback option"


  0%|          | 0/782 [00:00<?, ?it/s]

Training.:   0%|          | 0/391 [00:00<?, ?it/s]

  torch.nn.utils.clip_grad.clip_grad_norm(self._model.parameters(), self._max_grad_norm)


Validating.:   0%|          | 0/391 [00:00<?, ?it/s]

Step 1 metrics:
	train_loss = 1.04995036
	train_mcrmse = 1.01585007
	valid_loss = 0.23938499
	valid_mcrmse = 0.48853245
Best metric improved from inf to 0.4885324537754059. Saving the model.


Training.:   0%|          | 0/391 [00:00<?, ?it/s]

Validating.:   0%|          | 0/391 [00:00<?, ?it/s]

Step 2 metrics:
	train_loss = 0.24727812
	train_mcrmse = 0.49654198
	valid_loss = 0.26382652
	valid_mcrmse = 0.51213175


Training.:   0%|          | 0/391 [00:00<?, ?it/s]

Validating.:   0%|          | 0/391 [00:00<?, ?it/s]

Step 3 metrics:
	train_loss = 0.24685232
	train_mcrmse = 0.49622473
	valid_loss = 0.21571362
	valid_mcrmse = 0.46382320
Best metric improved from 0.4885324537754059 to 0.46382319927215576. Saving the model.


Training.:   0%|          | 0/392 [00:00<?, ?it/s]

Validating.:   0%|          | 0/391 [00:00<?, ?it/s]

Step 4 metrics:
	train_loss = 0.23647854
	train_mcrmse = 0.48524800
	valid_loss = 0.22665435
	valid_mcrmse = 0.47507238


Training.:   0%|          | 0/391 [00:00<?, ?it/s]

Validating.:   0%|          | 0/391 [00:00<?, ?it/s]

Step 5 metrics:
	train_loss = 0.22067574
	train_mcrmse = 0.46951696
	valid_loss = 0.21052894
	valid_mcrmse = 0.45815530
Best metric improved from 0.46382319927215576 to 0.4581553041934967. Saving the model.


Training.:   0%|          | 0/391 [00:00<?, ?it/s]

Validating.:   0%|          | 0/391 [00:00<?, ?it/s]

Step 6 metrics:
	train_loss = 0.21270369
	train_mcrmse = 0.46055818
	valid_loss = 0.24324995
	valid_mcrmse = 0.49217263


Training.:   0%|          | 0/391 [00:00<?, ?it/s]

Validating.:   0%|          | 0/391 [00:00<?, ?it/s]

Step 7 metrics:
	train_loss = 0.20840093
	train_mcrmse = 0.45589909
	valid_loss = 0.21127456
	valid_mcrmse = 0.45875350


Training.:   0%|          | 0/392 [00:00<?, ?it/s]

Validating.:   0%|          | 0/391 [00:00<?, ?it/s]

Step 8 metrics:
	train_loss = 0.20724832
	train_mcrmse = 0.45433632
	valid_loss = 0.20938703
	valid_mcrmse = 0.45688498
Best metric improved from 0.4581553041934967 to 0.4568849802017212. Saving the model.


Training.:   0%|          | 0/391 [00:00<?, ?it/s]

Validating.:   0%|          | 0/391 [00:00<?, ?it/s]

Step 9 metrics:
	train_loss = 0.19923446
	train_mcrmse = 0.44616547
	valid_loss = 0.20518304
	valid_mcrmse = 0.45234463
Best metric improved from 0.4568849802017212 to 0.4523446261882782. Saving the model.


Training.:   0%|          | 0/391 [00:00<?, ?it/s]

Validating.:   0%|          | 0/391 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f17f0758950>
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1510, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1493, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.7/multiprocessing/process.py", line 151, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f17f0758950>
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1510, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1493, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/pytho

Step 10 metrics:
	train_loss = 0.19668801
	train_mcrmse = 0.44293961
	valid_loss = 0.20653459
	valid_mcrmse = 0.45378748


Training.:   0%|          | 0/391 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f17f0758950>
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1510, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py", line 1493, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.7/multiprocessing/process.py", line 151, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: Traceback (most recent call last):
  File "/usr/lib/python3.7/multiprocessing/queues.py", line 242, in _feed
    send_bytes(obj)
<function _MultiProcessingDataLoaderIter.__del__ at 0x7f17f0758950>
  File "/usr/lib/python3.7/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
Traceback (most recent call last):
  File "/usr/lib/python3.7/m

Validating.:   0%|          | 0/391 [00:00<?, ?it/s]

Step 11 metrics:
	train_loss = 0.18569387
	train_mcrmse = 0.43014660
	valid_loss = 0.20827511
	valid_mcrmse = 0.45560539


Training.:   0%|          | 0/392 [00:00<?, ?it/s]

Validating.:   0%|          | 0/391 [00:00<?, ?it/s]

Step 12 metrics:
	train_loss = 0.18644601
	train_mcrmse = 0.43095016
	valid_loss = 0.20188603
	valid_mcrmse = 0.44866946
Best metric improved from 0.4523446261882782 to 0.4486694633960724. Saving the model.


Training.:   0%|          | 0/391 [00:00<?, ?it/s]

Validating.:   0%|          | 0/391 [00:00<?, ?it/s]

Step 13 metrics:
	train_loss = 0.17845033
	train_mcrmse = 0.42227912
	valid_loss = 0.22481112
	valid_mcrmse = 0.47301641


Training.:   0%|          | 0/391 [00:00<?, ?it/s]

Validating.:   0%|          | 0/391 [00:00<?, ?it/s]

Step 14 metrics:
	train_loss = 0.17659421
	train_mcrmse = 0.41976973
	valid_loss = 0.20957839
	valid_mcrmse = 0.45712140


Training.:   0%|          | 0/391 [00:00<?, ?it/s]

Validating.:   0%|          | 0/391 [00:00<?, ?it/s]

Step 15 metrics:
	train_loss = 0.17037591
	train_mcrmse = 0.41208312
	valid_loss = 0.20590596
	valid_mcrmse = 0.45307636


Training.:   0%|          | 0/392 [00:00<?, ?it/s]

Validating.:   0%|          | 0/391 [00:00<?, ?it/s]

Step 16 metrics:
	train_loss = 0.17119522
	train_mcrmse = 0.41307375
	valid_loss = 0.20573443
	valid_mcrmse = 0.45292822


Training.:   0%|          | 0/391 [00:00<?, ?it/s]

Validating.:   0%|          | 0/391 [00:00<?, ?it/s]

Step 17 metrics:
	train_loss = 0.16422185
	train_mcrmse = 0.40512049
	valid_loss = 0.20470193
	valid_mcrmse = 0.45166692


Training.:   0%|          | 0/391 [00:00<?, ?it/s]

Validating.:   0%|          | 0/391 [00:00<?, ?it/s]

Step 18 metrics:
	train_loss = 0.15648787
	train_mcrmse = 0.39515948
	valid_loss = 0.20710136
	valid_mcrmse = 0.45436701


Training.:   0%|          | 0/391 [00:00<?, ?it/s]

Validating.:   0%|          | 0/391 [00:00<?, ?it/s]

Step 19 metrics:
	train_loss = 0.15712252
	train_mcrmse = 0.39585316
	valid_loss = 0.20433597
	valid_mcrmse = 0.45134714


Training.:   0%|          | 0/392 [00:00<?, ?it/s]

Validating.:   0%|          | 0/391 [00:00<?, ?it/s]

Step 20 metrics:
	train_loss = 0.16370404
	train_mcrmse = 0.40400317
	valid_loss = 0.20431031
	valid_mcrmse = 0.45131826


Unnamed: 0,fold,score
0,0,0.448669


#### Environment shutdown

In [15]:
# Only on Colab: presumably releases the runtime once training has finished,
# so an idle GPU session is not kept alive — TODO confirm.
if ENVIRONMENT == 'colab':
    # Imported here because `google.colab` is only importable on Colab.
    from google.colab import runtime  # type: ignore

    runtime.unassign()