# Pip Wheels

In [1]:
'''
!pip install pytorch_lightning
!pip install torchmetrics
!pip install tokenizers
!pip install transformers
!pip install ray[tune]
'''

'\n!pip install pytorch_lightning\n!pip install torchmetrics\n!pip install tokenizers\n!pip install transformers\n!pip install ray[tune]\n'

# Imports

In [2]:
# General Libraries
import os
import re
import random
import numpy as np
import pandas as pd
import scipy as sp
from tqdm.auto import tqdm
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Type
from copy import deepcopy

# PyTorch Lightning
import pytorch_lightning as pl
from pytorch_lightning import LightningDataModule, seed_everything, Trainer, LightningModule
from torchmetrics import Accuracy
from torchmetrics.functional import f1_score, auroc
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.loops.loop import Loop
from pytorch_lightning.loops.fit_loop import FitLoop
from pytorch_lightning.trainer.states import TrainerFn

# PyTorch
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, Subset

# Scikit-learn
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, multilabel_confusion_matrix
from sklearn.model_selection import StratifiedKFold, GroupKFold, KFold

# Ray[Tune]
import ray
from ray import air
from ray import tune
from ray.air import session
from ray.tune.integration.pytorch_lightning import TuneReportCallback


# HuggingFace Libraries
import tokenizers
import transformers 
from transformers import AutoTokenizer, AutoModel, AutoConfig
from transformers import get_linear_schedule_with_warmup, AdamW, get_cosine_schedule_with_warmup
%env TOKENIZERS_PARALLELISM=true

env: TOKENIZERS_PARALLELISM=true


In [3]:
ray.init(num_gpus=4)

2022-11-06 20:03:43,285	INFO worker.py:1518 -- Started a local Ray instance.


0,1
Python version:,3.8.10
Ray version:,2.0.1


# Configuration

## Configuration Class: notebook-specific settings

In [4]:
class CFG:
    # General
    seed = 42
    
    # Debug 
    debug = True
    debug_samples = 1000

## Configuration Dictionary: trial-specific settings

In [5]:
# Defining a search space!
config_dict = {
    "target_size" : 1,
    "num_workers" : 8,
    
    # Training parameters
    "batch_size" : 64,
    "epochs" : 2,
    "n_fold" : 2,
    "warmup_steps" : 0,
    "min_lr" : 1e-6,
    "encoder_lr" : 2e-5,
    "decoder_lr" : 2e-5,
    "eps" : 1e-6,
    "betas" : (0.9, 0.999),
    "weight_decay" : 0.01,
    "fc_dropout" : 0.2,

    # Transformers
    # "model" : tune.grid_search(["distilbert-base-uncased", "microsoft/deberta-v3-large"]),
    # "model" : tune.choice(["microsoft/deberta-v3-large"]),
    # "model" : tune.choice(["distilbert-base-uncased"]),
    "model" : "distilbert-base-uncased",
}

## Directories

In [6]:
INPUT_DIR = '../dataset/us-patent-phrase-to-phrase-matching/'
OUTPUT_DIR = './'
if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)

## Logger

In [7]:
logger = TensorBoardLogger("lightning_logs", name="USPPPM")

## Random seed

In [8]:
pl.seed_everything(CFG.seed)

Global seed set to 42


42

## Scoring Function

In [9]:
def get_score(y_true, y_pred):
    score = sp.stats.pearsonr(y_true, y_pred)[0]
    return score

# Data Loading

In [10]:
cpc_texts = torch.load('cpc_texts.pth')
dataframe = pd.read_csv("dataframe.csv")
display(dataframe.head())

Unnamed: 0.1,Unnamed: 0,id,anchor,target,context,score,context_text,text,score_map
0,0,37d61fd2272659b1,abatement,abatement of pollution,A47,0.5,HUMAN NECESSITIES. FURNITURE; DOMESTIC ARTICLE...,abatement[SEP]abatement of pollution[SEP]HUMAN...,2
1,1,7b9652b17b68b7a4,abatement,act of abating,A47,0.75,HUMAN NECESSITIES. FURNITURE; DOMESTIC ARTICLE...,abatement[SEP]act of abating[SEP]HUMAN NECESSI...,3
2,2,36d72442aefd8232,abatement,active catalyst,A47,0.25,HUMAN NECESSITIES. FURNITURE; DOMESTIC ARTICLE...,abatement[SEP]active catalyst[SEP]HUMAN NECESS...,1
3,3,5296b0c19e1ce60e,abatement,eliminating process,A47,0.5,HUMAN NECESSITIES. FURNITURE; DOMESTIC ARTICLE...,abatement[SEP]eliminating process[SEP]HUMAN NE...,2
4,4,54c1e3b9184cb5b6,abatement,forest region,A47,0.0,HUMAN NECESSITIES. FURNITURE; DOMESTIC ARTICLE...,abatement[SEP]forest region[SEP]HUMAN NECESSIT...,0


## Debug Slicing

In [11]:
if CFG.debug:
    dataframe = dataframe.iloc[:CFG.debug_samples,:]

## Train-Test Split

In [12]:
train_df, test_df = train_test_split(dataframe, test_size = 0.1, random_state = CFG.seed, stratify = dataframe.score_map)
display(train_df.head())
display(test_df.head())

Unnamed: 0.1,Unnamed: 0,id,anchor,target,context,score,context_text,text,score_map
983,983,d336ee90bdc74b1c,air flow line,fluid flow line,B63,0.75,PERFORMING OPERATIONS; TRANSPORTING. SHIPS OR ...,air flow line[SEP]fluid flow line[SEP]PERFORMI...,3
700,700,e5a6dccf738babe3,adjacent laterally,adjacent to mall,A41,0.0,HUMAN NECESSITIES. WEARING APPAREL,adjacent laterally[SEP]adjacent to mall[SEP]HU...,0
463,463,f030009ac7858e1b,acrylate groups,interpolymer invention,D21,0.25,TEXTILES; PAPER. PAPER-MAKING; PRODUCTION OF C...,acrylate groups[SEP]interpolymer invention[SEP...,1
926,926,0136064bfb779543,agitate means,muscle shivering,B01,0.0,PERFORMING OPERATIONS; TRANSPORTING. PHYSICAL ...,agitate means[SEP]muscle shivering[SEP]PERFORM...,0
235,235,e865c688d8198872,accept information,information data,A45,0.25,HUMAN NECESSITIES. HAND OR TRAVELLING ARTICLES,accept information[SEP]information data[SEP]HU...,1


Unnamed: 0.1,Unnamed: 0,id,anchor,target,context,score,context_text,text,score_map
719,719,9658a68dedd1b4cc,adjacent laterally,radius,A41,0.25,HUMAN NECESSITIES. WEARING APPAREL,adjacent laterally[SEP]radius[SEP]HUMAN NECESS...,1
256,256,d2088cdd8be8761b,achieve authentication,biometric,H04,0.25,ELECTRICITY. ELECTRIC COMMUNICATION TECHNIQUE,achieve authentication[SEP]biometric[SEP]ELECT...,1
548,548,8ed41bd0deb21205,activating position,active material,G06,0.0,PHYSICS. COMPUTING; CALCULATING; COUNTING,activating position[SEP]active material[SEP]PH...,0
473,473,07f1cfe84cd4ebdc,acrylate groups,nitro group,D21,0.0,TEXTILES; PAPER. PAPER-MAKING; PRODUCTION OF C...,acrylate groups[SEP]nitro group[SEP]TEXTILES; ...,0
308,308,f798712a28d6660b,acid absorption,rosmarinic acid,B01,0.25,PERFORMING OPERATIONS; TRANSPORTING. PHYSICAL ...,acid absorption[SEP]rosmarinic acid[SEP]PERFOR...,1


# Dataset Preparation

## Tokenizer

In [13]:
def set_tokenizer(config_dict):
    tokenizer = AutoTokenizer.from_pretrained(config_dict['model'])
    tokenizer.save_pretrained(OUTPUT_DIR+'tokenizer/')
    config_dict['tokenizer'] = tokenizer

## Maximum length

In [14]:
def set_max_len(config_dict, cpc_texts=cpc_texts, train_df=dataframe):
    tokenizer = config_dict['tokenizer']
    lengths_dict = {}

    lengths = []
    tk0 = tqdm(cpc_texts.values(), total=len(cpc_texts))
    for text in tk0:
        length = len(tokenizer(text, add_special_tokens=False)['input_ids'])
        lengths.append(length)
    lengths_dict['context_text'] = lengths

    for text_col in ['anchor', 'target']:
        lengths = []
        tk0 = tqdm(train_df[text_col].fillna("").values, total=len(train_df))
        for text in tk0:
            length = len(tokenizer(text, add_special_tokens=False)['input_ids'])
            lengths.append(length)
        lengths_dict[text_col] = lengths

    config_dict['max_len'] = max(lengths_dict['anchor']) + max(lengths_dict['target'])\
                    + max(lengths_dict['context_text']) + 4 # CLS + SEP + SEP + SEP

## Dataset

In [15]:
def prepare_input(config_dict, text):
    tokenizer = config_dict['tokenizer']
    inputs = tokenizer(text,
                       add_special_tokens = True,
                       max_length = config_dict['max_len'],
                       padding = "max_length",
                       return_offsets_mapping = False)
    for k, v in inputs.items():
        inputs[k] = torch.tensor(v, dtype=torch.long)
    return inputs

In [16]:
class USPPM_dataset(Dataset):
    def __init__(self, config_dict, train_df, train=True):
        self.config_dict = config_dict
        self.texts = train_df['text'].values
        self.train = train
        if train:
            self.labels = train_df['score'].values
            self.score_map = train_df['score_map'].values

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, item):
        inputs = prepare_input(self.config_dict, self.texts[item])
        if self.train:
            labels = torch.tensor(self.labels[item], dtype=torch.float)
            return dict(
                  inputs = inputs,
                  labels = labels
            )
        else:
            return dict(
                  inputs = inputs
            )

# K-Fold 

## KFold DataModule definition                       

In [17]:
class BaseKFoldDataModule(LightningDataModule, ABC):
    @abstractmethod
    def setup_folds(self, num_folds: int) -> None:
        pass

    @abstractmethod
    def setup_fold_index(self, fold_index: int) -> None:
        pass

## KFoldDataModule implementation

In [18]:
@dataclass
class USPPPM_kf_datamodule(BaseKFoldDataModule):
    def __init__(self, config_dict, dataframe = dataframe):
        
        self.config_dict = config_dict
        self.prepare_data_per_node = False
        self._log_hyperparams = False
        
        train_dataset: Optional[Dataset] = None
        test_dataset: Optional[Dataset] = None
        train_fold: Optional[Dataset] = None
        val_fold: Optional[Dataset] = None
        
        self.dataframe = dataframe
            
    def setup(self, stage: Optional[str] = None) -> None:
        train_df, test_df = train_test_split(self.dataframe, test_size = 0.1, random_state = CFG.seed, stratify = self.dataframe.score_map)
        self.train_dataset = USPPM_dataset(self.config_dict, train_df)
        self.test_dataset = USPPM_dataset(self.config_dict, test_df)

    def setup_folds(self, num_folds: int) -> None:
        self.num_folds = num_folds
        Fold = StratifiedKFold(n_splits=self.num_folds, shuffle=True)
        self.splits = [split for split in Fold.split(self.train_dataset, self.train_dataset.score_map)]

    def setup_fold_index(self, fold_index: int) -> None:
        train_indices, val_indices = self.splits[fold_index]
        self.train_fold = Subset(self.train_dataset, train_indices)
        self.val_fold = Subset(self.train_dataset, val_indices)
        print("TRAIN FOLD", fold_index + 1, len(self.train_fold))
        print("VALID FOLD", fold_index + 1, len(self.val_fold))

    def train_dataloader(self) -> DataLoader:
        return DataLoader(self.train_fold, num_workers = self.config_dict['num_workers'], batch_size = self.config_dict['batch_size'])

    def val_dataloader(self) -> DataLoader:
        return DataLoader(self.val_fold, num_workers = self.config_dict['num_workers'], batch_size = self.config_dict['batch_size'])
    
    def test_dataloader(self) -> DataLoader:
        return DataLoader(self.test_dataset, num_workers = self.config_dict['num_workers'], batch_size = self.config_dict['batch_size'])
    
    def __post_init__(cls):
        super().__init__()

## Ensemble Model for kfold

In [19]:
class EnsembleVotingModel(LightningModule):
    def __init__(self, model_cls: Type[LightningModule], checkpoint_paths: List[str]):
        super().__init__()
        # Create `num_folds` models with their associated fold weights
        self.models = torch.nn.ModuleList([model_cls.load_from_checkpoint(p) for p in checkpoint_paths])
        self.last_acc = Accuracy()

    def test_step(self, batch: Any, batch_idx: int, dataloader_idx: int = 0) -> None:
        # Compute the averaged predictions over the `num_folds` models.
        logits = torch.stack([m(batch[0]) for m in self.models])

        avg_logits = logits.mean(0)
        acc = self.last_acc(avg_logits, batch[1])

        accs = torch.stack([self.last_acc(logit, batch[1]) for logit in logits])
        avg_acc = accs.mean(0)
        self.log('voting acc', acc)
        print('accs print', accs)
        print('avg_acc print', avg_acc)
        self.log('avg_acc', avg_acc)

## KFoldLoop implementation

In [20]:
class KFoldLoop(Loop):
    def __init__(self, num_folds: int, export_path: str) -> None:
        super().__init__()
        self.num_folds = num_folds
        self.current_fold: int = 0
        self.export_path = export_path

    @property
    def done(self) -> bool:
        return self.current_fold >= self.num_folds

    def connect(self, fit_loop: FitLoop) -> None:
        self.fit_loop = fit_loop

    def reset(self) -> None:
        """Nothing to reset in this loop."""

    def on_run_start(self, *args: Any, **kwargs: Any) -> None:
        """Used to call `setup_folds` from the `BaseKFoldDataModule` instance and store the original weights of the model."""
        assert isinstance(self.trainer.datamodule, BaseKFoldDataModule)
        self.trainer.datamodule.setup_folds(self.num_folds)
        self.lightning_module_state_dict = deepcopy(self.trainer.lightning_module.state_dict())

    def on_advance_start(self, *args: Any, **kwargs: Any) -> None:
        """Used to call `setup_fold_index` from the `BaseKFoldDataModule` instance."""
        print(f"STARTING FOLD {self.current_fold+1}")
        assert isinstance(self.trainer.datamodule, BaseKFoldDataModule)
        self.trainer.datamodule.setup_fold_index(self.current_fold)

    def advance(self, *args: Any, **kwargs: Any) -> None:
        """Used to the run a fitting and testing on the current hold."""
        self._reset_fitting()  # requires to reset the tracking stage.
        self.fit_loop.run()

        self._reset_testing()  # requires to reset the tracking stage.
        self.trainer.test_loop.run()
        print('TEST for FOLD', self.current_fold+1)
        
        self.current_fold += 1  # increment fold tracking number.

    def on_advance_end(self) -> None:
        """Used to save the weights of the current fold and reset the LightningModule and its optimizers."""
        self.trainer.save_checkpoint(os.path.join(self.export_path, f"model.{self.current_fold}.pt"))
        # restore the original weights + optimizers and schedulers.
        self.trainer.lightning_module.load_state_dict(self.lightning_module_state_dict)
        self.trainer.strategy.setup_optimizers(self.trainer)
        self.replace(fit_loop=FitLoop)

    def on_run_end(self) -> None:
        """Used to compute the performance of the ensemble model on the test set."""
        checkpoint_paths = [os.path.join(self.export_path, f"model.{f_idx + 1}.pt") for f_idx in range(self.num_folds)]
        voting_model = EnsembleVotingModel(type(self.trainer.lightning_module), checkpoint_paths)
        voting_model.trainer = self.trainer

        # This requires to connect the new model and move it the right device.
        self.trainer.strategy.connect(voting_model)
        self.trainer.strategy.model_to_device()
        self.trainer.test_loop.run()

    def on_save_checkpoint(self) -> Dict[str, int]:
        return {"current_fold": self.current_fold}

    def on_load_checkpoint(self, state_dict: Dict) -> None:
        self.current_fold = state_dict["current_fold"]

    def _reset_fitting(self) -> None:
        self.trainer.reset_train_dataloader()
        self.trainer.reset_val_dataloader()
        self.trainer.state.fn = TrainerFn.FITTING
        self.trainer.training = True

    def _reset_testing(self) -> None:
        self.trainer.reset_test_dataloader()
        self.trainer.state.fn = TrainerFn.TESTING
        self.trainer.testing = True

    def __getattr__(self, key) -> Any:
        # requires to be overridden as attributes of the wrapped loop are being accessed.
        if key not in self.__dict__:
            return getattr(self.fit_loop, key)
        return self.__dict__[key]

# Model

In [21]:
class USPPPM_model(pl.LightningModule):
    def __init__(self, config_dict=config_dict, config_path=None, pretrained=True):
        super().__init__()
        
        if config_path is None:
            self.config = AutoConfig.from_pretrained(config_dict['model'], output_hidden_states = True)
        else:
            self.config = torch.load(config_path)
        
        self.config_dict = config_dict
        self.n_warmup_steps = config_dict['warmup_steps']
        self.n_training_steps = config_dict['training_steps']
        self.criterion = nn.BCEWithLogitsLoss(reduction="mean")
        
        if pretrained:
            self.model = AutoModel.from_pretrained(config_dict['model'], config = self.config)
        else:
            self.model = AutoModel.from_config(self.config)
            
        self.fc_dropout = nn.Dropout(config_dict['fc_dropout'])
        self.fc = nn.Linear(self.config.hidden_size, config_dict['target_size'])
        self._init_weights(self.fc)
        self.attention = nn.Sequential(
            nn.Linear(self.config.hidden_size, 512),
            nn.Tanh(),
            nn.Linear(512, 1),
            nn.Softmax(dim=1)
        )
        
        self.batch_labels = []
        self._init_weights(self.attention)
        
    def _init_weights(self, module):
        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.padding_idx is not None:
                module.weight.data[module.padding_idx].zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)
    
    def feature(self, inputs):
        outputs = self.model(**inputs)
        last_hidden_states = outputs[0]
        weights = self.attention(last_hidden_states)
        feature = torch.sum(weights * last_hidden_states, dim=1)
        return feature

    def forward(self, inputs, labels=None):
        feature = self.feature(inputs)
        output = self.fc(self.fc_dropout(feature))
        
        loss = 0
        if labels is not None:
            loss = self.criterion(output, labels)
        return loss, output
    
    def training_step(self, batch, batch_idx):
        inputs = batch["inputs"]
        labels = batch["labels"]
        loss, outputs = self(inputs, labels.unsqueeze(1))
        self.log("train_loss", loss, prog_bar=True, logger=True)
        # session.report({"train_loss": loss})  # Send the score to Tune.
        return {"loss": loss, "predictions": outputs, "labels": labels}

    def validation_step(self, batch, batch_idx):
        inputs = batch["inputs"]
        labels = batch["labels"]
        loss, outputs = self(inputs, labels.unsqueeze(1))
        self.log("val_loss", loss, prog_bar=True, logger=True)
        # session.report({"val_loss": loss})  # Send the score to Tune.
        return {"loss": loss, "predictions": outputs, "labels": labels}

    def test_step(self, batch, batch_idx):
        inputs = batch["inputs"]
        labels = batch["labels"]
        loss, outputs = self(inputs, labels.unsqueeze(1))
        self.log("test_loss", loss, prog_bar=True, logger=True)
        # session.report({"test_loss": loss})  # Send the score to Tune.
        return {"loss": loss, "predictions": outputs, "labels": labels}
    
    def validation_epoch_end(self, batch_results):
        outputs, labels, losses = [], [], []
        for batch in batch_results:
            outputs.append(batch['predictions'])
            labels.append(batch['labels'])
            losses.append(batch['loss'])

        labels = torch.cat(labels).cpu().numpy()
        predictions = np.concatenate(torch.cat(outputs).sigmoid().to('cpu').numpy())
        score = get_score(labels, predictions)
        self.log("val_score", score, prog_bar=True, logger=True)
        # tune.report({"val_score": score})  # Send the score to Tune.

    def configure_optimizers(self):
        optimizer = AdamW(self.parameters(), lr=self.config_dict['encoder_lr'])
        # optimizer = AdamW(self.parameters(), lr=2e-5)
        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=self.n_warmup_steps,
            num_training_steps=self.n_training_steps
        )
        return dict(
          optimizer=optimizer,
          lr_scheduler=dict(
            scheduler=scheduler,
            interval='step'
          )
        )

# Training

## Callbacks

In [22]:
checkpoint_callback = ModelCheckpoint(
    dirpath="checkpoints",
    filename="best_checkpoint",
    save_top_k=1,
    verbose=True,
    monitor='val_loss',
    mode='min'
)

early_stopping_callback = EarlyStopping(monitor='val_loss', patience=2)

metrics = {"val_score": "val_score", "train_loss" : "train_loss", "val_loss" : "val_loss"}

In [23]:
def trainable(config_dict):  # Pass a "config" dictionary into your trainable.

    steps_per_epoch = len(train_df) // config_dict['batch_size']
    config_dict['training_steps'] = steps_per_epoch * config_dict['epochs']
    
    set_tokenizer(config_dict)
    set_max_len(config_dict)
    # train_dataset = USPPM_dataset(config_dict)
    datamodule = USPPPM_kf_datamodule(config_dict, dataframe)
    
    model = USPPPM_model(config_dict)
    
    callbacks = [TuneReportCallback(metrics, on="validation_end")]
    trainer = pl.Trainer(
            logger=logger,
            num_sanity_val_steps=0,
            check_val_every_n_epoch=1,
            callbacks=callbacks,
            max_epochs=config_dict['epochs'],
            devices=[1],
            accelerator="gpu",
            )
    
    internal_fit_loop = trainer.fit_loop
    trainer.fit_loop = KFoldLoop(config_dict['n_fold'], export_path="./")
    trainer.fit_loop.connect(internal_fit_loop)
    
    trainer.fit(model, datamodule)

In [24]:
tuner = tune.Tuner(tune.with_resources(trainable, 
                                       {"gpu": 4}), 
                                       param_space = config_dict,
                                       tune_config = tune.TuneConfig(metric="val_score", mode="max"),
                                       # tune_config = tune.TuneConfig(metric="val_score", mode="max"),
                                       run_config = air.RunConfig(name="tune_uspppm", verbose=3)
                                      )
                  

In [25]:
results = tuner.fit()

best_result = results.get_best_result()  # Get best result object
print(best_result)



Trial name,status,loc,iter,total time (s),val_score,train_loss,val_loss
trainable_bdd23_00000,ERROR,131.114.50.210:362371,4,65.7888,0.0805005,0.686017,0.669862

Trial name,# failures,error file
trainable_bdd23_00000,1,/storagenfs/m.petix/ray_results/tune_uspppm/trainable_bdd23_00000_0_2022-11-06_20-03-47/error.txt


100%|██████████| 136/136 [00:00<00:00, 4578.24it/s]
100%|██████████| 1000/1000 [00:00<00:00, 14155.93it/s]
  0%|          | 0/1000 [00:00<?, ?it/s]
100%|██████████| 1000/1000 [00:00<00:00, 12903.33it/s]
[2m[36m(trainable pid=362371)[0m GPU available: True (cuda), used: True
[2m[36m(trainable pid=362371)[0m TPU available: False, using: 0 TPU cores
[2m[36m(trainable pid=362371)[0m IPU available: False, using: 0 IPUs
[2m[36m(trainable pid=362371)[0m HPU available: False, using: 0 HPUs
[2m[36m(trainable pid=362371)[0m   rank_zero_deprecation(
[2m[36m(trainable pid=362371)[0m   rank_zero_deprecation("The `on_init_end` callback hook was deprecated in v1.6 and will be removed in v1.8.")
[2m[36m(trainable pid=362371)[0m   rank_zero_deprecation(
[2m[36m(trainable pid=362371)[0m   rank_zero_deprecation(
[2m[36m(trainable pid=362371)[0m   rank_zero_deprecation(
[2m[36m(trainable pid=362371)[0m   rank_zero_deprecation(
[2m[36m(trainable pid=362371)[0m Missing logge

[2m[36m(trainable pid=362371)[0m STARTING FOLD 1
[2m[36m(trainable pid=362371)[0m TRAIN FOLD 1 450
[2m[36m(trainable pid=362371)[0m VALID FOLD 1 450
Epoch 0:   0%|          | 0/16 [00:00<?, ?it/s] 


[2m[36m(trainable pid=362371)[0m LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
[2m[36m(trainable pid=362371)[0m 
[2m[36m(trainable pid=362371)[0m   | Name       | Type              | Params
[2m[36m(trainable pid=362371)[0m -------------------------------------------------
[2m[36m(trainable pid=362371)[0m 0 | criterion  | BCEWithLogitsLoss | 0     
[2m[36m(trainable pid=362371)[0m 1 | model      | DistilBertModel   | 66.4 M
[2m[36m(trainable pid=362371)[0m 2 | fc_dropout | Dropout           | 0     
[2m[36m(trainable pid=362371)[0m 3 | fc         | Linear            | 769   
[2m[36m(trainable pid=362371)[0m 4 | attention  | Sequential        | 394 K 
[2m[36m(trainable pid=362371)[0m -------------------------------------------------
[2m[36m(trainable pid=362371)[0m 66.8 M    Trainable params
[2m[36m(trainable pid=362371)[0m 0         Non-trainable params
[2m[36m(trainable pid=362371)[0m 66.8 M    Total params
[2m[36m(trainable pid=362371)[0m 26

Epoch 0:   6%|▋         | 1/16 [00:01<00:15,  1.04s/it, loss=0.691, v_num=0, train_loss=0.691]
Epoch 0:  12%|█▎        | 2/16 [00:01<00:09,  1.53it/s, loss=0.681, v_num=0, train_loss=0.671]
Epoch 0:  19%|█▉        | 3/16 [00:01<00:06,  1.92it/s, loss=0.671, v_num=0, train_loss=0.650]
Epoch 0:  25%|██▌       | 4/16 [00:01<00:05,  2.19it/s, loss=0.675, v_num=0, train_loss=0.688]
Epoch 0:  31%|███▏      | 5/16 [00:02<00:04,  2.40it/s, loss=0.668, v_num=0, train_loss=0.639]
Epoch 0:  38%|███▊      | 6/16 [00:02<00:03,  2.56it/s, loss=0.661, v_num=0, train_loss=0.626]
Epoch 0:  50%|█████     | 8/16 [00:02<00:02,  3.02it/s, loss=0.677, v_num=0, train_loss=0.755]
Validation: 0it [00:00, ?it/s][A0m 
[2m[36m(trainable pid=362371)[0m 
Validation:   0%|          | 0/8 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/8 [00:00<?, ?it/s][A
Epoch 0:  56%|█████▋    | 9/16 [00:03<00:02,  2.84it/s, loss=0.677, v_num=0, train_loss=0.755]
[2m[36m(trainable pid=362371)[0m 
Epoch 0:  

[2m[36m(trainable pid=362371)[0m `Trainer.fit` stopped: `max_epochs=2` reached.


Testing DataLoader 0:  50%|█████     | 1/2 [00:00<00:00, 57.41it/s]
Testing DataLoader 0: 100%|██████████| 2/2 [00:00<00:00, 14.75it/s]
[2m[36m(trainable pid=362371)[0m ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
[2m[36m(trainable pid=362371)[0m ┃        Test metric        ┃       DataLoader 0        ┃
[2m[36m(trainable pid=362371)[0m ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
[2m[36m(trainable pid=362371)[0m │         test_loss         │    0.6477512121200562     │
[2m[36m(trainable pid=362371)[0m └───────────────────────────┴───────────────────────────┘
[2m[36m(trainable pid=362371)[0m TEST for FOLD 1
[2m[36m(trainable pid=362371)[0m STARTING FOLD 2
[2m[36m(trainable pid=362371)[0m TRAIN FOLD 2 450
[2m[36m(trainable pid=362371)[0m VALID FOLD 2 450
Epoch 0:   0%|          | 0/16 [00:00<?, ?it/s]              
Epoch 0:   6%|▋         | 1/16 [00:00<00:11,  1.28it/s, loss=0.669, v_num=0, train_loss=0.694, val_loss=0.646, val_score=0

[2m[36m(trainable pid=362371)[0m `Trainer.fit` stopped: `max_epochs=2` reached.


Testing DataLoader 0:  50%|█████     | 1/2 [00:00<00:00, 67.54it/s]
Testing DataLoader 0: 100%|██████████| 2/2 [00:00<00:00, 14.84it/s]
[2m[36m(trainable pid=362371)[0m ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
[2m[36m(trainable pid=362371)[0m ┃        Test metric        ┃       DataLoader 0        ┃
[2m[36m(trainable pid=362371)[0m ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
[2m[36m(trainable pid=362371)[0m │         test_loss         │    0.6688225269317627     │
[2m[36m(trainable pid=362371)[0m └───────────────────────────┴───────────────────────────┘
[2m[36m(trainable pid=362371)[0m TEST for FOLD 2


[2m[36m(trainable pid=362371)[0m 2022-11-06 20:05:25,111	ERROR function_trainable.py:298 -- Runner Thread raised error.
[2m[36m(trainable pid=362371)[0m Traceback (most recent call last):
[2m[36m(trainable pid=362371)[0m   File "/storagenfs/m.petix/.local/lib/python3.8/site-packages/ray/tune/trainable/function_trainable.py", line 289, in run
[2m[36m(trainable pid=362371)[0m     self._entrypoint()
[2m[36m(trainable pid=362371)[0m   File "/storagenfs/m.petix/.local/lib/python3.8/site-packages/ray/tune/trainable/function_trainable.py", line 362, in entrypoint
[2m[36m(trainable pid=362371)[0m     return self._trainable_func(
[2m[36m(trainable pid=362371)[0m   File "/storagenfs/m.petix/.local/lib/python3.8/site-packages/ray/util/tracing/tracing_helper.py", line 466, in _resume_span
[2m[36m(trainable pid=362371)[0m     return method(self, *_args, **_kwargs)
[2m[36m(trainable pid=362371)[0m   File "/storagenfs/m.petix/.local/lib/python3.8/site-packages/ray/tune/trai

Result for trainable_bdd23_00000:
  date: 2022-11-06_20-05-00
  done: false
  experiment_id: 22f475b356ee4d0f94b7d54f8e5c17e3
  experiment_tag: '0'
  hostname: c4130-p100
  iterations_since_restore: 4
  node_ip: 131.114.50.210
  pid: 362371
  time_since_restore: 65.7887601852417
  time_this_iter_s: 14.71964693069458
  time_total_s: 65.7887601852417
  timestamp: 1667761500
  timesteps_since_restore: 0
  train_loss: 0.6860167980194092
  training_iteration: 4
  trial_id: bdd23_00000
  val_loss: 0.6698618531227112
  val_score: 0.08050053239597282
  warmup_time: 0.0051670074462890625
  


2022-11-06 20:05:25,527	ERROR tune.py:754 -- Trials did not complete: [trainable_bdd23_00000]
2022-11-06 20:05:25,528	INFO tune.py:758 -- Total run time: 98.48 seconds (98.17 seconds for the tuning loop).


Result(metrics={'val_score': 0.08050053239597282, 'train_loss': 0.6860167980194092, 'val_loss': 0.6698618531227112, 'done': False, 'trial_id': 'bdd23_00000', 'experiment_tag': '0'}, error=RayTaskError(TypeError)(TypeError("load_from_checkpoint() missing 1 required positional argument: 'checkpoint_path'")), log_dir=PosixPath('/storagenfs/m.petix/ray_results/tune_uspppm/trainable_bdd23_00000_0_2022-11-06_20-03-47'))


In [26]:
ray.shutdown()