# Pip Wheels

In [1]:
'''
!pip install pytorch_lightning
!pip install torchmetrics
!pip install tokenizers
!pip install transformers
!pip install ray[tune]
'''

'\n!pip install pytorch_lightning\n!pip install torchmetrics\n!pip install tokenizers\n!pip install transformers\n!pip install ray[tune]\n'

# Imports

In [2]:
# General Libraries
import os
import re
import random
import numpy as np
import pandas as pd
import scipy as sp



# PyTorch Lightning
import pytorch_lightning as pl
from pytorch_lightning import seed_everything, Trainer, LightningModule
from torchmetrics import Accuracy
from torchmetrics.functional import f1_score, auroc
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping, TQDMProgressBar 
from pytorch_lightning.loggers import TensorBoardLogger



# Scikit-learn
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, multilabel_confusion_matrix


# Ray[Tune]
import ray
from ray import air
from ray import tune
from ray.air import session
from ray.tune.integration.pytorch_lightning import TuneReportCallback

import torch
# HuggingFace Libraries
import tokenizers
import transformers 

%env TOKENIZERS_PARALLELISM=true

#out code 
from kfold_loop import KFoldLoop
from USPPM_model import USPPPM_model
from USPPM_dataset import set_tokenizer, set_max_len
from USPPM_kfold_datamodule import USPPPM_kf_datamodule

from datetime import datetime



env: TOKENIZERS_PARALLELISM=true


# Configuration

## Configuration Class: notebook-specific settings

In [3]:
class CFG:
    # General
    seed = 42
    
    # Debug 
    debug = False
    debug_samples = 100

## Configuration Dictionary: trial-specific settings

In [4]:
# Defining a search space!
config_dict = {
    "target_size" : 1,
    "num_workers" : 16,
    
    # Training parameters
    "batch_size" : tune.grid_search([4,8,16,32,64,128]),
    "epochs" : 20,
    "n_fold" : tune.grid_search([8,16,32]),
    "warmup_steps" : 0,
    "min_lr" : 1e-6,
    "encoder_lr" : 2e-5,
    "decoder_lr" : 2e-5,
    "eps" : 1e-6,
    "betas" : (0.9, 0.999),
    "weight_decay" : 0.01,
    "fc_dropout" : 0.2,
    "seed" : 42,

    # Transformers
    # "model" : tune.choice(["microsoft/deberta-v3-large"]),
    #"model" : tune.choice(["distilbert-base-uncased"]),
    "model" : tune.grid_search(["AI-Growth-Lab/PatentSBERTa","distilbert-base-uncased","ahotrod/electra_large_discriminator_squad2_512",
                                "Yanhao/simcse-bert-for-patent","microsoft/deberta-v3-large","anferico/bert-for-patents"])
}

## Directories

In [5]:
INPUT_DIR = '../dataset/us-patent-phrase-to-phrase-matching/'
OUTPUT_DIR = './'
if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)

## Logger

In [6]:
logger = TensorBoardLogger("lightning_logs", name="USPPPM")

## Random seed

In [7]:
pl.seed_everything(CFG.seed)

Global seed set to 42


42

# Data Loading

In [8]:
cpc_texts = torch.load('cpc_texts.pth')
dataframe = pd.read_csv("dataframe.csv")
display(dataframe.head())

Unnamed: 0.1,Unnamed: 0,id,anchor,target,context,score,context_text,text,score_map
0,0,37d61fd2272659b1,abatement,abatement of pollution,A47,0.5,HUMAN NECESSITIES. FURNITURE; DOMESTIC ARTICLE...,abatement[SEP]abatement of pollution[SEP]HUMAN...,2
1,1,7b9652b17b68b7a4,abatement,act of abating,A47,0.75,HUMAN NECESSITIES. FURNITURE; DOMESTIC ARTICLE...,abatement[SEP]act of abating[SEP]HUMAN NECESSI...,3
2,2,36d72442aefd8232,abatement,active catalyst,A47,0.25,HUMAN NECESSITIES. FURNITURE; DOMESTIC ARTICLE...,abatement[SEP]active catalyst[SEP]HUMAN NECESS...,1
3,3,5296b0c19e1ce60e,abatement,eliminating process,A47,0.5,HUMAN NECESSITIES. FURNITURE; DOMESTIC ARTICLE...,abatement[SEP]eliminating process[SEP]HUMAN NE...,2
4,4,54c1e3b9184cb5b6,abatement,forest region,A47,0.0,HUMAN NECESSITIES. FURNITURE; DOMESTIC ARTICLE...,abatement[SEP]forest region[SEP]HUMAN NECESSIT...,0


## Debug Slicing

In [9]:
if CFG.debug:
    dataframe = dataframe.iloc[:CFG.debug_samples,:]

## Train-Test Split

In [10]:
# train_df, test_df = train_test_split(dataframe, test_size = 0.1, random_state = CFG.seed, stratify = dataframe.score_map)
train_df, test_df = train_test_split(dataframe, test_size = 0.1, random_state = CFG.seed)
display(train_df.head())
display(test_df.head())

Unnamed: 0.1,Unnamed: 0,id,anchor,target,context,score,context_text,text,score_map
9900,9900,0dbb44b9a145edec,distributor pipe,pipe,B01,0.5,PERFORMING OPERATIONS; TRANSPORTING. PHYSICAL ...,distributor pipe[SEP]pipe[SEP]PERFORMING OPERA...,2
1303,1303,74afca34a5439c23,ammonia recovery,recovery of water,C01,0.25,HEMISTRY; METALLURGY. INORGANIC CHEMISTRY,ammonia recovery[SEP]recovery of water[SEP]HEM...,1
16591,16591,6371befc3ee1b0f2,inner closed,cylindrical inner member,E04,0.5,FIXED CONSTRUCTIONS. BUILDING,inner closed[SEP]cylindrical inner member[SEP]...,2
25822,25822,20489196c73bd86b,produce thin layers,produce layers,G01,0.5,PHYSICS. MEASURING; TESTING,produce thin layers[SEP]produce layers[SEP]PHY...,2
23640,23640,9af994b21c892022,parallel orientation,zero angle,G06,0.25,PHYSICS. COMPUTING; CALCULATING; COUNTING,parallel orientation[SEP]zero angle[SEP]PHYSIC...,1


Unnamed: 0.1,Unnamed: 0,id,anchor,target,context,score,context_text,text,score_map
33511,33511,ed1c4e525eb105fe,transmit alarm,display indicator,G08,0.0,PHYSICS. SIGNALLING,transmit alarm[SEP]display indicator[SEP]PHYSI...,0
18670,18670,5386316f318f5221,locking formation,retaining element,B60,0.25,PERFORMING OPERATIONS; TRANSPORTING. VEHICLES ...,locking formation[SEP]retaining element[SEP]PE...,1
18049,18049,1544ca6753fcbddd,lateral power,transducer,H01,0.25,ELECTRICITY. BASIC ELECTRIC ELEMENTS,lateral power[SEP]transducer[SEP]ELECTRICITY. ...,1
31660,31660,f9d8979b94cec923,spreader body,spreader,A01,0.75,HUMAN NECESSITIES. GRICULTURE; FORESTRY; ANIMA...,spreader body[SEP]spreader[SEP]HUMAN NECESSITI...,3
15573,15573,e151ca5ea5cc0f08,high gradient magnetic separators,magnetic filtration,B03,0.5,PERFORMING OPERATIONS; TRANSPORTING. SEPARATIO...,high gradient magnetic separators[SEP]magnetic...,2


# Training

## Callbacks

In [11]:
ray.init(num_gpus=4)

2022-11-14 11:57:32,190	INFO worker.py:1518 -- Started a local Ray instance.


0,1
Python version:,3.8.10
Ray version:,2.0.1


In [12]:
checkpoint_callback = ModelCheckpoint(
    dirpath="checkpoints",
    filename="best_checkpoint",
    save_top_k=1,
    verbose=True,
    monitor='val_loss',
    mode='min'
)

early_stopping_callback = EarlyStopping(monitor='val_loss', patience=2)

metrics = {"val_score": "val_score", "train_loss" : "train_loss", "val_loss" : "val_loss"}

In [13]:
def trainable(config_dict):  # Pass a "config" dictionary into your trainable.

    steps_per_epoch = len(train_df) // config_dict['batch_size']
    config_dict['training_steps'] = steps_per_epoch * config_dict['epochs']
    
    set_tokenizer(config_dict, OUTPUT_DIR)
    set_max_len(config_dict, cpc_texts, dataframe)
    # train_dataset = USPPM_dataset(config_dict)
    datamodule = USPPPM_kf_datamodule(config_dict, dataframe)
    
    model = USPPPM_model(config_dict)
    
    callbacks = [TuneReportCallback(metrics, on="validation_end"), checkpoint_callback, early_stopping_callback, TQDMProgressBar(refresh_rate=2)]
    trainer = pl.Trainer(
            logger=logger,
            num_sanity_val_steps=0,
            check_val_every_n_epoch=1,
            callbacks=callbacks,
            max_epochs=config_dict['epochs'],
            #devices=[1],
            accelerator="gpu",
        
            )
    
    export_path = f'./checkpoints/{config_dict["model"].replace("/","-")}_bs{config_dict["batch_size"]}_k{config_dict["n_fold"]}_{datetime.now().time()}'
    try:
        
        os.mkdir("checkpoints/")
        os.mkdir(export_path)
    except FileExistsError:
        pass
    
    internal_fit_loop = trainer.fit_loop
    trainer.fit_loop = KFoldLoop(config_dict['n_fold'], config_dict, export_path=export_path)
    trainer.fit_loop.connect(internal_fit_loop)
    
    trainer.fit(model, datamodule)

In [14]:
resource_group = tune.PlacementGroupFactory([{"CPU": 1, "GPU": 1}])

tuner = tune.Tuner(tune.with_resources(trainable, 
                                       {"cpu":0.25,"gpu":1}),
                                       param_space = config_dict,
                                       tune_config = tune.TuneConfig(metric="val_score", mode="max",max_concurrent_trials=4),
                                       # tune_config = tune.TuneConfig(metric="val_score", mode="max"),
                                       run_config = air.RunConfig(name="tune_uspppm", verbose=2, progress_reporter=tune.JupyterNotebookReporter(overwrite=True))
                                       )


Trial name,status,loc,batch_size,model,n_fold
trainable_2544e_00000,RUNNING,131.114.50.210:3796157,4,AI-Growth-Lab/P_83a0,8
trainable_2544e_00001,RUNNING,131.114.50.210:3796198,8,AI-Growth-Lab/P_83a0,8
trainable_2544e_00002,RUNNING,131.114.50.210:3796370,16,AI-Growth-Lab/P_83a0,8
trainable_2544e_00003,RUNNING,131.114.50.210:3797917,32,AI-Growth-Lab/P_83a0,8


In [None]:
results = tuner.fit()

best_result = results.get_best_result()  # Get best result object
print(best_result)

100%|██████████| 136/136 [00:00<00:00, 4997.02it/s]
  0%|          | 0/36473 [00:00<?, ?it/s]
  3%|▎         | 1239/36473 [00:00<00:02, 12383.96it/s]
  7%|▋         | 2722/36473 [00:00<00:02, 13822.27it/s]
 11%|█▏        | 4125/36473 [00:00<00:02, 13913.35it/s]
 15%|█▌        | 5517/36473 [00:00<00:02, 13501.66it/s]
 19%|█▉        | 6992/36473 [00:00<00:02, 13942.82it/s]
 23%|██▎       | 8404/36473 [00:00<00:02, 14000.21it/s]
 27%|██▋       | 9874/36473 [00:00<00:01, 14224.27it/s]
 31%|███▏      | 11413/36473 [00:00<00:01, 14592.18it/s]
 35%|███▌      | 12916/36473 [00:00<00:01, 14724.83it/s]
 40%|███▉      | 14474/36473 [00:01<00:01, 14985.35it/s]
 44%|████▍     | 15974/36473 [00:01<00:01, 14243.16it/s]
 48%|████▊     | 17514/36473 [00:01<00:01, 14578.62it/s]
 52%|█████▏    | 18992/36473 [00:01<00:01, 14637.38it/s]
 60%|██████    | 21976/36473 [00:01<00:01, 14434.50it/s]
 64%|██████▍   | 23474/36473 [00:01<00:00, 14592.63it/s]
 69%|██████▊   | 25022/36473 [00:01<00:00, 14852.66it/s]
 

[2m[36m(trainable pid=3796157)[0m STARTING FOLD 1
[2m[36m(trainable pid=3796157)[0m TRAIN FOLD 1 28721
[2m[36m(trainable pid=3796157)[0m VALID FOLD 1 4104
Epoch 0:   0%|          | 0/8207 [00:00<?, ?it/s] 


[2m[36m(trainable pid=3796157)[0m LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [2]
[2m[36m(trainable pid=3796157)[0m 
[2m[36m(trainable pid=3796157)[0m   | Name       | Type              | Params
[2m[36m(trainable pid=3796157)[0m -------------------------------------------------
[2m[36m(trainable pid=3796157)[0m 0 | model      | MPNetModel        | 109 M 
[2m[36m(trainable pid=3796157)[0m 1 | criterion  | BCEWithLogitsLoss | 0     
[2m[36m(trainable pid=3796157)[0m 2 | fc_dropout | Dropout           | 0     
[2m[36m(trainable pid=3796157)[0m 3 | fc         | Linear            | 769   
[2m[36m(trainable pid=3796157)[0m 4 | attention  | Sequential        | 394 K 
[2m[36m(trainable pid=3796157)[0m -------------------------------------------------
[2m[36m(trainable pid=3796157)[0m 109 M     Trainable params
[2m[36m(trainable pid=3796157)[0m 0         Non-trainable params
[2m[36m(trainable pid=3796157)[0m 109 M     Total params
[2m[36m(trainable pid=3796157

Epoch 0:   0%|          | 2/8207 [00:01<1:29:12,  1.53it/s, loss=0.64, v_num=0, train_loss=0.605]
Epoch 0:   0%|          | 4/8207 [00:01<50:31,  2.71it/s, loss=0.626, v_num=0, train_loss=0.537] 


 64%|██████▍   | 23333/36473 [00:01<00:00, 13512.32it/s]
 68%|██████▊   | 24763/36473 [00:01<00:00, 13743.96it/s]


Epoch 0:   0%|          | 6/8207 [00:01<37:44,  3.62it/s, loss=0.671, v_num=0, train_loss=0.728]
Epoch 0:   0%|          | 8/8207 [00:01<31:16,  4.37it/s, loss=0.666, v_num=0, train_loss=0.725]


 72%|███████▏  | 26138/36473 [00:01<00:00, 13708.71it/s]
 76%|███████▌  | 27602/36473 [00:02<00:00, 13985.03it/s]
 80%|███████▉  | 29004/36473 [00:02<00:00, 13994.93it/s]


Epoch 0:   0%|          | 10/8207 [00:02<27:20,  5.00it/s, loss=0.662, v_num=0, train_loss=0.668]


 83%|████████▎ | 30407/36473 [00:02<00:00, 14001.76it/s]
 87%|████████▋ | 31808/36473 [00:02<00:00, 13853.78it/s]


Epoch 0:   0%|          | 12/8207 [00:02<24:51,  5.49it/s, loss=0.663, v_num=0, train_loss=0.662]
Epoch 0:   0%|          | 14/8207 [00:02<23:09,  5.90it/s, loss=0.666, v_num=0, train_loss=0.656]


 95%|█████████▌| 34702/36473 [00:02<00:00, 14190.64it/s]
 99%|█████████▉| 36122/36473 [00:02<00:00, 14085.80it/s]
100%|██████████| 36473/36473 [00:02<00:00, 13759.27it/s]


Epoch 0:   0%|          | 16/8207 [00:02<21:41,  6.29it/s, loss=0.657, v_num=0, train_loss=0.557]
Epoch 0:   0%|          | 18/8207 [00:02<20:36,  6.62it/s, loss=0.654, v_num=0, train_loss=0.656]
Epoch 0:   0%|          | 20/8207 [00:02<19:43,  6.91it/s, loss=0.648, v_num=0, train_loss=0.570]
Epoch 0:   0%|          | 22/8207 [00:03<18:59,  7.18it/s, loss=0.656, v_num=0, train_loss=0.714]
Epoch 0:   0%|          | 24/8207 [00:03<18:22,  7.43it/s, loss=0.664, v_num=0, train_loss=0.624]
Epoch 0:   0%|          | 26/8207 [00:03<17:50,  7.64it/s, loss=0.657, v_num=0, train_loss=0.620]
Epoch 0:   0%|          | 28/8207 [00:03<17:27,  7.81it/s, loss=0.66, v_num=0, train_loss=0.707] 
Epoch 0:   0%|          | 30/8207 [00:03<17:07,  7.96it/s, loss=0.654, v_num=0, train_loss=0.678]
Epoch 0:   0%|          | 32/8207 [00:03<16:48,  8.11it/s, loss=0.649, v_num=0, train_loss=0.567]
Epoch 0:   0%|          | 34/8207 [00:04<16:30,  8.25it/s, loss=0.657, v_num=0, train_loss=0.830]


100%|██████████| 136/136 [00:00<00:00, 4194.18it/s]
  0%|          | 0/36473 [00:00<?, ?it/s]
  4%|▍         | 1454/36473 [00:00<00:02, 14537.73it/s]


Epoch 0:   0%|          | 36/8207 [00:04<16:13,  8.39it/s, loss=0.656, v_num=0, train_loss=0.660]
Epoch 0:   0%|          | 38/8207 [00:04<15:59,  8.52it/s, loss=0.658, v_num=0, train_loss=0.682]


  8%|▊         | 3001/36473 [00:00<00:02, 15081.74it/s]
 12%|█▏        | 4538/36473 [00:00<00:02, 15210.15it/s]
 21%|██        | 7581/36473 [00:00<00:01, 14934.80it/s]
 25%|██▍       | 9076/36473 [00:00<00:01, 14878.93it/s]


Epoch 0:   0%|          | 40/8207 [00:04<15:46,  8.63it/s, loss=0.666, v_num=0, train_loss=0.590]
Epoch 0:   1%|          | 42/8207 [00:04<15:34,  8.74it/s, loss=0.663, v_num=0, train_loss=0.785]
Epoch 0:   1%|          | 44/8207 [00:04<15:25,  8.82it/s, loss=0.654, v_num=0, train_loss=0.591]


 29%|██▉       | 10565/36473 [00:00<00:01, 14780.68it/s]
 33%|███▎      | 12044/36473 [00:00<00:01, 14359.94it/s]
 41%|████▏     | 15102/36473 [00:01<00:01, 14810.01it/s]
 46%|████▌     | 16631/36473 [00:01<00:01, 14952.15it/s]


Epoch 0:   1%|          | 46/8207 [00:05<15:17,  8.89it/s, loss=0.643, v_num=0, train_loss=0.556]
Epoch 0:   1%|          | 48/8207 [00:05<15:08,  8.98it/s, loss=0.641, v_num=0, train_loss=0.623]


 50%|█████     | 18244/36473 [00:01<00:01, 15305.22it/s]
[2m[36m(trainable pid=3796198)[0m GPU available: True (cuda), used: True
[2m[36m(trainable pid=3796198)[0m TPU available: False, using: 0 TPU cores
[2m[36m(trainable pid=3796198)[0m IPU available: False, using: 0 IPUs
[2m[36m(trainable pid=3796198)[0m HPU available: False, using: 0 HPUs
[2m[36m(trainable pid=3796198)[0m   rank_zero_deprecation(
[2m[36m(trainable pid=3796198)[0m   rank_zero_deprecation("The `on_init_end` callback hook was deprecated in v1.6 and will be removed in v1.8.")
[2m[36m(trainable pid=3796198)[0m   rank_zero_deprecation(
[2m[36m(trainable pid=3796198)[0m   rank_zero_deprecation(
[2m[36m(trainable pid=3796198)[0m   rank_zero_deprecation(
[2m[36m(trainable pid=3796198)[0m   rank_zero_deprecation(
[2m[36m(trainable pid=3796198)[0m Missing logger folder: lightning_logs/USPPPM
[2m[36m(trainable pid=3796198)[0m   rank_zero_warn(f"Checkpoint directory {dirpath} exists and is no

Epoch 0:   1%|          | 50/8207 [00:05<14:59,  9.07it/s, loss=0.638, v_num=0, train_loss=0.712]


 63%|██████▎   | 22971/36473 [00:01<00:00, 15601.57it/s]
 67%|██████▋   | 24532/36473 [00:01<00:00, 15593.66it/s]


Epoch 0:   1%|          | 52/8207 [00:05<14:56,  9.09it/s, loss=0.647, v_num=0, train_loss=0.706]
Epoch 0:   1%|          | 54/8207 [00:05<14:49,  9.16it/s, loss=0.628, v_num=0, train_loss=0.589]


 72%|███████▏  | 26092/36473 [00:01<00:00, 15145.36it/s]
 76%|███████▌  | 27610/36473 [00:01<00:00, 14816.58it/s]
 80%|████████  | 29222/36473 [00:01<00:00, 15192.20it/s]


Epoch 0:   1%|          | 56/8207 [00:06<14:42,  9.24it/s, loss=0.636, v_num=0, train_loss=0.620]


 84%|████████▍ | 30763/36473 [00:02<00:00, 15253.36it/s]
 89%|████████▊ | 32348/36473 [00:02<00:00, 15428.92it/s]


Epoch 0:   1%|          | 58/8207 [00:06<14:35,  9.31it/s, loss=0.636, v_num=0, train_loss=0.670]
Epoch 0:   1%|          | 60/8207 [00:06<14:30,  9.36it/s, loss=0.645, v_num=0, train_loss=0.787]


 93%|█████████▎| 33896/36473 [00:02<00:00, 15442.91it/s]
100%|██████████| 36473/36473 [00:02<00:00, 15215.92it/s]
  0%|          | 0/36473 [00:00<?, ?it/s]


Epoch 0:   1%|          | 62/8207 [00:06<14:27,  9.39it/s, loss=0.648, v_num=0, train_loss=0.750]
Epoch 0:   1%|          | 64/8207 [00:06<14:21,  9.45it/s, loss=0.651, v_num=0, train_loss=0.568]


  4%|▍         | 1504/36473 [00:00<00:02, 15030.73it/s]
  8%|▊         | 3015/36473 [00:00<00:02, 15073.95it/s]
 12%|█▏        | 4552/36473 [00:00<00:02, 15207.72it/s]


Epoch 0:   1%|          | 66/8207 [00:06<14:16,  9.50it/s, loss=0.652, v_num=0, train_loss=0.589]


 17%|█▋        | 6073/36473 [00:00<00:02, 14990.30it/s]
 21%|██        | 7588/36473 [00:00<00:01, 15044.99it/s]


Epoch 0:   1%|          | 68/8207 [00:07<14:12,  9.55it/s, loss=0.651, v_num=0, train_loss=0.676]
Epoch 0:   1%|          | 70/8207 [00:07<14:09,  9.58it/s, loss=0.66, v_num=0, train_loss=0.539] 


 25%|██▍       | 9093/36473 [00:00<00:01, 14968.74it/s]
 29%|██▉       | 10594/36473 [00:00<00:01, 14980.95it/s]
 33%|███▎      | 12148/36473 [00:00<00:01, 15154.37it/s]
 38%|███▊      | 13761/36473 [00:00<00:01, 15456.30it/s]
 42%|████▏     | 15365/36473 [00:01<00:01, 15635.74it/s]


Epoch 0:   1%|          | 72/8207 [00:07<14:04,  9.63it/s, loss=0.662, v_num=0, train_loss=0.727]
Epoch 0:   1%|          | 74/8207 [00:07<14:00,  9.68it/s, loss=0.67, v_num=0, train_loss=0.644] 
Epoch 0:   1%|          | 76/8207 [00:07<13:56,  9.72it/s, loss=0.677, v_num=0, train_loss=0.728]


 51%|█████     | 18480/36473 [00:01<00:01, 15474.10it/s]
 55%|█████▍    | 20028/36473 [00:01<00:01, 15327.53it/s]
[2m[36m(trainable pid=3796198)[0m LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
 59%|█████▉    | 21562/36473 [00:01<00:00, 15006.15it/s]
[2m[36m(trainable pid=3796198)[0m 
[2m[36m(trainable pid=3796198)[0m   | Name       | Type              | Params
[2m[36m(trainable pid=3796198)[0m -------------------------------------------------
[2m[36m(trainable pid=3796198)[0m 0 | model      | MPNetModel        | 109 M 
[2m[36m(trainable pid=3796198)[0m 1 | criterion  | BCEWithLogitsLoss | 0     
[2m[36m(trainable pid=3796198)[0m 2 | fc_dropout | Dropout           | 0     
[2m[36m(trainable pid=3796198)[0m 3 | fc         | Linear            | 769   
[2m[36m(trainable pid=3796198)[0m 4 | attention  | Sequential        | 394 K 
[2m[36m(trainable pid=3796198)[0m -------------------------------------------------
[2m[36m(trainable pid=3796198)[0m 109 M     Train

Epoch 0:   1%|          | 78/8207 [00:07<13:53,  9.75it/s, loss=0.671, v_num=0, train_loss=0.631]
[2m[36m(trainable pid=3796198)[0m STARTING FOLD 1
[2m[36m(trainable pid=3796198)[0m TRAIN FOLD 1 28721
[2m[36m(trainable pid=3796198)[0m VALID FOLD 1 4104
Epoch 0:   0%|          | 0/4104 [00:00<?, ?it/s] 
Epoch 0:   1%|          | 80/8207 [00:08<13:49,  9.80it/s, loss=0.658, v_num=0, train_loss=0.611]


 67%|██████▋   | 24590/36473 [00:01<00:00, 15069.10it/s]
 72%|███████▏  | 26098/36473 [00:01<00:00, 14769.13it/s]
 76%|███████▌  | 27577/36473 [00:01<00:00, 14733.86it/s]


Epoch 0:   1%|          | 82/8207 [00:08<13:46,  9.84it/s, loss=0.655, v_num=0, train_loss=0.685]


 80%|███████▉  | 29059/36473 [00:01<00:00, 14757.47it/s]
 84%|████████▎ | 30536/36473 [00:02<00:00, 14680.52it/s]


Epoch 0:   1%|          | 84/8207 [00:08<13:42,  9.87it/s, loss=0.653, v_num=0, train_loss=0.619]
Epoch 0:   1%|          | 86/8207 [00:08<13:41,  9.88it/s, loss=0.657, v_num=0, train_loss=0.634]


 88%|████████▊ | 32005/36473 [00:02<00:00, 14482.66it/s]
 92%|█████████▏| 33455/36473 [00:02<00:00, 14356.17it/s]


Epoch 0:   1%|          | 88/8207 [00:08<13:39,  9.91it/s, loss=0.657, v_num=0, train_loss=0.550]
Epoch 0:   1%|          | 90/8207 [00:09<13:36,  9.94it/s, loss=0.65, v_num=0, train_loss=0.617] 


 96%|█████████▌| 34892/36473 [00:02<00:00, 14106.61it/s]
100%|██████████| 36473/36473 [00:02<00:00, 14838.95it/s]


Epoch 0:   1%|          | 92/8207 [00:09<13:33,  9.97it/s, loss=0.639, v_num=0, train_loss=0.664]
Epoch 0:   0%|          | 2/4104 [00:01<45:18,  1.51it/s, loss=0.678, v_num=0, train_loss=0.657]
Epoch 0:   1%|          | 94/8207 [00:09<13:31,  9.99it/s, loss=0.639, v_num=0, train_loss=0.716]
Epoch 0:   1%|          | 96/8207 [00:09<13:28, 10.03it/s, loss=0.634, v_num=0, train_loss=0.626]
Epoch 0:   0%|          | 4/4104 [00:01<27:02,  2.53it/s, loss=0.673, v_num=0, train_loss=0.673]
Epoch 0:   1%|          | 98/8207 [00:09<13:26, 10.06it/s, loss=0.655, v_num=0, train_loss=0.835]
Epoch 0:   1%|          | 100/8207 [00:09<13:24, 10.08it/s, loss=0.654, v_num=0, train_loss=0.620]
Epoch 0:   0%|          | 6/4104 [00:01<20:56,  3.26it/s, loss=0.67, v_num=0, train_loss=0.655] 
Epoch 0:   1%|          | 102/8207 [00:10<13:23, 10.08it/s, loss=0.653, v_num=0, train_loss=0.567]
Epoch 0:   0%|          | 8/4104 [00:02<17:55,  3.81it/s, loss=0.667, v_num=0, train_loss=0.641]
Epoch 0:   1%|▏       

  0%|          | 0/136 [00:00<?, ?it/s]
100%|██████████| 136/136 [00:00<00:00, 4790.19it/s]
  0%|          | 0/36473 [00:00<?, ?it/s]
  4%|▍         | 1454/36473 [00:00<00:02, 14536.37it/s]


Epoch 0:   0%|          | 18/4104 [00:03<12:53,  5.28it/s, loss=0.652, v_num=0, train_loss=0.639]
Epoch 0:   1%|▏         | 118/8207 [00:11<13:10, 10.23it/s, loss=0.667, v_num=0, train_loss=0.633]


  8%|▊         | 2924/36473 [00:00<00:02, 14630.69it/s]
 12%|█▏        | 4401/36473 [00:00<00:02, 14693.73it/s]


Epoch 0:   1%|▏         | 120/8207 [00:11<13:09, 10.25it/s, loss=0.668, v_num=0, train_loss=0.644]
Epoch 0:   0%|          | 20/4104 [00:03<12:27,  5.46it/s, loss=0.654, v_num=0, train_loss=0.654]


 20%|██        | 7340/36473 [00:00<00:02, 14565.53it/s]
 24%|██▍       | 8797/36473 [00:00<00:01, 14528.77it/s]


Epoch 0:   1%|▏         | 122/8207 [00:11<13:07, 10.26it/s, loss=0.665, v_num=0, train_loss=0.652]
Epoch 0:   1%|          | 22/4104 [00:03<12:06,  5.62it/s, loss=0.653, v_num=0, train_loss=0.600]
Epoch 0:   2%|▏         | 124/8207 [00:12<13:06, 10.28it/s, loss=0.649, v_num=0, train_loss=0.595]


 28%|██▊       | 10250/36473 [00:00<00:01, 14377.56it/s]
 32%|███▏      | 11689/36473 [00:00<00:01, 14359.68it/s]


Epoch 0:   1%|          | 24/4104 [00:04<11:47,  5.77it/s, loss=0.65, v_num=0, train_loss=0.645] 
Epoch 0:   2%|▏         | 126/8207 [00:12<13:06, 10.28it/s, loss=0.644, v_num=0, train_loss=0.582]


[2m[36m(trainable pid=3796370)[0m GPU available: True (cuda), used: True
[2m[36m(trainable pid=3796370)[0m TPU available: False, using: 0 TPU cores
[2m[36m(trainable pid=3796370)[0m IPU available: False, using: 0 IPUs
[2m[36m(trainable pid=3796370)[0m HPU available: False, using: 0 HPUs
[2m[36m(trainable pid=3796370)[0m   rank_zero_deprecation(
[2m[36m(trainable pid=3796370)[0m   rank_zero_deprecation("The `on_init_end` callback hook was deprecated in v1.6 and will be removed in v1.8.")
[2m[36m(trainable pid=3796370)[0m   rank_zero_deprecation(
[2m[36m(trainable pid=3796370)[0m   rank_zero_deprecation(
[2m[36m(trainable pid=3796370)[0m   rank_zero_deprecation(
[2m[36m(trainable pid=3796370)[0m   rank_zero_deprecation(
[2m[36m(trainable pid=3796370)[0m Missing logger folder: lightning_logs/USPPPM
 40%|███▉      | 14588/36473 [00:01<00:01, 14423.93it/s]
[2m[36m(trainable pid=3796370)[0m   rank_zero_warn(f"Checkpoint directory {dirpath} exists and is no

Epoch 0:   2%|▏         | 128/8207 [00:12<13:04, 10.29it/s, loss=0.637, v_num=0, train_loss=0.611]
Epoch 0:   1%|          | 26/4104 [00:04<11:32,  5.89it/s, loss=0.647, v_num=0, train_loss=0.643]
Epoch 0:   2%|▏         | 130/8207 [00:12<13:03, 10.31it/s, loss=0.628, v_num=0, train_loss=0.619]


 48%|████▊     | 17467/36473 [00:01<00:01, 14350.37it/s]
 52%|█████▏    | 18915/36473 [00:01<00:01, 14387.24it/s]


Epoch 0:   1%|          | 28/4104 [00:04<11:21,  5.98it/s, loss=0.645, v_num=0, train_loss=0.591]
Epoch 0:   2%|▏         | 132/8207 [00:12<13:02, 10.33it/s, loss=0.635, v_num=0, train_loss=0.796]


 60%|█████▉    | 21838/36473 [00:01<00:01, 14504.08it/s]
 64%|██████▍   | 23289/36473 [00:01<00:00, 14432.96it/s]


Epoch 0:   2%|▏         | 134/8207 [00:12<13:01, 10.32it/s, loss=0.622, v_num=0, train_loss=0.478]
Epoch 0:   1%|          | 30/4104 [00:04<11:10,  6.08it/s, loss=0.656, v_num=0, train_loss=0.697]


 68%|██████▊   | 24733/36473 [00:01<00:00, 14327.12it/s]
 72%|███████▏  | 26166/36473 [00:01<00:00, 14284.56it/s]


Epoch 0:   2%|▏         | 136/8207 [00:13<13:00, 10.34it/s, loss=0.626, v_num=0, train_loss=0.671]
Epoch 0:   1%|          | 32/4104 [00:05<11:00,  6.17it/s, loss=0.653, v_num=0, train_loss=0.588]
Epoch 0:   2%|▏         | 138/8207 [00:13<12:59, 10.35it/s, loss=0.624, v_num=0, train_loss=0.756]


 80%|███████▉  | 29038/36473 [00:02<00:00, 14265.37it/s]
 84%|████████▎ | 30465/36473 [00:02<00:00, 14224.15it/s]


Epoch 0:   2%|▏         | 140/8207 [00:13<12:58, 10.36it/s, loss=0.612, v_num=0, train_loss=0.535]
Epoch 0:   1%|          | 34/4104 [00:05<10:53,  6.23it/s, loss=0.65, v_num=0, train_loss=0.720] 


 88%|████████▊ | 31919/36473 [00:02<00:00, 14314.97it/s]
 91%|█████████▏| 33358/36473 [00:02<00:00, 14336.74it/s]


Epoch 0:   2%|▏         | 142/8207 [00:13<12:58, 10.36it/s, loss=0.606, v_num=0, train_loss=0.509]
Epoch 0:   1%|          | 36/4104 [00:05<10:46,  6.30it/s, loss=0.66, v_num=0, train_loss=0.821]
Epoch 0:   2%|▏         | 144/8207 [00:13<12:57, 10.37it/s, loss=0.599, v_num=0, train_loss=0.567]


 99%|█████████▉| 36204/36473 [00:02<00:00, 14026.40it/s]
100%|██████████| 36473/36473 [00:02<00:00, 14330.10it/s]
  4%|▎         | 1327/36473 [00:00<00:02, 13262.23it/s]


Epoch 0:   1%|          | 38/4104 [00:05<10:39,  6.36it/s, loss=0.659, v_num=0, train_loss=0.734]
Epoch 0:   2%|▏         | 146/8207 [00:14<12:56, 10.38it/s, loss=0.61, v_num=0, train_loss=0.572] 


  7%|▋         | 2719/36473 [00:00<00:02, 13648.07it/s]
 11%|█▏        | 4144/36473 [00:00<00:02, 13921.22it/s]


Epoch 0:   2%|▏         | 148/8207 [00:14<12:55, 10.39it/s, loss=0.608, v_num=0, train_loss=0.482]
Epoch 0:   1%|          | 40/4104 [00:06<10:31,  6.43it/s, loss=0.653, v_num=0, train_loss=0.636]


 15%|█▌        | 5584/36473 [00:00<00:02, 14107.66it/s]
 23%|██▎       | 8428/36473 [00:00<00:01, 14168.45it/s]


Epoch 0:   2%|▏         | 150/8207 [00:14<12:54, 10.41it/s, loss=0.618, v_num=0, train_loss=0.725]
Epoch 0:   1%|          | 42/4104 [00:06<10:26,  6.49it/s, loss=0.649, v_num=0, train_loss=0.561]
Epoch 0:   2%|▏         | 152/8207 [00:14<12:53, 10.42it/s, loss=0.611, v_num=0, train_loss=0.629]


 27%|██▋       | 9859/36473 [00:00<00:01, 14211.34it/s]
 31%|███       | 11281/36473 [00:00<00:01, 13566.55it/s]


Epoch 0:   2%|▏         | 154/8207 [00:14<12:52, 10.43it/s, loss=0.619, v_num=0, train_loss=0.576]
Epoch 0:   1%|          | 44/4104 [00:06<10:22,  6.52it/s, loss=0.649, v_num=0, train_loss=0.612]
[2m[36m(trainable pid=3796370)[0m STARTING FOLD 1
[2m[36m(trainable pid=3796370)[0m TRAIN FOLD 1 28721
[2m[36m(trainable pid=3796370)[0m VALID FOLD 1 4104
Epoch 0:   0%|          | 0/2053 [00:00<?, ?it/s] 


 35%|███▍      | 12676/36473 [00:00<00:01, 13679.67it/s]
[2m[36m(trainable pid=3796370)[0m LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]
[2m[36m(trainable pid=3796370)[0m 
[2m[36m(trainable pid=3796370)[0m   | Name       | Type              | Params
[2m[36m(trainable pid=3796370)[0m -------------------------------------------------
[2m[36m(trainable pid=3796370)[0m 0 | model      | MPNetModel        | 109 M 
[2m[36m(trainable pid=3796370)[0m 1 | criterion  | BCEWithLogitsLoss | 0     
[2m[36m(trainable pid=3796370)[0m 2 | fc_dropout | Dropout           | 0     
[2m[36m(trainable pid=3796370)[0m 3 | fc         | Linear            | 769   
[2m[36m(trainable pid=3796370)[0m 4 | attention  | Sequential        | 394 K 
[2m[36m(trainable pid=3796370)[0m -------------------------------------------------
[2m[36m(trainable pid=3796370)[0m 109 M     Trainable params
[2m[36m(trainable pid=3796370)[0m 0         Non-trainable params
[2m[36m(trainable pid=3796370)[

Epoch 0:   2%|▏         | 156/8207 [00:14<12:52, 10.42it/s, loss=0.615, v_num=0, train_loss=0.667]
Epoch 0:   1%|          | 46/4104 [00:07<10:18,  6.56it/s, loss=0.648, v_num=0, train_loss=0.681]
Epoch 0:   2%|▏         | 158/8207 [00:15<12:51, 10.43it/s, loss=0.614, v_num=0, train_loss=0.491]


 46%|████▌     | 16802/36473 [00:01<00:01, 13450.59it/s]
 50%|████▉     | 18150/36473 [00:01<00:01, 13304.19it/s]
 54%|█████▎    | 19515/36473 [00:01<00:01, 13404.66it/s]
 57%|█████▋    | 20910/36473 [00:01<00:01, 13563.33it/s]
 61%|██████    | 22268/36473 [00:01<00:01, 13241.58it/s]


Epoch 0:   1%|          | 48/4104 [00:07<10:15,  6.59it/s, loss=0.653, v_num=0, train_loss=0.652]
Epoch 0:   2%|▏         | 160/8207 [00:15<12:50, 10.44it/s, loss=0.625, v_num=0, train_loss=0.620]
Epoch 0:   2%|▏         | 162/8207 [00:15<12:50, 10.45it/s, loss=0.628, v_num=0, train_loss=0.648]


 65%|██████▍   | 23615/36473 [00:01<00:00, 13307.16it/s]


Epoch 0:   2%|▏         | 164/8207 [00:15<12:49, 10.46it/s, loss=0.632, v_num=0, train_loss=0.569]
Epoch 0:   1%|          | 50/4104 [00:07<10:12,  6.62it/s, loss=0.655, v_num=0, train_loss=0.641]


 68%|██████▊   | 24948/36473 [00:01<00:00, 13298.18it/s]


Epoch 0:   2%|▏         | 166/8207 [00:15<12:47, 10.47it/s, loss=0.632, v_num=0, train_loss=0.569]
Epoch 0:   2%|▏         | 166/8207 [00:15<12:47, 10.47it/s, loss=0.62, v_num=0, train_loss=0.509] 


 72%|███████▏  | 26280/36473 [00:01<00:00, 13070.86it/s]
 76%|███████▌  | 27589/36473 [00:02<00:00, 12974.24it/s]
 79%|███████▉  | 28888/36473 [00:02<00:00, 12960.17it/s]


Epoch 0:   1%|▏         | 52/4104 [00:07<10:12,  6.62it/s, loss=0.661, v_num=0, train_loss=0.740]
Epoch 0:   2%|▏         | 168/8207 [00:16<12:46, 10.48it/s, loss=0.632, v_num=0, train_loss=0.638]


 83%|████████▎ | 30238/36473 [00:02<00:00, 13118.40it/s]


Epoch 0:   2%|▏         | 170/8207 [00:16<12:46, 10.49it/s, loss=0.618, v_num=0, train_loss=0.483]
Epoch 0:   1%|▏         | 54/4104 [00:08<10:08,  6.66it/s, loss=0.66, v_num=0, train_loss=0.673] 


 90%|█████████ | 32962/36473 [00:02<00:00, 13368.41it/s]
 94%|█████████▍| 34349/36473 [00:02<00:00, 13515.60it/s]
100%|██████████| 36473/36473 [00:02<00:00, 13504.48it/s]


Epoch 0:   2%|▏         | 172/8207 [00:16<12:45, 10.50it/s, loss=0.622, v_num=0, train_loss=0.747]
Epoch 0:   1%|▏         | 56/4104 [00:08<10:04,  6.69it/s, loss=0.654, v_num=0, train_loss=0.638]
Epoch 0:   0%|          | 2/2053 [00:01<26:08,  1.31it/s, loss=0.703, v_num=0, train_loss=0.678]
Epoch 0:   2%|▏         | 174/8207 [00:16<12:44, 10.51it/s, loss=0.62, v_num=0, train_loss=0.538] 
Epoch 0:   2%|▏         | 176/8207 [00:16<12:43, 10.52it/s, loss=0.626, v_num=0, train_loss=0.738]
Epoch 0:   1%|▏         | 58/4104 [00:08<10:01,  6.72it/s, loss=0.654, v_num=0, train_loss=0.693]
Epoch 0:   0%|          | 4/2053 [00:01<16:37,  2.05it/s, loss=0.675, v_num=0, train_loss=0.647]
Epoch 0:   2%|▏         | 178/8207 [00:16<12:42, 10.53it/s, loss=0.633, v_num=0, train_loss=0.728]
Epoch 0:   1%|▏         | 60/4104 [00:08<10:00,  6.74it/s, loss=0.654, v_num=0, train_loss=0.594]
Epoch 0:   2%|▏         | 180/8207 [00:17<12:41, 10.55it/s, loss=0.642, v_num=0, train_loss=0.683]
Epoch 0:   2%|▏  

[2m[36m(trainable pid=3797917)[0m GPU available: True (cuda), used: True
[2m[36m(trainable pid=3797917)[0m TPU available: False, using: 0 TPU cores
[2m[36m(trainable pid=3797917)[0m IPU available: False, using: 0 IPUs
[2m[36m(trainable pid=3797917)[0m HPU available: False, using: 0 HPUs


Epoch 0:   3%|▎         | 216/8207 [00:20<12:26, 10.70it/s, loss=0.664, v_num=0, train_loss=0.623]
Epoch 0:   2%|▏         | 84/4104 [00:12<09:40,  6.92it/s, loss=0.612, v_num=0, train_loss=0.613]
Epoch 0:   1%|          | 20/2053 [00:05<08:59,  3.77it/s, loss=0.665, v_num=0, train_loss=0.659]
Epoch 0:   3%|▎         | 218/8207 [00:20<12:26, 10.70it/s, loss=0.648, v_num=0, train_loss=0.595]
Epoch 0:   2%|▏         | 86/4104 [00:12<09:39,  6.94it/s, loss=0.614, v_num=0, train_loss=0.671]
Epoch 0:   3%|▎         | 220/8207 [00:20<12:26, 10.70it/s, loss=0.652, v_num=0, train_loss=0.607]
Epoch 0:   1%|          | 22/2053 [00:05<08:47,  3.85it/s, loss=0.66, v_num=0, train_loss=0.663] 
Epoch 0:   2%|▏         | 88/4104 [00:12<09:37,  6.96it/s, loss=0.615, v_num=0, train_loss=0.671]
Epoch 0:   3%|▎         | 222/8207 [00:20<12:25, 10.71it/s, loss=0.643, v_num=0, train_loss=0.530]
Epoch 0:   3%|▎         | 224/8207 [00:20<12:24, 10.72it/s, loss=0.65, v_num=0, train_loss=0.751] 
Epoch 0:   2%|▏

In [None]:
ray.shutdown()

In [None]:
# Get a dataframe for the last reported results of all of the trials 
df = results.get_dataframe() 

In [None]:
df.to_csv('grid_search_results.csv')