# Pip Wheels

In [1]:
# Dependency installation is disabled: the commands below sit inside a string
# literal, so running this cell installs nothing and only echoes the string.
# To install, run the commands in a cell of their own, outside the string.
'''
!pip install pytorch_lightning
!pip install torchmetrics
!pip install tokenizers
!pip install transformers
!pip install ray[tune]
'''

'\n!pip install pytorch_lightning\n!pip install torchmetrics\n!pip install tokenizers\n!pip install transformers\n!pip install ray[tune]\n'

# Imports

In [2]:
# General Libraries
import os
import re
import random
import numpy as np
import pandas as pd
import scipy as sp



# PyTorch Lightning
import pytorch_lightning as pl
from pytorch_lightning import seed_everything, Trainer, LightningModule
from torchmetrics import Accuracy
from torchmetrics.functional import f1_score, auroc
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping, TQDMProgressBar 
from pytorch_lightning.loggers import TensorBoardLogger



# Scikit-learn
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, multilabel_confusion_matrix


# Ray[Tune]
import ray
from ray import air
from ray import tune
from ray.air import session
from ray.tune.integration.pytorch_lightning import TuneReportCallback

import torch
# HuggingFace Libraries
import tokenizers
import transformers 

%env TOKENIZERS_PARALLELISM=true

# Our code (project-local modules)
from kfold_loop import KFoldLoop
from USPPM_model import USPPPM_model
from USPPM_dataset import set_tokenizer, set_max_len
from USPPM_kfold_datamodule import USPPPM_kf_datamodule

from datetime import datetime



env: TOKENIZERS_PARALLELISM=true


In [3]:
# Silence DeprecationWarning messages for the rest of the notebook session.
import warnings

warnings.simplefilter("ignore", DeprecationWarning)

# Configuration

## Configuration Class: notebook-specific settings

In [4]:
class CFG:
    """Notebook-level settings (fixed for the whole run, not tuned per trial)."""

    # Reproducibility: seed shared by the notebook's random operations
    seed = 42

    # Debug mode: when `debug` is True the data is cut down to `debug_samples` rows
    debug, debug_samples = False, 100

## Configuration Dictionary: trial-specific settings

In [5]:
# Defining a search space!
# This dict is handed to the Tuner as `param_space`. Entries wrapped in
# `tune.grid_search` are expanded exhaustively; every other entry is a constant
# shared by all trials.
# Grid size: 6 batch sizes x 3 fold counts x 6 models = 108 trials.
config_dict = {
    "target_size" : 1,
    "num_workers" : 16,
    
    # Training parameters
    "batch_size" : tune.grid_search([4,8,16,32,64,128]),  # grid-searched
    "epochs" : 2,  # also used as the Trainer's max_epochs inside trainable()
    "n_fold" : tune.grid_search([8,16,32]),  # grid-searched; number of folds given to KFoldLoop
    "warmup_steps" : 0,
    "min_lr" : 1e-6,
    "encoder_lr" : 2e-5,
    "decoder_lr" : 2e-5,
    "eps" : 1e-6,
    "betas" : (0.9, 0.999),
    "weight_decay" : 0.01,
    "fc_dropout" : 0.2,
    "seed" : 42,

    # Transformers
    # Earlier single-model variants, kept for reference:
    # "model" : tune.choice(["microsoft/deberta-v3-large"]),
    #"model" : tune.choice(["distilbert-base-uncased"]),
    # Grid-searched: pretrained model names to compare.
    "model" : tune.grid_search(["AI-Growth-Lab/PatentSBERTa","distilbert-base-uncased","ahotrod/electra_large_discriminator_squad2_512",
                                "Yanhao/simcse-bert-for-patent","microsoft/deberta-v3-large","anferico/bert-for-patents"])
}

## Directories

In [6]:
# Location of the input data and the directory that receives output artifacts.
INPUT_DIR = '../dataset/us-patent-phrase-to-phrase-matching/'
OUTPUT_DIR = './'

# exist_ok=True makes this one idempotent call: there is no window between
# "check" and "create", and re-running the cell is always safe.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Data Loading

In [7]:
# Load the two precomputed artifacts from the current working directory.
# NOTE(review): torch.load unpickles the file, so only load a cpc_texts.pth you trust.
cpc_texts = torch.load('cpc_texts.pth')
# The preview below shows "Unnamed: 0.1" / "Unnamed: 0" columns; presumably these
# are index columns written by earlier to_csv calls. Confirm before using them.
dataframe = pd.read_csv("dataframe.csv")
display(dataframe.head())

Unnamed: 0.1,Unnamed: 0,id,anchor,target,context,score,context_text,text,score_map
0,0,37d61fd2272659b1,abatement,abatement of pollution,A47,0.5,HUMAN NECESSITIES. FURNITURE; DOMESTIC ARTICLE...,abatement[SEP]abatement of pollution[SEP]HUMAN...,2
1,1,7b9652b17b68b7a4,abatement,act of abating,A47,0.75,HUMAN NECESSITIES. FURNITURE; DOMESTIC ARTICLE...,abatement[SEP]act of abating[SEP]HUMAN NECESSI...,3
2,2,36d72442aefd8232,abatement,active catalyst,A47,0.25,HUMAN NECESSITIES. FURNITURE; DOMESTIC ARTICLE...,abatement[SEP]active catalyst[SEP]HUMAN NECESS...,1
3,3,5296b0c19e1ce60e,abatement,eliminating process,A47,0.5,HUMAN NECESSITIES. FURNITURE; DOMESTIC ARTICLE...,abatement[SEP]eliminating process[SEP]HUMAN NE...,2
4,4,54c1e3b9184cb5b6,abatement,forest region,A47,0.0,HUMAN NECESSITIES. FURNITURE; DOMESTIC ARTICLE...,abatement[SEP]forest region[SEP]HUMAN NECESSIT...,0


## Debug Slicing

In [8]:
# In debug mode keep only the first CFG.debug_samples rows (all columns).
if CFG.debug:
    dataframe = dataframe.iloc[:CFG.debug_samples]

## Train-Test Split

In [9]:
# Hold out 10% of the rows as a test set; CFG.seed makes the split reproducible.
# Stratification on dataframe.score_map is currently disabled.
train_df, test_df = train_test_split(
    dataframe,
    test_size=0.1,
    random_state=CFG.seed,
)
display(train_df.head(), test_df.head())

Unnamed: 0.1,Unnamed: 0,id,anchor,target,context,score,context_text,text,score_map
9900,9900,0dbb44b9a145edec,distributor pipe,pipe,B01,0.5,PERFORMING OPERATIONS; TRANSPORTING. PHYSICAL ...,distributor pipe[SEP]pipe[SEP]PERFORMING OPERA...,2
1303,1303,74afca34a5439c23,ammonia recovery,recovery of water,C01,0.25,HEMISTRY; METALLURGY. INORGANIC CHEMISTRY,ammonia recovery[SEP]recovery of water[SEP]HEM...,1
16591,16591,6371befc3ee1b0f2,inner closed,cylindrical inner member,E04,0.5,FIXED CONSTRUCTIONS. BUILDING,inner closed[SEP]cylindrical inner member[SEP]...,2
25822,25822,20489196c73bd86b,produce thin layers,produce layers,G01,0.5,PHYSICS. MEASURING; TESTING,produce thin layers[SEP]produce layers[SEP]PHY...,2
23640,23640,9af994b21c892022,parallel orientation,zero angle,G06,0.25,PHYSICS. COMPUTING; CALCULATING; COUNTING,parallel orientation[SEP]zero angle[SEP]PHYSIC...,1


Unnamed: 0.1,Unnamed: 0,id,anchor,target,context,score,context_text,text,score_map
33511,33511,ed1c4e525eb105fe,transmit alarm,display indicator,G08,0.0,PHYSICS. SIGNALLING,transmit alarm[SEP]display indicator[SEP]PHYSI...,0
18670,18670,5386316f318f5221,locking formation,retaining element,B60,0.25,PERFORMING OPERATIONS; TRANSPORTING. VEHICLES ...,locking formation[SEP]retaining element[SEP]PE...,1
18049,18049,1544ca6753fcbddd,lateral power,transducer,H01,0.25,ELECTRICITY. BASIC ELECTRIC ELEMENTS,lateral power[SEP]transducer[SEP]ELECTRICITY. ...,1
31660,31660,f9d8979b94cec923,spreader body,spreader,A01,0.75,HUMAN NECESSITIES. GRICULTURE; FORESTRY; ANIMA...,spreader body[SEP]spreader[SEP]HUMAN NECESSITI...,3
15573,15573,e151ca5ea5cc0f08,high gradient magnetic separators,magnetic filtration,B03,0.5,PERFORMING OPERATIONS; TRANSPORTING. SEPARATIO...,high gradient magnetic separators[SEP]magnetic...,2


# Training

## Callbacks

In [10]:
# Start the local Ray runtime and advertise 4 GPUs to its scheduler.
# ignore_reinit_error=True lets this cell be re-run on a live kernel; without it
# a second ray.init() call raises RuntimeError.
ray.init(num_gpus=4, ignore_reinit_error=True)

2022-11-14 15:38:56,087	INFO worker.py:1518 -- Started a local Ray instance.


0,1
Python version:,3.8.10
Ray version:,2.0.1


In [11]:


# Metrics handed to TuneReportCallback inside trainable(). Every metric is
# reported to Tune under the same name it is logged with, so the mapping is the
# identity.
metrics = {name: name for name in ("val_score", "train_loss", "val_loss")}

In [12]:
def trainable(config_dict):  # Pass a "config" dictionary into your trainable.
    """Run one Ray Tune trial: a k-fold fit of ``USPPPM_model`` with Lightning.

    The function relies on notebook globals defined in earlier cells:
    ``CFG``, ``OUTPUT_DIR``, ``cpc_texts``, ``dataframe``, ``train_df`` and
    ``metrics``.

    Args:
        config_dict: Resolved configuration of the trial. Keys read here are
            ``batch_size``, ``epochs``, ``n_fold`` and, when present, ``seed``.
            The dict is mutated in place (``training_steps`` is added) and is
            then passed on to ``set_tokenizer``, ``set_max_len``, the
            datamodule, the model and ``KFoldLoop``.

    Returns:
        None. The metrics listed in ``metrics`` are reported to Tune by
        ``TuneReportCallback`` at the end of validation.
    """
    trial_id = ray.air.session.get_trial_id()
    logging_dir = f"USPPPM_{trial_id}"
    export_path = f'./ensemble_checkpoints/{trial_id}'

    # makedirs(exist_ok=True) creates missing parent directories and tolerates
    # directories left over from an earlier run, so neither a parent-first
    # ordering nor a try/except around each call is needed.
    for directory in (export_path, f"lightning_logs/{logging_dir}"):
        os.makedirs(directory, exist_ok=True)

    logger = TensorBoardLogger("lightning_logs", name=logging_dir)

    # Seed from the trial configuration so the "seed" entry of the search space
    # is honoured; fall back to the notebook-level seed when the key is absent.
    pl.seed_everything(config_dict.get("seed", CFG.seed))

    # Keep only the single best checkpoint (lowest val_loss) of this trial.
    checkpoint_callback = ModelCheckpoint(
        dirpath=f"checkpoints/{trial_id}_checkpoints",
        filename="best_checkpoint",
        save_top_k=1,
        verbose=True,
        monitor='val_loss',
        mode='min',
    )

    early_stopping_callback = EarlyStopping(monitor='val_loss', patience=2)

    # NOTE(review): the step count is derived from len(train_df), while the
    # datamodule below receives the full `dataframe` and the run is split into
    # folds; confirm this is the step count the model expects.
    steps_per_epoch = len(train_df) // config_dict['batch_size']
    config_dict['training_steps'] = steps_per_epoch * config_dict['epochs']

    set_tokenizer(config_dict, OUTPUT_DIR)
    set_max_len(config_dict, cpc_texts, dataframe)
    datamodule = USPPPM_kf_datamodule(config_dict, dataframe)

    model = USPPPM_model(config_dict)

    callbacks = [
        TuneReportCallback(metrics, on="validation_end"),
        checkpoint_callback,
        early_stopping_callback,
        TQDMProgressBar(refresh_rate=2),
    ]
    trainer = pl.Trainer(
        logger=logger,
        num_sanity_val_steps=0,
        check_val_every_n_epoch=1,
        callbacks=callbacks,
        max_epochs=config_dict['epochs'],
        accelerator="gpu",
    )

    # Replace Lightning's default fit loop with the project's KFoldLoop and
    # connect the original loop to it (presumably so it is run once per fold).
    internal_fit_loop = trainer.fit_loop
    trainer.fit_loop = KFoldLoop(config_dict['n_fold'], config_dict, export_path=export_path)
    trainer.fit_loop.connect(internal_fit_loop)

    trainer.fit(model, datamodule)

In [13]:
# NOTE(review): `resource_group` is not passed to anything in this cell; the
# per-trial resources actually applied are the dict given to
# tune.with_resources below.
resource_group = tune.PlacementGroupFactory([{"CPU": 1, "GPU": 1}])

tuner = tune.Tuner(
    # Each trial requests a quarter of a CPU and one whole GPU.
    tune.with_resources(trainable, {"cpu": 0.25, "gpu": 1}),
    param_space=config_dict,
    # Maximise val_score, running at most 4 trials at the same time.
    tune_config=tune.TuneConfig(
        metric="val_score",
        mode="max",
        max_concurrent_trials=4,
    ),
    run_config=air.RunConfig(
        name="tune_uspppm",
        verbose=2,
        progress_reporter=tune.JupyterNotebookReporter(overwrite=True),
    ),
)


Trial name,status,loc,batch_size,model,n_fold
trainable_13334_00000,RUNNING,131.114.50.210:4032152,4,AI-Growth-Lab/P_5850,8
trainable_13334_00001,RUNNING,131.114.50.210:4032261,8,AI-Growth-Lab/P_5850,8
trainable_13334_00002,RUNNING,131.114.50.210:4032435,16,AI-Growth-Lab/P_5850,8
trainable_13334_00003,RUNNING,131.114.50.210:4033973,32,AI-Growth-Lab/P_5850,8


In [None]:
# Launch the grid search; this call runs the trials and returns their results.
results = tuner.fit()

# No metric/mode is passed here, so the ones set in TuneConfig above
# (val_score, max) decide which trial counts as best.
best_result = results.get_best_result()  # Get best result object
print(best_result)

[2m[36m(trainable pid=4032152)[0m Global seed set to 42
100%|██████████| 136/136 [00:00<00:00, 4062.89it/s]
  0%|          | 0/36473 [00:00<?, ?it/s]
  4%|▍         | 1417/36473 [00:00<00:02, 14163.63it/s]
  8%|▊         | 2870/36473 [00:00<00:02, 14376.45it/s]
 12%|█▏        | 4308/36473 [00:00<00:02, 14238.30it/s]
 16%|█▌        | 5732/36473 [00:00<00:02, 14233.65it/s]
 20%|█▉        | 7156/36473 [00:00<00:02, 13983.19it/s]
 23%|██▎       | 8556/36473 [00:00<00:02, 13874.98it/s]
 27%|██▋       | 9944/36473 [00:00<00:01, 13670.96it/s]
 31%|███       | 11374/36473 [00:00<00:01, 13866.83it/s]
 35%|███▌      | 12823/36473 [00:00<00:01, 14058.25it/s]
 39%|███▉      | 14370/36473 [00:01<00:01, 14487.28it/s]
 43%|████▎     | 15837/36473 [00:01<00:01, 14540.08it/s]
 47%|████▋     | 17292/36473 [00:01<00:01, 14214.01it/s]
 52%|█████▏    | 18809/36473 [00:01<00:01, 14497.66it/s]
 56%|█████▌    | 20261/36473 [00:01<00:01, 14499.16it/s]
 60%|█████▉    | 21713/36473 [00:01<00:01, 14398.80it/s]

[2m[36m(trainable pid=4032152)[0m STARTING FOLD 1
[2m[36m(trainable pid=4032152)[0m TRAIN FOLD 1 28721
[2m[36m(trainable pid=4032152)[0m VALID FOLD 1 4104
Epoch 0:   0%|          | 0/8207 [00:00<?, ?it/s] 


 97%|█████████▋| 35398/36473 [00:02<00:00, 14934.13it/s]
100%|██████████| 36473/36473 [00:02<00:00, 15199.12it/s]
[2m[36m(trainable pid=4032435)[0m Global seed set to 42


Epoch 0:   0%|          | 2/8207 [00:01<1:51:09,  1.23it/s, loss=0.65, v_num=0, train_loss=0.606]
Epoch 0:   0%|          | 4/8207 [00:01<1:02:24,  2.19it/s, loss=0.65, v_num=0, train_loss=0.713]


100%|██████████| 136/136 [00:00<00:00, 4945.94it/s]
  0%|          | 0/36473 [00:00<?, ?it/s]
  4%|▍         | 1623/36473 [00:00<00:02, 16221.55it/s]


Epoch 0:   0%|          | 6/8207 [00:02<45:59,  2.97it/s, loss=0.655, v_num=0, train_loss=0.829] 
Epoch 0:   0%|          | 8/8207 [00:02<37:46,  3.62it/s, loss=0.665, v_num=0, train_loss=0.636]


  9%|▉         | 3268/36473 [00:00<00:02, 16349.70it/s]
 14%|█▎        | 4974/36473 [00:00<00:01, 16670.89it/s]


Epoch 0:   0%|          | 10/8207 [00:02<32:58,  4.14it/s, loss=0.669, v_num=0, train_loss=0.623]


 23%|██▎       | 8288/36473 [00:00<00:01, 16404.69it/s]
 27%|██▋       | 9929/36473 [00:00<00:01, 16280.70it/s]


Epoch 0:   0%|          | 12/8207 [00:02<29:47,  4.58it/s, loss=0.677, v_num=0, train_loss=0.751]


 32%|███▏      | 11558/36473 [00:00<00:01, 16162.03it/s]
 36%|███▌      | 13175/36473 [00:00<00:01, 16142.14it/s]


Epoch 0:   0%|          | 14/8207 [00:02<27:26,  4.98it/s, loss=0.675, v_num=0, train_loss=0.661]
Epoch 0:   0%|          | 16/8207 [00:03<25:41,  5.31it/s, loss=0.672, v_num=0, train_loss=0.615]


 41%|████      | 14790/36473 [00:00<00:01, 15819.51it/s]
 45%|████▍     | 16383/36473 [00:01<00:01, 15850.62it/s]
 49%|████▉     | 17969/36473 [00:01<00:01, 15442.71it/s]
 54%|█████▎    | 19576/36473 [00:01<00:01, 15626.58it/s]


Epoch 0:   0%|          | 18/8207 [00:03<24:15,  5.63it/s, loss=0.67, v_num=0, train_loss=0.646] 


 62%|██████▏   | 22790/36473 [00:01<00:00, 15851.53it/s]
 67%|██████▋   | 24407/36473 [00:01<00:00, 15945.02it/s]


Epoch 0:   0%|          | 20/8207 [00:03<23:06,  5.90it/s, loss=0.667, v_num=0, train_loss=0.650]
Epoch 0:   0%|          | 22/8207 [00:03<22:09,  6.16it/s, loss=0.668, v_num=0, train_loss=0.605]


 71%|███████▏  | 26003/36473 [00:01<00:00, 15915.63it/s]
 76%|███████▌  | 27596/36473 [00:01<00:00, 15895.60it/s]


Epoch 0:   0%|          | 24/8207 [00:03<21:23,  6.38it/s, loss=0.669, v_num=0, train_loss=0.636]


 80%|████████  | 29220/36473 [00:01<00:00, 15997.01it/s]
 85%|████████▍ | 30841/36473 [00:01<00:00, 16060.32it/s]


Epoch 0:   0%|          | 26/8207 [00:03<20:46,  6.56it/s, loss=0.666, v_num=0, train_loss=0.654]
Epoch 0:   0%|          | 28/8207 [00:04<20:15,  6.73it/s, loss=0.656, v_num=0, train_loss=0.442]


 89%|████████▉ | 32534/36473 [00:02<00:00, 16317.69it/s]
 94%|█████████▎| 34167/36473 [00:02<00:00, 15733.23it/s]
100%|██████████| 36473/36473 [00:02<00:00, 16011.54it/s]
  0%|          | 0/36473 [00:00<?, ?it/s]
  4%|▍         | 1506/36473 [00:00<00:02, 15058.15it/s]


Epoch 0:   0%|          | 30/8207 [00:04<19:44,  6.90it/s, loss=0.653, v_num=0, train_loss=0.668]


  8%|▊         | 3095/36473 [00:00<00:02, 15544.43it/s]
 13%|█▎        | 4714/36473 [00:00<00:02, 15835.83it/s]
 17%|█▋        | 6312/36473 [00:00<00:01, 15890.62it/s]


Epoch 0:   0%|          | 32/8207 [00:04<19:19,  7.05it/s, loss=0.649, v_num=0, train_loss=0.824]
Epoch 0:   0%|          | 34/8207 [00:04<18:57,  7.19it/s, loss=0.644, v_num=0, train_loss=0.682]


 22%|██▏       | 7911/36473 [00:00<00:01, 15923.61it/s]
 26%|██▌       | 9504/36473 [00:00<00:01, 15863.52it/s]


Epoch 0:   0%|          | 36/8207 [00:04<18:35,  7.32it/s, loss=0.65, v_num=0, train_loss=0.730] 
Epoch 0:   0%|          | 38/8207 [00:05<18:16,  7.45it/s, loss=0.647, v_num=0, train_loss=0.695]


 30%|███       | 11114/36473 [00:00<00:01, 15939.23it/s]
 35%|███▍      | 12708/36473 [00:00<00:01, 15904.57it/s]
 39%|███▉      | 14300/36473 [00:00<00:01, 15908.35it/s]
 44%|████▎     | 15891/36473 [00:01<00:01, 15710.47it/s]
 48%|████▊     | 17463/36473 [00:01<00:01, 15540.53it/s]


Epoch 0:   0%|          | 40/8207 [00:05<18:00,  7.56it/s, loss=0.641, v_num=0, train_loss=0.581]


 52%|█████▏    | 19026/36473 [00:01<00:01, 15564.21it/s]
 57%|█████▋    | 20644/36473 [00:01<00:01, 15746.20it/s]


Epoch 0:   1%|          | 42/8207 [00:05<17:45,  7.66it/s, loss=0.646, v_num=0, train_loss=0.617]
Epoch 0:   1%|          | 44/8207 [00:05<17:33,  7.75it/s, loss=0.635, v_num=0, train_loss=0.548]


 61%|██████    | 22284/36473 [00:01<00:00, 15941.30it/s]
 66%|██████▌   | 23941/36473 [00:01<00:00, 16124.30it/s]


Epoch 0:   1%|          | 46/8207 [00:05<17:20,  7.85it/s, loss=0.633, v_num=0, train_loss=0.664]


 70%|███████   | 25554/36473 [00:01<00:00, 15982.70it/s]
 74%|███████▍  | 27169/36473 [00:01<00:00, 16031.32it/s]


Epoch 0:   1%|          | 48/8207 [00:06<17:10,  7.92it/s, loss=0.629, v_num=0, train_loss=0.477]
Epoch 0:   1%|          | 50/8207 [00:06<16:59,  8.00it/s, loss=0.638, v_num=0, train_loss=0.777]


 83%|████████▎ | 30375/36473 [00:01<00:00, 15985.39it/s]
 88%|████████▊ | 32011/36473 [00:02<00:00, 16095.19it/s]
 92%|█████████▏| 33621/36473 [00:02<00:00, 15885.15it/s]
 97%|█████████▋| 35211/36473 [00:02<00:00, 15807.53it/s]


Epoch 0:   1%|          | 52/8207 [00:06<16:59,  8.00it/s, loss=0.638, v_num=0, train_loss=0.566]
Epoch 0:   1%|          | 54/8207 [00:06<16:49,  8.08it/s, loss=0.632, v_num=0, train_loss=0.421]


100%|██████████| 36473/36473 [00:02<00:00, 15857.55it/s]


Epoch 0:   1%|          | 56/8207 [00:06<16:39,  8.15it/s, loss=0.624, v_num=0, train_loss=0.663]
Epoch 0:   1%|          | 58/8207 [00:07<16:32,  8.21it/s, loss=0.635, v_num=0, train_loss=0.761]
Epoch 0:   1%|          | 60/8207 [00:07<16:26,  8.26it/s, loss=0.647, v_num=0, train_loss=0.805]
Epoch 0:   1%|          | 62/8207 [00:07<16:20,  8.31it/s, loss=0.633, v_num=0, train_loss=0.579]
Epoch 0:   1%|          | 64/8207 [00:07<16:14,  8.36it/s, loss=0.64, v_num=0, train_loss=0.626] 
Epoch 0:   1%|          | 66/8207 [00:07<16:09,  8.40it/s, loss=0.646, v_num=0, train_loss=0.715]
Epoch 0:   1%|          | 68/8207 [00:08<16:03,  8.45it/s, loss=0.652, v_num=0, train_loss=0.671]
Epoch 0:   1%|          | 70/8207 [00:08<15:57,  8.50it/s, loss=0.643, v_num=0, train_loss=0.696]
Epoch 0:   1%|          | 72/8207 [00:08<15:52,  8.54it/s, loss=0.647, v_num=0, train_loss=0.703]
Epoch 0:   1%|          | 74/8207 [00:08<15:46,  8.59it/s, loss=0.66, v_num=0, train_loss=0.699] 
Epoch 0:   1%|      

[2m[36m(trainable pid=4033973)[0m Global seed set to 42


Epoch 0:   1%|          | 78/8207 [00:08<15:37,  8.67it/s, loss=0.651, v_num=0, train_loss=0.594]
Epoch 0:   1%|          | 80/8207 [00:09<15:33,  8.71it/s, loss=0.647, v_num=0, train_loss=0.667]
Epoch 0:   1%|          | 82/8207 [00:09<15:29,  8.74it/s, loss=0.656, v_num=0, train_loss=0.621]
Epoch 0:   1%|          | 84/8207 [00:09<15:26,  8.77it/s, loss=0.659, v_num=0, train_loss=0.660]
Epoch 0:   1%|          | 86/8207 [00:09<15:22,  8.80it/s, loss=0.659, v_num=0, train_loss=0.660]
Epoch 0:   1%|          | 86/8207 [00:09<15:22,  8.80it/s, loss=0.656, v_num=0, train_loss=0.618]


100%|██████████| 136/136 [00:00<00:00, 5133.74it/s]
  0%|          | 0/36473 [00:00<?, ?it/s]
  4%|▍         | 1628/36473 [00:00<00:02, 16274.20it/s]
  9%|▉         | 3312/36473 [00:00<00:02, 16409.67it/s]
 14%|█▍        | 5062/36473 [00:00<00:01, 16900.14it/s]
 19%|█▊        | 6814/36473 [00:00<00:01, 17143.54it/s]


Epoch 0:   1%|          | 88/8207 [00:09<15:19,  8.83it/s, loss=0.653, v_num=0, train_loss=0.665]


 23%|██▎       | 8529/36473 [00:00<00:01, 16897.26it/s]
 28%|██▊       | 10220/36473 [00:00<00:01, 16572.64it/s]


Epoch 0:   1%|          | 90/8207 [00:10<15:18,  8.84it/s, loss=0.658, v_num=0, train_loss=0.726]


 33%|███▎      | 11879/36473 [00:00<00:01, 16368.65it/s]
 37%|███▋      | 13517/36473 [00:00<00:01, 16300.56it/s]


Epoch 0:   1%|          | 92/8207 [00:10<15:14,  8.87it/s, loss=0.658, v_num=0, train_loss=0.789]
Epoch 0:   1%|          | 94/8207 [00:10<15:11,  8.90it/s, loss=0.656, v_num=0, train_loss=0.627]


 42%|████▏     | 15193/36473 [00:00<00:01, 16440.87it/s]
 46%|████▋     | 16900/36473 [00:01<00:01, 16629.88it/s]
 51%|█████     | 18636/36473 [00:01<00:01, 16849.25it/s]


Epoch 0:   1%|          | 96/8207 [00:10<15:08,  8.93it/s, loss=0.663, v_num=0, train_loss=0.731]


 56%|█████▌    | 20351/36473 [00:01<00:00, 16935.61it/s]
 60%|██████    | 22046/36473 [00:01<00:00, 16728.04it/s]


Epoch 0:   1%|          | 98/8207 [00:10<15:05,  8.96it/s, loss=0.668, v_num=0, train_loss=0.671]
Epoch 0:   1%|          | 100/8207 [00:11<15:02,  8.98it/s, loss=0.666, v_num=0, train_loss=0.637]


 65%|██████▌   | 23785/36473 [00:01<00:00, 16922.80it/s]
 70%|██████▉   | 25479/36473 [00:01<00:00, 16839.47it/s]
 75%|███████▍  | 27201/36473 [00:01<00:00, 16952.73it/s]
 79%|███████▉  | 28897/36473 [00:01<00:00, 16864.72it/s]


Epoch 0:   1%|          | 102/8207 [00:11<14:59,  9.01it/s, loss=0.67, v_num=0, train_loss=0.713] 


 84%|████████▍ | 30622/36473 [00:01<00:00, 16979.30it/s]
 89%|████████▊ | 32363/36473 [00:01<00:00, 17107.30it/s]
 93%|█████████▎| 34094/36473 [00:02<00:00, 17165.34it/s]


Epoch 0:   1%|▏         | 104/8207 [00:11<14:58,  9.02it/s, loss=0.674, v_num=0, train_loss=0.666]
Epoch 0:   1%|▏         | 106/8207 [00:11<14:55,  9.04it/s, loss=0.678, v_num=0, train_loss=0.677]


 98%|█████████▊| 35867/36473 [00:02<00:00, 17333.11it/s]
100%|██████████| 36473/36473 [00:02<00:00, 16882.53it/s]
  0%|          | 0/36473 [00:00<?, ?it/s]
  4%|▍         | 1579/36473 [00:00<00:02, 15784.14it/s]


Epoch 0:   1%|▏         | 108/8207 [00:11<14:53,  9.07it/s, loss=0.686, v_num=0, train_loss=0.649]


  9%|▉         | 3261/36473 [00:00<00:02, 16391.34it/s]
 14%|█▎        | 4953/36473 [00:00<00:01, 16629.36it/s]


Epoch 0:   1%|▏         | 110/8207 [00:12<14:50,  9.09it/s, loss=0.68, v_num=0, train_loss=0.636] 
Epoch 0:   1%|▏         | 112/8207 [00:12<14:48,  9.11it/s, loss=0.675, v_num=0, train_loss=0.697]


 18%|█▊        | 6634/36473 [00:00<00:01, 16697.52it/s]
 23%|██▎       | 8363/36473 [00:00<00:01, 16908.32it/s]
 28%|██▊       | 10054/36473 [00:00<00:01, 16554.34it/s]
 37%|███▋      | 13398/36473 [00:00<00:01, 16638.30it/s]


Epoch 0:   1%|▏         | 114/8207 [00:12<14:46,  9.13it/s, loss=0.669, v_num=0, train_loss=0.521]


 41%|████▏     | 15063/36473 [00:00<00:01, 16463.41it/s]
 46%|████▌     | 16711/36473 [00:01<00:01, 16119.34it/s]


Epoch 0:   1%|▏         | 116/8207 [00:12<14:43,  9.15it/s, loss=0.654, v_num=0, train_loss=0.532]
Epoch 0:   1%|▏         | 118/8207 [00:12<14:41,  9.17it/s, loss=0.652, v_num=0, train_loss=0.619]


 50%|█████     | 18325/36473 [00:01<00:01, 15765.94it/s]
 55%|█████▍    | 19904/36473 [00:01<00:01, 15575.90it/s]


Epoch 0:   1%|▏         | 120/8207 [00:13<14:40,  9.19it/s, loss=0.649, v_num=0, train_loss=0.625]


 59%|█████▉    | 21464/36473 [00:01<00:00, 15400.49it/s]
 63%|██████▎   | 23035/36473 [00:01<00:00, 15487.58it/s]


Epoch 0:   1%|▏         | 122/8207 [00:13<14:39,  9.20it/s, loss=0.645, v_num=0, train_loss=0.774]
Epoch 0:   2%|▏         | 124/8207 [00:13<14:37,  9.21it/s, loss=0.643, v_num=0, train_loss=0.616]


 67%|██████▋   | 24591/36473 [00:01<00:00, 15508.71it/s]
 72%|███████▏  | 26143/36473 [00:01<00:00, 15346.87it/s]
 76%|███████▌  | 27720/36473 [00:01<00:00, 15468.45it/s]
 80%|████████  | 29330/36473 [00:01<00:00, 15652.41it/s]
 85%|████████▍ | 30975/36473 [00:01<00:00, 15889.38it/s]


Epoch 0:   2%|▏         | 126/8207 [00:13<14:34,  9.24it/s, loss=0.626, v_num=0, train_loss=0.531]
Epoch 0:   2%|▏         | 128/8207 [00:13<14:32,  9.26it/s, loss=0.622, v_num=0, train_loss=0.664]


 89%|████████▉ | 32626/36473 [00:02<00:00, 16072.80it/s]
 94%|█████████▍| 34267/36473 [00:02<00:00, 16172.88it/s]
100%|██████████| 36473/36473 [00:02<00:00, 16043.18it/s]


Epoch 0:   2%|▏         | 130/8207 [00:14<14:30,  9.28it/s, loss=0.621, v_num=0, train_loss=0.726]
Epoch 0:   2%|▏         | 132/8207 [00:14<14:28,  9.30it/s, loss=0.619, v_num=0, train_loss=0.647]
Epoch 0:   2%|▏         | 134/8207 [00:14<14:26,  9.32it/s, loss=0.615, v_num=0, train_loss=0.546]
Epoch 0:   2%|▏         | 136/8207 [00:14<14:24,  9.33it/s, loss=0.619, v_num=0, train_loss=0.685]
Epoch 0:   2%|▏         | 138/8207 [00:14<14:24,  9.34it/s, loss=0.612, v_num=0, train_loss=0.572]
Epoch 0:   2%|▏         | 140/8207 [00:14<14:22,  9.35it/s, loss=0.607, v_num=0, train_loss=0.644]
Epoch 0:   2%|▏         | 142/8207 [00:15<14:21,  9.36it/s, loss=0.6, v_num=0, train_loss=0.582]  
Epoch 0:   2%|▏         | 144/8207 [00:15<14:20,  9.37it/s, loss=0.593, v_num=0, train_loss=0.516]
Epoch 0:   2%|▏         | 146/8207 [00:15<14:19,  9.38it/s, loss=0.598, v_num=0, train_loss=0.554]
Epoch 0:   2%|▏         | 148/8207 [00:15<14:17,  9.39it/s, loss=0.604, v_num=0, train_loss=0.775]
Epoch 0:  

[2m[36m(trainable pid=4033973)[0m GPU available: True (cuda), used: True
[2m[36m(trainable pid=4033973)[0m TPU available: False, using: 0 TPU cores
[2m[36m(trainable pid=4033973)[0m IPU available: False, using: 0 IPUs
[2m[36m(trainable pid=4033973)[0m HPU available: False, using: 0 HPUs
[2m[36m(trainable pid=4033973)[0m   rank_zero_deprecation(
[2m[36m(trainable pid=4033973)[0m   rank_zero_deprecation("The `on_init_end` callback hook was deprecated in v1.6 and will be removed in v1.8.")
[2m[36m(trainable pid=4033973)[0m   rank_zero_deprecation(
[2m[36m(trainable pid=4033973)[0m   rank_zero_deprecation(
[2m[36m(trainable pid=4033973)[0m   rank_zero_deprecation(
[2m[36m(trainable pid=4033973)[0m   rank_zero_deprecation(


Epoch 0:   2%|▏         | 166/8207 [00:17<14:08,  9.48it/s, loss=0.603, v_num=0, train_loss=0.752]
Epoch 0:   2%|▏         | 168/8207 [00:17<14:08,  9.48it/s, loss=0.586, v_num=0, train_loss=0.523]
Epoch 0:   2%|▏         | 170/8207 [00:17<14:07,  9.48it/s, loss=0.597, v_num=0, train_loss=0.594]
Epoch 0:   2%|▏         | 172/8207 [00:18<14:06,  9.49it/s, loss=0.606, v_num=0, train_loss=0.761]
Epoch 0:   2%|▏         | 174/8207 [00:18<14:05,  9.50it/s, loss=0.603, v_num=0, train_loss=0.615]
Epoch 0:   2%|▏         | 176/8207 [00:18<14:04,  9.51it/s, loss=0.607, v_num=0, train_loss=0.685]
Epoch 0:   2%|▏         | 178/8207 [00:18<14:04,  9.51it/s, loss=0.621, v_num=0, train_loss=0.829]
Epoch 0:   2%|▏         | 180/8207 [00:18<14:03,  9.52it/s, loss=0.635, v_num=0, train_loss=0.785]
Epoch 0:   2%|▏         | 182/8207 [00:19<14:02,  9.53it/s, loss=0.653, v_num=0, train_loss=0.678]
Epoch 0:   2%|▏         | 184/8207 [00:19<14:02,  9.53it/s, loss=0.653, v_num=0, train_loss=0.644]
Epoch 0:  

[2m[36m(trainable pid=4033973)[0m LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [2]
[2m[36m(trainable pid=4033973)[0m 
[2m[36m(trainable pid=4033973)[0m   | Name       | Type              | Params
[2m[36m(trainable pid=4033973)[0m -------------------------------------------------
[2m[36m(trainable pid=4033973)[0m 0 | model      | MPNetModel        | 109 M 
[2m[36m(trainable pid=4033973)[0m 1 | criterion  | BCEWithLogitsLoss | 0     
[2m[36m(trainable pid=4033973)[0m 2 | fc_dropout | Dropout           | 0     
[2m[36m(trainable pid=4033973)[0m 3 | fc         | Linear            | 769   
[2m[36m(trainable pid=4033973)[0m 4 | attention  | Sequential        | 394 K 
[2m[36m(trainable pid=4033973)[0m -------------------------------------------------
[2m[36m(trainable pid=4033973)[0m 109 M     Trainable params
[2m[36m(trainable pid=4033973)[0m 0         Non-trainable params
[2m[36m(trainable pid=4033973)[0m 109 M     Total params
[2m[36m(trainable pid=4033973

Epoch 0:   2%|▏         | 192/8207 [00:20<13:58,  9.56it/s, loss=0.654, v_num=0, train_loss=0.500]
[2m[36m(trainable pid=4033973)[0m STARTING FOLD 1
[2m[36m(trainable pid=4033973)[0m TRAIN FOLD 1 28721
[2m[36m(trainable pid=4033973)[0m VALID FOLD 1 4104
Epoch 0:   0%|          | 0/1027 [00:00<?, ?it/s] 
Epoch 0:   2%|▏         | 194/8207 [00:20<13:58,  9.56it/s, loss=0.652, v_num=0, train_loss=0.599]
Epoch 0:   2%|▏         | 196/8207 [00:20<13:57,  9.57it/s, loss=0.654, v_num=0, train_loss=0.649]
Epoch 0:   2%|▏         | 198/8207 [00:20<13:56,  9.58it/s, loss=0.643, v_num=0, train_loss=0.691]
Epoch 0:   2%|▏         | 200/8207 [00:20<13:55,  9.58it/s, loss=0.652, v_num=0, train_loss=0.673]
Epoch 0:   2%|▏         | 202/8207 [00:21<13:55,  9.58it/s, loss=0.641, v_num=0, train_loss=0.776]
Epoch 0:   2%|▏         | 204/8207 [00:21<13:54,  9.59it/s, loss=0.647, v_num=0, train_loss=0.622]
Epoch 0:   3%|▎         | 206/8207 [00:21<13:54,  9.59it/s, loss=0.646, v_num=0, train_loss=

In [None]:
# Tear down the Ray runtime that was started with ray.init() earlier in the notebook.
ray.shutdown()

In [None]:
# Collect the last reported results of all trials into a single pandas
# DataFrame, taken from the `results` object returned by tuner.fit().
df = results.get_dataframe() 

In [None]:
# Write the results table to the current working directory.
# NOTE(review): with the default index=True, to_csv also writes the row index as
# an unnamed first column; confirm that this is wanted.
df.to_csv('grid_search_results.csv')