In [1]:
# Reference: 
# 1. https://colab.research.google.com/drive/1wm8Z0ui8ZGSXoR50x52GvWMQkfSPPJ-T#scrollTo=SCFXYdbIXguc
# 2. https://colab.research.google.com/drive/1YRHK4HO8RktGzlYmGjBo056kzVD4_j9o#scrollTo=BJR6t_gCQe_x

!pip install datasets
!pip install transformers
!pip install sentence_transformers


from pprint import pprint
import torch
from datasets import load_dataset
import numpy
import pandas as pd
import io
import random
import numpy as np

# Change here if use own drive
from google.colab import drive
drive.mount('/content/drive/')
%cd drive/MyDrive

# Set the seed value all over the place to make this reproducible.
def setup_seed(seed):
    random.seed(seed)                          
    np.random.seed(seed)                       
    torch.manual_seed(seed)                    
    torch.cuda.manual_seed(seed)               
    torch.cuda.manual_seed_all(seed)           
    torch.backends.cudnn.deterministic = True  
    torch.backends.cudnn.benchmark = False

setup_seed(42)

train_df = pd.read_csv("binary_classification_training_set.csv")

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).
/content/drive/MyDrive


In [2]:
import torch.nn as nn

# read in validation file
validation_df = pd.read_csv("binary_classification_validation_set.csv")

In [3]:
print(train_df.shape)
print(train_df.columns)
print(validation_df.shape)
print(validation_df.columns)

print(len(train_df[train_df["decision"] == "ACCEPTED"]))
print(len(train_df[train_df["decision"] == "REJECTED"]))
print(len(validation_df[validation_df["decision"] == "ACCEPTED"]))
print(len(validation_df[validation_df["decision"] == "REJECTED"]))

(20000, 19)
Index(['Unnamed: 0', 'patent_number', 'decision', 'title', 'abstract',
       'claims', 'background', 'summary', 'description', 'cpc_label',
       'ipc_label', 'filing_date', 'examiner_id', 'output',
       'application_invention_type', 'examiner_art_unit',
       'small_entity_indicator', 'aia_first_to_file', 'foreign'],
      dtype='object')
(5000, 19)
Index(['Unnamed: 0', 'patent_number', 'decision', 'title', 'abstract',
       'claims', 'background', 'summary', 'description', 'cpc_label',
       'ipc_label', 'filing_date', 'examiner_id', 'output',
       'application_invention_type', 'examiner_art_unit',
       'small_entity_indicator', 'aia_first_to_file', 'foreign'],
      dtype='object')
10000
10000
2500
2500


In [4]:
# Mapping the field output; 1 = ACCEPTED; 0 = REJECTED
train_df['output'] = 1
train_df.loc[train_df['decision'] == "REJECTED", 'output'] = 0

validation_df['output'] = 1
validation_df.loc[validation_df['decision'] == "REJECTED", 'output'] = 0

In [5]:
# check Rejected versus Accepted based on output field
print(len(train_df[train_df["output"] == 1]))
print(len(train_df[train_df["output"] == 0]))
print(len(validation_df[validation_df["output"] == 1]))
print(len(validation_df[validation_df["output"] == 0]))

10000
10000
2500
2500


In [6]:
# spot check train_df
train_df.head(5)

Unnamed: 0.1,Unnamed: 0,patent_number,decision,title,abstract,claims,background,summary,description,cpc_label,ipc_label,filing_date,examiner_id,output,application_invention_type,examiner_art_unit,small_entity_indicator,aia_first_to_file,foreign
0,17944,15187583,ACCEPTED,"FULLY INTEGRATED, DISPOSABLE TISSUE VISUALIZAT...",The present invention relates to a fully integ...,"1-11. (canceled) 12. A sterilized, integrated,...",<SOH> BACKGROUND OF THE INVENTION <EOH>1. Fiel...,<SOH> SUMMARY OF THE INVENTION <EOH>Embodiment...,CROSS-REFERENCE TO RELATED APPLICATIONS This a...,A61B107,A61B107,20160620,86346.0,1,Utility,3779,SMALL,True,False
1,1686,14906225,REJECTED,Compositions and Methods Comprising a Lipolyti...,The present invention provides lipolytic enzym...,1. A lipolytic enzyme variant or an active fra...,<SOH> BACKGROUND OF THE INVENTION <EOH>Lipolyt...,<SOH> SUMMARY OF THE INVENTION <EOH>The presen...,CROSS REFERENCE TO RELATED APPLICATIONS This a...,C12N920,C12N920,20160119,68762.0,0,Utility,1656,UNDISCOUNTED,True,False
2,3522,15012119,REJECTED,IMAGE DISPLAY DEVICE,An image display device of the present disclos...,1. An image display device comprising: a displ...,<SOH> BACKGROUND <EOH>1. Technical Field The p...,<SOH> SUMMARY <EOH>An image display device of ...,BACKGROUND 1. Technical Field The present disc...,G02B270172,G02B2701,20160201,99575.0,0,Utility,2626,UNDISCOUNTED,True,True
3,6639,15054201,REJECTED,ABUSE-PROOFED DOSAGE FORM,The invention relates to a dosage form that is...,1. An abuse-proofed dosage form thermoformed b...,,<SOH> BRIEF DESCRIPTION OF THE DRAWING <EOH>FI...,This application is a continuation of U.S. Ser...,A61K31135,A61K31135,20160226,66231.0,0,Utility,1615,UNDISCOUNTED,False,True
4,18595,15108464,ACCEPTED,Process for the Preparation of Solid Particula...,A novel process for preparing vinyl aromatic p...,1. A process for the preparation of a solid pa...,<SOH> BACKGROUND OF THE INVENTION <EOH>Expanda...,<SOH> SUMMARY OF THE INVENTION <EOH>The object...,FIELD OF THE INVENTION The present invention r...,C08J920,C08J920,20160627,97379.0,1,Utility,1765,UNDISCOUNTED,True,True


In [7]:
# spot check train_df
validation_df.head(5)

Unnamed: 0.1,Unnamed: 0,patent_number,decision,title,abstract,claims,background,summary,description,cpc_label,ipc_label,filing_date,examiner_id,output,application_invention_type,examiner_art_unit,small_entity_indicator,aia_first_to_file,foreign
0,149,15227186,ACCEPTED,IMAGING DEVICE AND FOCUSING CONTROL METHOD,The present invention provides an imaging devi...,1. An imaging device comprising: an imaging el...,<SOH> BACKGROUND OF THE INVENTION <EOH>1. Fiel...,<SOH> SUMMARY OF THE INVENTION <EOH>In all of ...,CROSS-REFERENCE TO RELATED APPLICATIONS This a...,H04N523212,H04N5232,20160803,70534.0,1,Utility,2662.0,UNDISCOUNTED,True,True
1,4003,15129780,ACCEPTED,FOIL REMOVAL DEVICE AND A METHOD FOR REMOVING ...,Provided is a foil removal device and a method...,1-26. (canceled) 27. A foil removal device for...,<SOH> BACKGROUND <EOH>The invention relates to...,<SOH> SUMMARY OF THE INVENTION <EOH>According ...,BACKGROUND The invention relates to a foil rem...,B65H19286,B65H1928,20160927,61755.0,1,Utility,1745.0,UNDISCOUNTED,True,True
2,811,15234229,REJECTED,FUNGICIDAL COMPOSITION COMPRISING A PYRIDYLETH...,A composition comprising at least a pyridyleth...,1.-20. (canceled) 21. A composition comprising...,<SOH> BACKGROUND OF THE INVENTION <EOH>Interna...,<SOH> SUMMARY OF THE INVENTION <EOH>Accordingl...,CROSS-REFERENCE TO RELATED APPLICATION(S) The ...,A01N4340,A01N4340,20160811,62399.0,0,Utility,1627.0,UNDISCOUNTED,False,True
3,624,15233691,REJECTED,SEMICONDUCTOR MEMORY DEVICE,A semiconductor memory device includes a plura...,1. A semiconductor memory device comprising: a...,<SOH> BACKGROUND <EOH>A NAND type flash memory...,,CROSS-REFERENCE TO RELATED APPLICATION This ap...,G11C160483,G11C1604,20160810,62558.0,0,Utility,2824.0,UNDISCOUNTED,True,True
4,3125,15125628,ACCEPTED,METHODS AND SYSTEMS FOR AUTOMATIC CREATION OF ...,Disclosed herein are methods and systems for a...,1. A method comprising: a first mobile radio b...,<SOH> BACKGROUND OF THE INVENTION <EOH>Million...,<SOH> BRIEF DESCRIPTION OF THE SEVERAL VIEWS O...,BACKGROUND OF THE INVENTION Millions of people...,H04W408,H04W408,20160913,62652.0,1,Utility,2648.0,UNDISCOUNTED,True,False


In [8]:
# convert and add another column of one_hot encoding
train_df["output_onehot"] = pd.get_dummies(train_df["output"]).values.tolist()
validation_df["output_onehot"] = pd.get_dummies(validation_df["output"]).values.tolist()

In [9]:
import re

# Convert this to many sentences within claims for training data
claims_mod_list = []

sentences_num_param = 25

for i in range(len(train_df)):  
  list_to_append = re.split(r'\s+\d+\.\s+', train_df.iloc[i]["claims"])
  list_to_append[0] = list_to_append[0][3:]
  
  if(len(list_to_append) >= sentences_num_param):
    list_to_append = list_to_append[:sentences_num_param] 
  else:
    list_length = sentences_num_param - len(list_to_append)
    while list_length > 0:
      list_to_append.append("")
      list_length = list_length - 1
    
  claims_mod_list.append(list_to_append)
  

# insert claims_mod to train_df
train_df.insert(0, "claims_mod", claims_mod_list)
print(train_df.head(20))
print(type(train_df["claims_mod"][0]))


# Convert this to many sentences within claims for validation data
claims_mod_list = []

for i in range(len(validation_df)):  
  list_to_append = re.split(r'\s+\d+\.\s+', validation_df.iloc[i]["claims"])
  list_to_append[0] = list_to_append[0][3:]
  
  if(len(list_to_append) >= sentences_num_param):
    list_to_append = list_to_append[:sentences_num_param] # retain only the first 10 elements in the list
  else:
    list_length = sentences_num_param - len(list_to_append)
    while list_length > 0:
      list_to_append.append("")
      list_length = list_length - 1
    
  claims_mod_list.append(list_to_append)
  

# insert claims_mod to validation_df
print("--------------------------------------------------------------------")
validation_df.insert(0, "claims_mod", claims_mod_list)
print(validation_df.head(20))
print(type(validation_df["claims_mod"][0]))

                                           claims_mod  Unnamed: 0  \
0   [1. (canceled), A sterilized, integrated, one ...       17944   
1   [A lipolytic enzyme variant or an active fragm...        1686   
2   [An image display device comprising: a display...        3522   
3   [An abuse-proofed dosage form thermoformed by ...        6639   
4   [A process for the preparation of a solid part...       18595   
5   [. (canceled), A bonded magnet comprising the ...        7294   
6   [20. (canceled), A surgical device, comprising...        2903   
7   [A remote node (RN) comprising: a downstream p...       18753   
8   [A system for full motion capture and haptic f...       13371   
9   [22. (canceled), A device comprising a scaffol...       12593   
10  [A method for manufacturing a motor bobbin aro...        6759   
11  [A display device comprising: a transistor com...       11653   
12  [A method for securing an implant to a bone of...        7226   
13  [A method for operating a fuel

In [10]:
from sentence_transformers import SentenceTransformer, models

class PatentLLM(nn.Module):   
  def __init__(self, model_name, dim_model = 768, num_labels = 2, sentences_num = 15):    

    super(PatentLLM, self).__init__()

    device = torch.device("cuda")

    # Initiate the base model to be used
    word_embedding_model = models.Transformer(model_name, max_seq_length = 512)
    # Initiate the pooling method for the base model after processed (i.e. mean, max, etc.)
    pooling_model = models.Pooling(word_embedding_model.get_word_embedding_dimension(), pooling_mode_max_tokens = False, pooling_mode_mean_tokens = False, pooling_mode_mean_sqrt_len_tokens = True)
    # Initiate sentence transformer; change this to Scibert, Roberta, Bert, DistilBert, etc. in the modules piece
    self.model = SentenceTransformer(modules=[word_embedding_model, pooling_model], device='cuda') 
        
    # Transformer layers
    self.encoder_layer1 = nn.TransformerEncoderLayer(d_model=dim_model, nhead=8)
    self.encoder_layer2 = nn.TransformerEncoderLayer(d_model=dim_model, nhead=8)
    self.encoder_layer3 = nn.TransformerEncoderLayer(d_model=dim_model, nhead=8)

    # linear layer 
    self.linear_layer = nn.Linear(dim_model, dim_model)

    # Dropout layer 
    self.dropout_layer = nn.Dropout(0.25)

    # classification layer
    self.classification_layer = nn.Linear(dim_model * sentences_num, num_labels)

  def forward(self, batch_data):
    tokenized_batch_data = []

    # Process data
    for i in range(len(batch_data)):
      result_ith = torch.tensor(self.model.encode(batch_data.iloc[i]["claims_mod"]))
      tokenized_batch_data.append(result_ith)

    # convert this into a tensor 
    result = torch.stack(tokenized_batch_data).to(device)
    
    # pass thru transformer encoder layers 
    result = self.encoder_layer1(result)
    result = self.encoder_layer2(result)
    result = self.encoder_layer3(result)

    # pass thru a linear layer 
    result = self.linear_layer(result)

    # pass thru a dropout layer 
    result = self.dropout_layer(result)

    # Flatten the result
    result = result.view(result.shape[0], -1)

    # pass thru a classification layer 
    logits = self.classification_layer(result)

    return logits

In [11]:
from torch.utils.data import Dataset, DataLoader

batch_size_param = 16

def collate_fn(list_items):
  x = []
  y = []
  for x_, y_ in list_items:
      x.append(x_)
      y.append(y_)
  return x, y

class CustomDataSet(Dataset):
  def __init__(self, train_df_data, validation_df_data):
      self.x = train_df_data
      self.y = validation_df_data
  def __getitem__(self,index):
      return self.x[index],self.y[index]
  def __len__(self):
      return len(self.x)

g = torch.Generator()
g.manual_seed(42)

h = torch.Generator()
h.manual_seed(42)

train_preload = CustomDataSet(train_df["claims_mod"].tolist(), train_df["output_onehot"].tolist())
train_dataloader = DataLoader(train_preload, batch_size=batch_size_param, collate_fn=collate_fn, shuffle = True, generator = g)

validation_preload = CustomDataSet(validation_df["claims_mod"].tolist(), validation_df["output_onehot"].tolist())
validation_dataloader = DataLoader(validation_preload, batch_size=batch_size_param, collate_fn=collate_fn, shuffle = True, generator = h)

In [12]:
from transformers import get_linear_schedule_with_warmup, AdamW

epochs = 20
model = PatentLLM("allenai/scibert_scivocab_uncased", sentences_num=sentences_num_param)

print(model)

optimizer = AdamW(model.parameters(),
                  lr = 2e-5, 
                  eps = 1e-8 
                )

total_steps = len(train_dataloader) * epochs

scheduler = get_linear_schedule_with_warmup(optimizer, 
                                            num_warmup_steps = 0, # Default value in run_glue.py
                                            num_training_steps = total_steps)

# Implement F1 Score
def flat_accuracy(preds, labels):

    pred_flat = torch.argmax(preds, dim=1).flatten()  
    labels_flat = torch.argmax(labels, dim=1).flatten()  

    # Move logits and labels to CPU
    pred_flat_num = pred_flat.detach().cpu().numpy()
    labels_flat_num = labels_flat.to('cpu').numpy()
        
    accuracy = np.sum(pred_flat_num == labels_flat_num) / len(labels_flat_num)
    
    # accuracy: 0
    # pred_flat: 1
    # labels_flat: 2
    return_package = [accuracy, pred_flat_num, labels_flat_num]
    
    return return_package
  

def format_time(elapsed):   
    elapsed_rounded = int(round((elapsed)))
    
    return str(datetime.timedelta(seconds=elapsed_rounded))

# Functions for saving and loading model parameters and metrics.
def save_checkpoint(path, model, valid_loss):
    torch.save({'model_state_dict': model.state_dict(),
                  'valid_loss': valid_loss}, path)
    
def load_checkpoint(path, model):    
    state_dict = torch.load(path)
    model.load_state_dict(state_dict['model_state_dict'])
    
    return state_dict['valid_loss']

Some weights of the model checkpoint at allenai/scibert_scivocab_uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


PatentLLM(
  (model): SentenceTransformer(
    (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BertModel 
    (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': True})
  )
  (encoder_layer1): TransformerEncoderLayer(
    (self_attn): MultiheadAttention(
      (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
    )
    (linear1): Linear(in_features=768, out_features=2048, bias=True)
    (dropout): Dropout(p=0.1, inplace=False)
    (linear2): Linear(in_features=2048, out_features=768, bias=True)
    (norm1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
    (norm2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
    (dropout1): Dropout(p=0.1, inplace=False)
    (dropout2): Dropout(p=0.1, inplace=False)
  )
  (encoder_layer2): TransformerEncoderLayer(
    (self



In [13]:
# Reference: https://colab.research.google.com/drive/1b0tRPNXHEFReOP5xY4eIWsl8G-Dma7ZN#scrollTo=uDLZmEC_oKo3

!pip install torchmetrics

import time
import datetime
import random
from torch.nn import BCEWithLogitsLoss, BCELoss
from torchmetrics.classification import BinaryF1Score, BinaryPrecision, BinaryRecall

model.cuda()
device = torch.device("cuda")

# Check parameters count
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(count_parameters(model))

best_valid_loss = 1000

for epoch_i in range(0, epochs):
  target_list = []
  output_list = []

  if(epoch_i == 0):
    # Initial Validation
    print("Running Initial Validation...")

    t0 = time.time()
    eval_accuracy = 0
    eval_step = 0

    model.eval()

    # Initial Validation so no grad here
    with torch.no_grad():
      for step, batch in enumerate(validation_dataloader):       
              
        # convert to dataframe
        validation_df_inputs_mod = pd.DataFrame({'claims_mod': batch[0]})
        # convert to dataframe
        validation_df_labels_mod = pd.DataFrame({'labels': batch[1]})
        # join the two dataframes
        validation_df_combined = pd.concat([validation_df_inputs_mod, validation_df_labels_mod], axis=1)        
        
        # get the logits ex. (16 samples x 2 neurons)
        logits = model(validation_df_combined)
       
        # calculate the loss
        loss_func = BCEWithLogitsLoss() 

        # reconvert validation_df_labels_mod into num_label columns        
        validation_df_labels_mod[['REJECTED','ACCEPTED']] = pd.DataFrame(validation_df_labels_mod.labels.tolist(), index=validation_df_labels_mod.index)        
        validation_df_labels_mod = validation_df_labels_mod.drop('labels', axis=1)
        
        label_tensor = torch.from_numpy(validation_df_labels_mod.to_numpy().astype(np.float32)).to(device)
    
        return_values = flat_accuracy(logits, label_tensor)                
        
        eval_accuracy += return_values[0]
        output_list.append(return_values[1]) 
        target_list.append(return_values[2])  
        
        eval_step = eval_step + 1
        
      outputs_concat = torch.tensor(np.concatenate(output_list))  
      targets_concat = torch.tensor(np.concatenate(target_list))
        
      metric = BinaryF1Score()
      print(">>>>>>>>>>> F1 Score: ", metric(outputs_concat, targets_concat))

      metric1 = BinaryPrecision()
      print(">>>>>>>>>>> Precision Score:", metric1(outputs_concat, targets_concat))

      metric2 = BinaryRecall()
      print(">>>>>>>>>>> Recall Score: ", metric2(outputs_concat, targets_concat))

      # Report the final accuracy for this validation run.
      print("  Accuracy: {0:.5f}".format(eval_accuracy/eval_step))
      print("  Validation took: {:}".format(format_time(time.time() - t0)))

      # Reset target output list to none again
      target_list = []
      output_list = []
      outputs_concat = None
      targets_concat = None
          
      print("Initial Validation Accuracy level: ", (eval_accuracy / eval_step)) 

  # Training

  print("")
  print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
  print('Training...')

  # Reset the total loss for this epoch.
  total_loss = 0
  total_val_loss = 0
  # Put the model in training mode
  model.train()

  # measure total loss
  total_loss = 0 

  for step, batch in enumerate(train_dataloader):      

    # Progress update every 40 batches.
    if step % 40 == 0 and not step == 0:
        # Calculate elapsed time in minutes.
        elapsed = format_time(time.time() - t0)    
        # Report progress.
        print('  Batch {:>5,}  of  {:>5,}.    Elapsed: {:}.'.format(step, len(train_dataloader), elapsed))

    # convert to dataframe
    validation_df_inputs_mod = pd.DataFrame({'claims_mod': batch[0]})
    # convert to dataframe
    validation_df_labels_mod = pd.DataFrame({'labels': batch[1]})
    # join the two dataframes
    validation_df_combined = pd.concat([validation_df_inputs_mod, validation_df_labels_mod], axis=1)        

    # Clear out gradient
    model.zero_grad()
    
    # get the logits ex. (16 samples x 2 neurons)
    logits = model(validation_df_combined)
    
    # calculate the loss
    loss_func = BCEWithLogitsLoss()  

    # reconvert validation_df_labels_mod into num_label columns        
    validation_df_labels_mod[['REJECTED','ACCEPTED']] = pd.DataFrame(validation_df_labels_mod.labels.tolist(), index=validation_df_labels_mod.index)        
    validation_df_labels_mod = validation_df_labels_mod.drop('labels', axis=1)

    # print(validation_df_labels_mod.head())
    label_tensor = torch.from_numpy(validation_df_labels_mod.to_numpy().astype(np.float32)).to(device)    

    #convert labels to float for calculation
    loss = loss_func(logits,label_tensor) 

    # Add to Total Loss
    total_loss = total_loss + loss.item()

    loss.backward()

    # Clip the norm of the gradients to 1.0.
    torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

    optimizer.step()

    scheduler.step()

  # Calculate the average loss over the training data.
  avg_train_loss = total_loss / len(train_dataloader)    
  
  print("")
  print("  Average training loss: {0:.5f}".format(avg_train_loss))
  print("  Training epoch took: {:}".format(format_time(time.time() - t0)))
  
  # Validation

  print("\nRunning Validation...")
  eval_accuracy = 0
  eval_step = 0

  model.eval()

  # Make sure no grad is here
  with torch.no_grad():
    for step, batch in enumerate(validation_dataloader):       
            
      # convert to dataframe
      validation_df_inputs_mod = pd.DataFrame({'claims_mod': batch[0]})
      # convert to dataframe
      validation_df_labels_mod = pd.DataFrame({'labels': batch[1]})
      # join the two dataframes
      validation_df_combined = pd.concat([validation_df_inputs_mod, validation_df_labels_mod], axis=1)        
      
      # get the logits ex. (16 samples x 2 neurons)
      logits = model(validation_df_combined)
    
      # calculate the loss
      loss_func = BCEWithLogitsLoss()     

      # reconvert validation_df_labels_mod into num_label columns        
      validation_df_labels_mod[['REJECTED','ACCEPTED']] = pd.DataFrame(validation_df_labels_mod.labels.tolist(), index=validation_df_labels_mod.index)        
      validation_df_labels_mod = validation_df_labels_mod.drop('labels', axis=1)
      
      label_tensor = torch.from_numpy(validation_df_labels_mod.to_numpy().astype(np.float32)).to(device)
          
      loss = loss_func(logits,label_tensor) #convert labels to float for calculation
      # Add to Total Loss
      total_val_loss = total_val_loss + loss.item()

      return_values = flat_accuracy(logits, label_tensor)
        
      eval_accuracy += return_values[0]
      output_list.append(return_values[1]) 
      target_list.append(return_values[2])  
        
      eval_step = eval_step + 1
        
    outputs_concat = torch.tensor(np.concatenate(output_list))  
    targets_concat = torch.tensor(np.concatenate(target_list))

    metric = BinaryF1Score()
    print(">>>>>>>>>>> F1 Score: ", metric(outputs_concat, targets_concat))

    metric1 = BinaryPrecision()
    print(">>>>>>>>>>> Precision Score:", metric1(outputs_concat, targets_concat))

    metric2 = BinaryRecall()
    print(">>>>>>>>>>> Recall Score: ", metric2(outputs_concat, targets_concat))

    # Report the final accuracy for this validation run.
    print("  Accuracy: {0:.5f}".format(eval_accuracy/eval_step))
    print("  Validation took: {:}".format(format_time(time.time() - t0)))

    # Reset target output list to none again
    target_list = []
    output_list = []
    outputs_concat = None
    targets_concat = None
 
  # Calculate the average loss over the validation data.
  avg_val_loss = total_val_loss / len(validation_dataloader) 
  # checkpoint
  if best_valid_loss > avg_val_loss:
    best_valid_loss = avg_val_loss
    save_checkpoint('model.pkl', model, best_valid_loss)

  print("Validation Accuracy level: ", (eval_accuracy / eval_step))


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
127089410
Running Initial Validation...
>>>>>>>>>>> F1 Score:  tensor(0.6654)
>>>>>>>>>>> Precision Score: tensor(0.4997)
>>>>>>>>>>> Recall Score:  tensor(0.9956)
  Accuracy: 0.49940
  Validation took: 0:06:18
Initial Validation Accuracy level:  0.49940095846645366

Training...
  Batch    40  of  1,250.    Elapsed: 0:07:06.
  Batch    80  of  1,250.    Elapsed: 0:07:55.
  Batch   120  of  1,250.    Elapsed: 0:08:44.
  Batch   160  of  1,250.    Elapsed: 0:09:31.
  Batch   200  of  1,250.    Elapsed: 0:10:19.
  Batch   240  of  1,250.    Elapsed: 0:11:07.
  Batch   280  of  1,250.    Elapsed: 0:11:56.
  Batch   320  of  1,250.    Elapsed: 0:12:45.
  Batch   360  of  1,250.    Elapsed: 0:13:32.
  Batch   400  of  1,250.    Elapsed: 0:14:20.
  Batch   440  of  1,250.    Elapsed: 0:15:09.
  Batch   480  of  1,250.    Elapsed: 0:15:57.
  Batch   520  of  1,250.    Elapsed: 0:16:44.
  Batch   

In [14]:
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import seaborn as sns
import tensorflow as tf

def evaluate(model, validation_dataloader):
    y_pred = []
    y_true = []

    model.eval()
    with torch.no_grad():
      for step, batch in enumerate(validation_dataloader):       
            
        # convert to dataframe
        validation_df_inputs_mod = pd.DataFrame({'claims_mod': batch[0]})
        # convert to dataframe
        validation_df_labels_mod = pd.DataFrame({'labels': batch[1]})
        # join the two dataframes
        validation_df_combined = pd.concat([validation_df_inputs_mod, validation_df_labels_mod], axis=1)        
        
        # get the logits ex. (16 samples x 2 neurons)
        logits = model(validation_df_combined)

        # reconvert validation_df_labels_mod into num_label columns        
        validation_df_labels_mod[['REJECTED','ACCEPTED']] = pd.DataFrame(validation_df_labels_mod.labels.tolist(), index=validation_df_labels_mod.index)        
        validation_df_labels_mod = validation_df_labels_mod.drop('labels', axis=1)
        
        # print(validation_df_labels_mod.head())
        label_tensor = torch.from_numpy(validation_df_labels_mod.to_numpy().astype(np.float32)).to(device)
        y_pred += torch.argmax(logits, dim=1).flatten()  
        y_true += torch.argmax(label_tensor, dim=1).flatten()
        
    print('Classification Report:')
    print(classification_report(y_true, y_pred, labels=range(2), digits=4))

    plt.figure(figsize=(15, 15))    
    cm = confusion_matrix(y_true, y_pred, labels=range(2))
    ax = plt.subplot()
    sns.heatmap(cm, annot=True, ax = ax, cmap="YlGnBu", fmt="d")
    ax.set_title('Confusion Matrix')
    ax.set_xlabel('Predicted Labels')
    ax.set_ylabel('True Labels')
    ax.xaxis.set_ticklabels(range(2))
    ax.yaxis.set_ticklabels(range(2))