In [1]:
!nvidia-smi

Thu Apr 28 18:16:00 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 510.54       Driver Version: 510.54       CUDA Version: 11.6     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA A40          On   | 00000000:01:00.0 Off |                  Off |
|  0%   32C    P0    76W / 300W |   9409MiB / 49140MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  NVIDIA A40          On   | 00000000:23:00.0 Off |                  Off |
|  0%   31C    P0    76W / 300W |      2MiB / 49140MiB |      0%      Default |
|       

In [2]:
import pandas as pd
import numpy as np
import random
from tqdm.auto import tqdm
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from transformers import BertTokenizerFast as BertTokenizer, BertModel, AdamW, get_linear_schedule_with_warmup
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, multilabel_confusion_matrix
from torch.cuda.amp import autocast as autocast
from torch.cuda.amp import GradScaler as GradScaler
import os


torch.cuda.set_device(1)
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")

BERT_MODEL_NAME = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

EPOCHS = 1
MAX_TOKEN_COUNT = 128
BATCH_SIZE = 64

In [3]:
def setup_seed(seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True
# 设置随机数种子
setup_seed(20)

In [4]:
%%time 
train_df=pd.read_feather("./autodl-nas/USPTO-2M_Training.feather")
val_df=pd.read_feather("./autodl-nas/USPTO-2M_Validation.feather")

LABEL_COLUMNS=train_df.columns[11:]
LABEL_COLUMNS

CPU times: user 29.8 s, sys: 1min 57s, total: 2min 27s
Wall time: 45.3 s


Index(['A41D', 'A62B', 'A41B', 'D06N', 'A42B', 'A43B', 'D06B', 'A41F', 'E03D',
       'A47K',
       ...
       'Y02D', 'F24V', 'H04T', 'G16B', 'G16C', 'G16Z', 'G21J', 'G16Y', 'G06J',
       'E99Z'],
      dtype='object', length=664)

In [9]:
class PatentDataset(Dataset):
    """
    Pass pandas dataframe, and tokeizer along with the max token length[128 default]
    
    Example: 
    -------
    train_dataset = ToxicCommentsDataset(
      train_df,
      tokenizer,
      max_token_len=MAX_TOKEN_COUNT
    )

    sample_item = train_dataset[0]
    
    """
    
    
    def __init__(
        self,
        data: pd.DataFrame,
        tokenizer: BertTokenizer,
        max_token_len: int = 512,
        test= False
    ):
        self.data = data
        self.tokenizer = tokenizer
        self.max_token_len = max_token_len
        self.test = test
        
    
    def __len__(self):
        return len(self.data)
    
    
    def __getitem__(self, index: int):
        data_row = self.data.iloc[index]
#         comment_text = "".join(data_row.claims)
        comment_text = data_row.claims[0]

        if not self.test:
            labels = data_row[LABEL_COLUMNS]
        
        encoding = self.tokenizer.encode_plus(
            comment_text,
            max_length=128,
            padding="max_length",
            truncation=True,
            add_special_tokens=True, # [CLS] & [SEP]
            return_token_type_ids=False,
            return_attention_mask=True, #attention_mask
            return_tensors='pt',
        )
        
        if not self.test:
            return dict(
#             comment_text=comment_text,
            input_ids = encoding["input_ids"].flatten(),
            attention_mask=encoding["attention_mask"].flatten(),
            labels=torch.FloatTensor(labels)
        )
        else:
            return dict(
#                 comment_text=comment_text,
                input_ids = encoding["input_ids"].flatten(),
                attention_mask=encoding["attention_mask"].flatten()
            )
        

In [10]:
train_dataset = PatentDataset(
  train_df,
  tokenizer,
  max_token_len=128
)

val_dataset = PatentDataset(
  val_df,
  tokenizer,
  max_token_len=128
)

In [11]:
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True,drop_last = True,pin_memory=True)
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, drop_last = True,pin_memory=True)

In [12]:
# Contrastive learning loss funcation

class ConLoss(nn.Module):
    
    def __init__(self, temperature = 0.1):
        super().__init__()
        self.temperature = temperature
        
    def forward(self, embeddings, labels):
       
        # embeddings (batch_size,bert_hidden_layer) , labels (batch_size,num_classes)
        similarities = F.cosine_similarity(embeddings.unsqueeze(1),embeddings.unsqueeze(0),dim=2)/self.temperature
#         print(similarities)
        
        # mask
        logits_mask = ~torch.eye(BATCH_SIZE,dtype=torch.bool).to(device)
        labels_mask = ~torch.eye(BATCH_SIZE,dtype=torch.bool).to(device)

        exp_logits = torch.exp(similarities) * logits_mask
        loss = torch.log(exp_logits.sum(1,keepdim=True)) - similarities
        
        # labels 
        weight = torch.matmul(labels,labels.T)
        # weight = weight - torch.diag_embed(torch.diag(weight))
        # total_weight = torch.sum(weight,dim=1,keepdim=True)
        # weight = torch.where(total_weight!=0,weight/total_weight,weight)
        
        weight = weight / torch.diag(weight)
        weight = weight * labels_mask
        weight = torch.where(weight!=1,torch.zeros_like(weight).to(device),weight)
        weight = weight/torch.sum(weight)
#         print(weight)
        
        loss = weight * loss
        loss = torch.mean(loss)
        return loss
        


In [13]:
class BertNetwork(nn.Module):

    def __init__(self, n_classes: int, n_training_steps=None, n_warmup_steps=None):
        super().__init__()
        self.bert = BertModel.from_pretrained(BERT_MODEL_NAME, return_dict=True) #load the pretrained bert model
        self.n_training_steps = n_training_steps
        self.n_warmup_steps = n_warmup_steps
        
    def forward(self, input_ids, attention_mask):
        output = self.bert(input_ids, attention_mask=attention_mask)
        output = output.last_hidden_state[:,0]

        return output

In [14]:
class Classifier(nn.Module):
    
    def __init__(self, n_classes: int ):
        super().__init__()
        self.classifier = nn.Linear(768,n_classes)
        self.dropout = nn.Dropout(0.10)
        self.criterion = nn.BCEWithLogitsLoss()
        
    def forward(self,embeddings,labels = None):
        
        output = self.classifier(self.dropout(embeddings))
        
        if labels is not None:
            loss = self.criterion(output,labels)
            output = (loss,output)
        
        return output

In [15]:
model = BertNetwork(len(LABEL_COLUMNS)).to(device)
Conloss =  ConLoss().to(device)
classifier = Classifier(len(LABEL_COLUMNS)).to(device)
N_EPOCHS = EPOCHS

steps_per_epoch=len(train_df) // BATCH_SIZE
total_training_steps = steps_per_epoch * N_EPOCHS
warmup_steps = total_training_steps // 10
warmup_steps, total_training_steps

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


(3188, 31880)

In [16]:
# optimizer = AdamW(model.parameters(), lr=2e-4)

optimizer = AdamW([
                {'params': model.parameters()},
                {'params': classifier.parameters()}],
                lr=2e-4
)

scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=warmup_steps,
    num_training_steps=total_training_steps
)



In [17]:
from sklearn.metrics import roc_auc_score
from sklearn.metrics import precision_score

# function for evaluating the model
def evaluate(mydataloader):

    print("\nEvaluating...")
    #t0 = time.time()
    # deactivate dropout layers
    model.eval()
    classifier.eval()
    total_loss, total_accuracy = 0, 0

    # empty list to save the model predictions
    total_preds = []
    total_labels = []

    # iterate over batches
    for step,batch in tqdm(enumerate(mydataloader),total=len(mydataloader),desc='Eval'):

        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["labels"].to(device)   
        # deactivate autograd
        with autocast():
            with torch.no_grad():
                outputs = model(input_ids, attention_mask)
                loss1 = Conloss(outputs,labels)
                loss2,outputs = classifier(outputs,labels)
                if step%500 ==0:
                    print(f"loss contrastive :{loss1}  loss classifier: {loss2}")
                outputs = torch.sigmoid(outputs)

                loss = loss2
                total_loss = total_loss + loss.float().item()

                outputs = outputs.detach().float().cpu().numpy()
                labels = labels.detach().float().cpu().numpy()
                total_preds.append(outputs)
                total_labels.append(labels)


    # compute the validation loss of the epoch
    avg_loss = total_loss / len(mydataloader)

    total_preds  = np.concatenate(total_preds, axis=0)
    total_labels = np.concatenate(total_labels, axis=0)
    model.train()
    print(f"Evaluate loss {total_loss / len(mydataloader)}")
    return avg_loss, total_preds, total_labels

In [18]:
# function to train the model
def train():
    
    now=time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
    best_valid_loss = float('inf')
    model.train()
    classifier.train()
    total_loss, total_accuracy = 0, 0
    avg_loss = 0
    scaler = GradScaler()

    # empty list to save model predictions
    total_preds=[]
    # iterate over batches
    for step,batch in tqdm(enumerate(train_dataloader),total=len(train_dataloader),desc="Train"):
        
        if step%1000 == 0 and step!=0:
            valid_loss,_,_ = evaluate(val_dataloader)
            if valid_loss < best_valid_loss:
                best_valid_loss = valid_loss
                torch.save(model.state_dict(), f"./model/Classfication_Contrastive_model{now}.pt")
                torch.save(classifier.state_dict(),f"./model/Classfication_Contrastive_classifier{now}.pt")
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["labels"].to(device)  
        if step > 0.6*len(train_dataloader):
            with autocast():
                outputs = model(input_ids, attention_mask)
                loss1 = Conloss(outputs,labels)
                loss2,_ = classifier(outputs,labels)
                loss = loss1 + loss2

            optimizer.zero_grad()
            if step%200 == 0:
                print(f"STEP {step}: loss contrastive :{loss1}  loss classifier: {loss2}")
        else:
            with autocast():
                outputs = model(input_ids, attention_mask)
                loss,_ = classifier(outputs,labels)
            optimizer.zero_grad()
            if step%200 == 0:
                print(f"STEP {step}: loss classifier: {loss}")
        # add on to the total loss
        total_loss = total_loss + loss.item()
        scaler.scale(loss).backward()

        scaler.unscale_(optimizer)
        # clip the the gradients to 1.0. It helps in preventing the exploding gradient problem
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        scaler.step(optimizer)
        scheduler.step()
        scaler.update()
        # model predictions are stored on GPU. So, push it to CPU
        outputs=outputs.detach().float().cpu().numpy()

        # append the model predictions
        total_preds.append(outputs)

    # compute the training loss of the epoch
    avg_loss = total_loss / len(train_dataloader)
    print(f"{step}: {avg_loss}")
  

    # predictions are in the form of (no. of batches, size of batch, no. of classes).
    # reshape the predictions in form of (number of samples, no. of classes)
    total_preds  = np.concatenate(total_preds, axis=0)

    #returns the loss and predictions
    return avg_loss, total_preds

In [None]:
train_loss, _ = train()

Train:   0%|          | 0/31880 [00:00<?, ?it/s]

STEP 0: loss classifier: 0.7028471231460571
STEP 200: loss classifier: 0.35217714309692383
STEP 400: loss classifier: 0.1153893694281578
STEP 600: loss classifier: 0.04613625630736351
STEP 800: loss classifier: 0.026719212532043457

Evaluating...


Eval:   0%|          | 0/817 [00:00<?, ?it/s]

loss contrastive :0.0010096090845763683  loss classifier: 0.019642315804958344
loss contrastive :0.0010098202619701624  loss classifier: 0.01739700883626938
Evaluate loss 0.01907718942627183
STEP 1000: loss classifier: 0.01976439729332924
STEP 1200: loss classifier: 0.015366089530289173
STEP 1400: loss classifier: 0.013122770935297012
STEP 1600: loss classifier: 0.012950985692441463
STEP 1800: loss classifier: 0.012010379694402218

Evaluating...


Eval:   0%|          | 0/817 [00:00<?, ?it/s]

loss contrastive :0.0008076670928858221  loss classifier: 0.012389399111270905
loss contrastive :0.0008883739355951548  loss classifier: 0.009530974552035332
Evaluate loss 0.011651771189815922
STEP 2000: loss classifier: 0.010989558883011341
STEP 2200: loss classifier: 0.011290882714092731
STEP 2400: loss classifier: 0.010745418258011341
STEP 2600: loss classifier: 0.010385711677372456
STEP 2800: loss classifier: 0.009185759350657463

Evaluating...


Eval:   0%|          | 0/817 [00:00<?, ?it/s]

loss contrastive :0.0006974786520004272  loss classifier: 0.009389062412083149
loss contrastive :0.0008503607241436839  loss classifier: 0.007089272607117891
Evaluate loss 0.008739171581595881
STEP 3000: loss classifier: 0.007758451160043478
STEP 3200: loss classifier: 0.008060689084231853
STEP 3400: loss classifier: 0.008534119464457035
STEP 3600: loss classifier: 0.008640121668577194
STEP 3800: loss classifier: 0.006333112251013517

Evaluating...


Eval:   0%|          | 0/817 [00:00<?, ?it/s]

loss contrastive :0.0006348930182866752  loss classifier: 0.007628263905644417
loss contrastive :0.0008419989608228207  loss classifier: 0.006137077696621418
Evaluate loss 0.007433349250119312
STEP 4000: loss classifier: 0.007641992997378111
STEP 4200: loss classifier: 0.007826059125363827
STEP 4400: loss classifier: 0.008106736466288567
STEP 4600: loss classifier: 0.009081295691430569
STEP 4800: loss classifier: 0.00618650671094656

Evaluating...


Eval:   0%|          | 0/817 [00:00<?, ?it/s]

loss contrastive :0.0005990357021801174  loss classifier: 0.0067250365391373634
loss contrastive :0.0008330449927598238  loss classifier: 0.005329926498234272
Evaluate loss 0.006668284212237947
STEP 5000: loss classifier: 0.005910747218877077
STEP 5200: loss classifier: 0.006799269001930952
STEP 5400: loss classifier: 0.007793578319251537
STEP 5600: loss classifier: 0.0063172997906804085
STEP 5800: loss classifier: 0.005842671263962984

Evaluating...


Eval:   0%|          | 0/817 [00:00<?, ?it/s]

loss contrastive :0.0006300364038906991  loss classifier: 0.006624117493629456
loss contrastive :0.0008568608900532126  loss classifier: 0.005420580506324768
Evaluate loss 0.006292590733806772
STEP 6000: loss classifier: 0.0070038484409451485
STEP 6200: loss classifier: 0.005457405932247639
STEP 6400: loss classifier: 0.005658808629959822
STEP 6600: loss classifier: 0.004498820751905441
STEP 6800: loss classifier: 0.006967401597648859

Evaluating...


Eval:   0%|          | 0/817 [00:00<?, ?it/s]

loss contrastive :0.0006373724900186062  loss classifier: 0.006469070445746183
loss contrastive :0.0008489875472150743  loss classifier: 0.005214997101575136
Evaluate loss 0.006104590747712893
STEP 7000: loss classifier: 0.0062493570148944855
STEP 7200: loss classifier: 0.00613329978659749
STEP 7400: loss classifier: 0.003834730014204979
STEP 7800: loss classifier: 0.004742268938571215

Evaluating...


Eval:   0%|          | 0/817 [00:00<?, ?it/s]

loss contrastive :0.0006640902138315141  loss classifier: 0.006406976375728846
loss contrastive :0.0008472061017528176  loss classifier: 0.004916551988571882
Evaluate loss 0.005950634778993639
STEP 8000: loss classifier: 0.007365568075329065
STEP 8200: loss classifier: 0.0052322098053991795
STEP 8400: loss classifier: 0.006184171885251999
STEP 8600: loss classifier: 0.0063955881632864475
STEP 8800: loss classifier: 0.0060486807487905025

Evaluating...


Eval:   0%|          | 0/817 [00:00<?, ?it/s]

loss contrastive :0.0006103534833528101  loss classifier: 0.005644454155117273
loss contrastive :0.0008496025111526251  loss classifier: 0.004749954678118229
Evaluate loss 0.005747179279327302
STEP 9000: loss classifier: 0.005407894961535931
STEP 9200: loss classifier: 0.007298095617443323
STEP 9400: loss classifier: 0.005883070174604654
STEP 9600: loss classifier: 0.00642632320523262
STEP 9800: loss classifier: 0.0050896708853542805

Evaluating...


Eval:   0%|          | 0/817 [00:00<?, ?it/s]

loss contrastive :0.0006041912129148841  loss classifier: 0.005903243087232113
loss contrastive :0.0008442456019110978  loss classifier: 0.004694532137364149
Evaluate loss 0.0056615441255590615
STEP 10000: loss classifier: 0.005143082235008478
STEP 10200: loss classifier: 0.005611708387732506
STEP 10400: loss classifier: 0.0063965641893446445
STEP 10600: loss classifier: 0.005503572057932615
STEP 10800: loss classifier: 0.003732773708179593

Evaluating...


Eval:   0%|          | 0/817 [00:00<?, ?it/s]

loss contrastive :0.0006237812340259552  loss classifier: 0.005748061463236809
loss contrastive :0.0008347185212187469  loss classifier: 0.004405277781188488
Evaluate loss 0.005592836498258911
STEP 11000: loss classifier: 0.005529221147298813
STEP 11200: loss classifier: 0.005216518417000771
STEP 11400: loss classifier: 0.006083405576646328
STEP 11600: loss classifier: 0.006214025896042585
STEP 11800: loss classifier: 0.004499891772866249

Evaluating...


Eval:   0%|          | 0/817 [00:00<?, ?it/s]

loss contrastive :0.0006190416170284152  loss classifier: 0.0055509526282548904
loss contrastive :0.0008533282089047134  loss classifier: 0.004647146910429001
Evaluate loss 0.005495062562998389
STEP 12000: loss classifier: 0.006479186937212944
STEP 12200: loss classifier: 0.005418709479272366
STEP 12400: loss classifier: 0.005767521448433399
STEP 12600: loss classifier: 0.005843021906912327
STEP 12800: loss classifier: 0.0045595793053507805

Evaluating...


Eval:   0%|          | 0/817 [00:00<?, ?it/s]

loss contrastive :0.0006248630816116929  loss classifier: 0.005825744476169348
loss contrastive :0.000848100520670414  loss classifier: 0.004322818946093321
Evaluate loss 0.0054358031419464905
STEP 13000: loss classifier: 0.004877395462244749
STEP 13200: loss classifier: 0.0053174979984760284


## Training

In [16]:
%%time
# set initial loss to infinite
import time
best_valid_loss = float('inf')
from tqdm.auto import tqdm

# empty lists to store training and validation loss of each epoch
train_losses=[]
valid_losses=[]

now=time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
#for each epoch
for epoch in tqdm(range(EPOCHS)):
     
    print('\n Epoch {:} / {:}'.format(epoch + 1, EPOCHS))
    
    #train model
    train_loss, _ = train()
    
    #evaluate model
    valid_loss, _, _ = evaluate(val_dataloader)
    
    #save the best model
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), f"./Baseline_abstract_model{now}.pt")
    
    # append training and validation loss
    train_losses.append(train_loss)
    valid_losses.append(valid_loss)
    

HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


 Epoch 1 / 5


HBox(children=(FloatProgress(value=0.0, description='Train', max=1335.0, style=ProgressStyle(description_width…

STEP 0: loss contrastive :0.0010610689641907811  loss classifier: 0.6774331331253052
STEP 50: loss contrastive :0.0010139003861695528  loss classifier: 0.38425585627555847
STEP 100: loss contrastive :0.0009989901445806026  loss classifier: 0.3425288498401642
STEP 150: loss contrastive :0.0009738221997395158  loss classifier: 0.312829852104187
STEP 200: loss contrastive :0.0009476971463300288  loss classifier: 0.2964755892753601
STEP 250: loss contrastive :0.0009283071267418563  loss classifier: 0.29871895909309387
STEP 300: loss contrastive :0.0009683565585874021  loss classifier: 0.29144302010536194
STEP 350: loss contrastive :0.0009231276926584542  loss classifier: 0.268029123544693
STEP 400: loss contrastive :0.0009250261355191469  loss classifier: 0.27633774280548096
STEP 450: loss contrastive :0.0009329313761554658  loss classifier: 0.292875736951828
STEP 500: loss contrastive :0.0009649590938352048  loss classifier: 0.28385740518569946
STEP 550: loss contrastive :0.00094192818505

HBox(children=(FloatProgress(value=0.0, description='Eval', max=70.0, style=ProgressStyle(description_width='i…

loss contrastive :0.0008807076374068856  loss classifier: 0.21857139468193054
loss contrastive :0.0009065266931429505  loss classifier: 0.23401078581809998
loss contrastive :0.0009554822463542223  loss classifier: 0.24336905777454376
loss contrastive :0.0009160935296677053  loss classifier: 0.24640393257141113
loss contrastive :0.0009338076924905181  loss classifier: 0.25457191467285156
loss contrastive :0.0009518761653453112  loss classifier: 0.24289177358150482
loss contrastive :0.0009178954060189426  loss classifier: 0.21809642016887665

Evaluate loss 0.239400030033929

 Epoch 2 / 5


HBox(children=(FloatProgress(value=0.0, description='Train', max=1335.0, style=ProgressStyle(description_width…

STEP 0: loss contrastive :0.0009442084701731801  loss classifier: 0.2308642864227295
STEP 50: loss contrastive :0.0009086896316148341  loss classifier: 0.24797853827476501
STEP 100: loss contrastive :0.0008787601254880428  loss classifier: 0.24488425254821777
STEP 150: loss contrastive :0.0009530895622447133  loss classifier: 0.2338808923959732
STEP 200: loss contrastive :0.000961251906119287  loss classifier: 0.2411338984966278
STEP 250: loss contrastive :0.000928273017052561  loss classifier: 0.2574857175350189
STEP 300: loss contrastive :0.0008967827307060361  loss classifier: 0.2313375324010849
STEP 350: loss contrastive :0.000887261179741472  loss classifier: 0.2302122563123703
STEP 400: loss contrastive :0.0009499300504103303  loss classifier: 0.23939299583435059
STEP 450: loss contrastive :0.0009526830399408937  loss classifier: 0.24924124777317047
STEP 500: loss contrastive :0.0009005170431919396  loss classifier: 0.23633794486522675
STEP 550: loss contrastive :0.00089170970022

HBox(children=(FloatProgress(value=0.0, description='Eval', max=70.0, style=ProgressStyle(description_width='i…

loss contrastive :0.0008893839549273252  loss classifier: 0.20146054029464722
loss contrastive :0.0008835346670821309  loss classifier: 0.2135966420173645
loss contrastive :0.0009539992897771299  loss classifier: 0.22898778319358826
loss contrastive :0.0008915557991713285  loss classifier: 0.2200794667005539
loss contrastive :0.0009340487304143608  loss classifier: 0.23876436054706573
loss contrastive :0.0009146474185399711  loss classifier: 0.22410865128040314
loss contrastive :0.0009144307114183903  loss classifier: 0.20079663395881653

Evaluate loss 0.224818488742624

 Epoch 3 / 5


HBox(children=(FloatProgress(value=0.0, description='Train', max=1335.0, style=ProgressStyle(description_width…

STEP 0: loss contrastive :0.0008946646121330559  loss classifier: 0.215729758143425
STEP 50: loss contrastive :0.000933293136768043  loss classifier: 0.21107187867164612
STEP 100: loss contrastive :0.0009095509303733706  loss classifier: 0.20472170412540436
STEP 150: loss contrastive :0.0008889230666682124  loss classifier: 0.2009972184896469
STEP 200: loss contrastive :0.000898425467312336  loss classifier: 0.220720112323761
STEP 250: loss contrastive :0.0008432338945567608  loss classifier: 0.19577822089195251
STEP 300: loss contrastive :0.0009300133096985519  loss classifier: 0.20456856489181519
STEP 350: loss contrastive :0.0009636440081521869  loss classifier: 0.21720096468925476
STEP 400: loss contrastive :0.0009122542687691748  loss classifier: 0.21185627579689026
STEP 450: loss contrastive :0.0009121580515056849  loss classifier: 0.2047554850578308
STEP 500: loss contrastive :0.0009195298771373928  loss classifier: 0.19961179792881012
STEP 550: loss contrastive :0.0009489330695

HBox(children=(FloatProgress(value=0.0, description='Eval', max=70.0, style=ProgressStyle(description_width='i…

loss contrastive :0.0008923079585656524  loss classifier: 0.2057870328426361
loss contrastive :0.0008866586722433567  loss classifier: 0.21777421236038208
loss contrastive :0.0009292339673265815  loss classifier: 0.218560591340065
loss contrastive :0.0008858245564624667  loss classifier: 0.2149192988872528
loss contrastive :0.0009333193302154541  loss classifier: 0.24170167744159698
loss contrastive :0.0009262507664971054  loss classifier: 0.21084506809711456
loss contrastive :0.0009197652107104659  loss classifier: 0.18989942967891693

Evaluate loss 0.22155216634273528

 Epoch 4 / 5


HBox(children=(FloatProgress(value=0.0, description='Train', max=1335.0, style=ProgressStyle(description_width…

STEP 0: loss contrastive :0.0009148622630164027  loss classifier: 0.18414299190044403
STEP 50: loss contrastive :0.000876838224940002  loss classifier: 0.16890180110931396
STEP 100: loss contrastive :0.0008993896190077066  loss classifier: 0.16637739539146423
STEP 150: loss contrastive :0.0009161243215203285  loss classifier: 0.1544165462255478
STEP 200: loss contrastive :0.0008454859489575028  loss classifier: 0.19036753475666046
STEP 250: loss contrastive :0.0008776029571890831  loss classifier: 0.17521975934505463
STEP 300: loss contrastive :0.000863123219460249  loss classifier: 0.15643495321273804
STEP 350: loss contrastive :0.0008465180872008204  loss classifier: 0.17740429937839508
STEP 400: loss contrastive :0.0008260203758254647  loss classifier: 0.17282404005527496
STEP 450: loss contrastive :0.0008817882044240832  loss classifier: 0.18425744771957397
STEP 500: loss contrastive :0.0008810520521365106  loss classifier: 0.17513351142406464
STEP 550: loss contrastive :0.00083236

HBox(children=(FloatProgress(value=0.0, description='Eval', max=70.0, style=ProgressStyle(description_width='i…

loss contrastive :0.000891766743734479  loss classifier: 0.20928114652633667
loss contrastive :0.0008764481754042208  loss classifier: 0.2221929430961609
loss contrastive :0.0009337436640635133  loss classifier: 0.22371983528137207
loss contrastive :0.0008815022883936763  loss classifier: 0.21296896040439606
loss contrastive :0.0009235467296093702  loss classifier: 0.2502913475036621
loss contrastive :0.000922387174796313  loss classifier: 0.22078891098499298
loss contrastive :0.000905571854673326  loss classifier: 0.1916544884443283

Evaluate loss 0.2295128060238702

 Epoch 5 / 5


HBox(children=(FloatProgress(value=0.0, description='Train', max=1335.0, style=ProgressStyle(description_width…

STEP 0: loss contrastive :0.0008306929375976324  loss classifier: 0.15690772235393524
STEP 50: loss contrastive :0.0008291496778838336  loss classifier: 0.15071210265159607
STEP 100: loss contrastive :0.0008183603640645742  loss classifier: 0.1562301069498062
STEP 150: loss contrastive :0.0008440852398052812  loss classifier: 0.14083687961101532
STEP 200: loss contrastive :0.000838672393001616  loss classifier: 0.1721179187297821
STEP 250: loss contrastive :0.0008693318231962621  loss classifier: 0.15354199707508087
STEP 300: loss contrastive :0.0008761153439991176  loss classifier: 0.1530335247516632
STEP 350: loss contrastive :0.0007590079912915826  loss classifier: 0.12554311752319336
STEP 400: loss contrastive :0.0008974550291895866  loss classifier: 0.15950292348861694
STEP 450: loss contrastive :0.0008219852461479604  loss classifier: 0.17826594412326813
STEP 500: loss contrastive :0.0008417931385338306  loss classifier: 0.14752903580665588
STEP 550: loss contrastive :0.000821863

HBox(children=(FloatProgress(value=0.0, description='Eval', max=70.0, style=ProgressStyle(description_width='i…

loss contrastive :0.0008922904962673783  loss classifier: 0.22516782581806183
loss contrastive :0.0008801620570011437  loss classifier: 0.2342340350151062
loss contrastive :0.0009436492691747844  loss classifier: 0.2321746051311493
loss contrastive :0.0008874292252585292  loss classifier: 0.2229316383600235
loss contrastive :0.0009319459786638618  loss classifier: 0.2601303458213806
loss contrastive :0.0009146207594312727  loss classifier: 0.2369941771030426
loss contrastive :0.0009042565943673253  loss classifier: 0.2024374008178711

Evaluate loss 0.24233244167906898

CPU times: user 55min 33s, sys: 6min 10s, total: 1h 1min 44s
Wall time: 56min 15s


In [16]:
%%time
# set initial loss to infinite
import time
best_valid_loss = float('inf')
from tqdm.auto import tqdm

# empty lists to store training and validation loss of each epoch
train_losses=[]
valid_losses=[]

now=time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
#for each epoch
for epoch in tqdm(range(EPOCHS)):
     
    print('\n Epoch {:} / {:}'.format(epoch + 1, EPOCHS))
    
    #train model
    train_loss, _ = train()
    
    #evaluate model
    valid_loss, _, _ = evaluate(val_dataloader)
    
    #save the best model
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), f"./Baseline_abstract_model{now}.pt")
    
    # append training and validation loss
    train_losses.append(train_loss)
    valid_losses.append(valid_loss)
    

HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))


 Epoch 1 / 5


HBox(children=(FloatProgress(value=0.0, description='Train', max=1335.0, style=ProgressStyle(description_width…

STEP 0: loss contrastive :0.0011736995074898005  loss classifier: 0.6784937977790833
STEP 50: loss contrastive :0.0010244036093354225  loss classifier: 0.38525381684303284
STEP 100: loss contrastive :0.0010459651239216328  loss classifier: 0.34234097599983215
STEP 150: loss contrastive :0.0010059644700959325  loss classifier: 0.31425362825393677
STEP 200: loss contrastive :0.0009743745322339237  loss classifier: 0.29644834995269775
STEP 250: loss contrastive :0.000911412644200027  loss classifier: 0.3009869456291199
STEP 300: loss contrastive :0.000994911533780396  loss classifier: 0.2920014560222626
STEP 350: loss contrastive :0.0009435606189072132  loss classifier: 0.27130696177482605
STEP 550: loss contrastive :0.0009513312834315002  loss classifier: 0.2624566853046417
STEP 600: loss contrastive :0.0008490897598676383  loss classifier: 0.2606485188007355
STEP 650: loss contrastive :0.0009443267481401563  loss classifier: 0.2466069906949997
STEP 700: loss contrastive :0.0009472237434

HBox(children=(FloatProgress(value=0.0, description='Eval', max=70.0, style=ProgressStyle(description_width='i…

loss contrastive :0.00086324627045542  loss classifier: 0.22190608084201813
loss contrastive :0.0008816148620098829  loss classifier: 0.23421892523765564
loss contrastive :0.0010400454048067331  loss classifier: 0.2457604855298996
loss contrastive :0.0009191778372041881  loss classifier: 0.2457519769668579
loss contrastive :0.0009707127464935184  loss classifier: 0.25364264845848083
loss contrastive :0.0009471081430092454  loss classifier: 0.23740224540233612
loss contrastive :0.000926076085306704  loss classifier: 0.22345615923404694

Evaluate loss 0.23963707110711507

 Epoch 2 / 5


HBox(children=(FloatProgress(value=0.0, description='Train', max=1335.0, style=ProgressStyle(description_width…

STEP 0: loss contrastive :0.0009488399373367429  loss classifier: 0.23232369124889374
STEP 50: loss contrastive :0.0008958855760283768  loss classifier: 0.253220796585083
STEP 100: loss contrastive :0.0008832403691485524  loss classifier: 0.2489113211631775
STEP 150: loss contrastive :0.0009393055224791169  loss classifier: 0.23295415937900543
STEP 200: loss contrastive :0.0009999671019613743  loss classifier: 0.2376948744058609
STEP 250: loss contrastive :0.0009627904510125518  loss classifier: 0.25691667199134827
STEP 300: loss contrastive :0.0008979059057310224  loss classifier: 0.2328859567642212
STEP 350: loss contrastive :0.0008929578470997512  loss classifier: 0.22803781926631927
STEP 400: loss contrastive :0.0010320055298507214  loss classifier: 0.24067842960357666
STEP 450: loss contrastive :0.0009719012305140495  loss classifier: 0.2511763274669647
STEP 500: loss contrastive :0.0009282968239858747  loss classifier: 0.24083101749420166
STEP 550: loss contrastive :0.00087546568

HBox(children=(FloatProgress(value=0.0, description='Eval', max=70.0, style=ProgressStyle(description_width='i…

loss contrastive :0.0008599188877269626  loss classifier: 0.2006521224975586
loss contrastive :0.0008460755925625563  loss classifier: 0.2135000228881836
loss contrastive :0.000979618402197957  loss classifier: 0.22505564987659454
loss contrastive :0.0008922427077777684  loss classifier: 0.22128352522850037
loss contrastive :0.0010183851700276136  loss classifier: 0.24932970106601715
loss contrastive :0.000922065693885088  loss classifier: 0.22875399887561798
loss contrastive :0.0009104629280045629  loss classifier: 0.19750604033470154

Evaluate loss 0.22413069563252586

 Epoch 3 / 5


HBox(children=(FloatProgress(value=0.0, description='Train', max=1335.0, style=ProgressStyle(description_width…

STEP 0: loss contrastive :0.0008980465936474502  loss classifier: 0.22032979130744934
STEP 50: loss contrastive :0.0009284426923841238  loss classifier: 0.20918573439121246
STEP 100: loss contrastive :0.000885294983163476  loss classifier: 0.2093435525894165
STEP 150: loss contrastive :0.0008536387467756867  loss classifier: 0.2002076506614685
STEP 200: loss contrastive :0.0008916630176827312  loss classifier: 0.22726105153560638
STEP 250: loss contrastive :0.0008259156020358205  loss classifier: 0.1979772448539734
STEP 300: loss contrastive :0.0009368800092488527  loss classifier: 0.2023366093635559
STEP 350: loss contrastive :0.0009978469461202621  loss classifier: 0.22486214339733124
STEP 400: loss contrastive :0.0009512822143733501  loss classifier: 0.21411678194999695
STEP 450: loss contrastive :0.0009087007492780685  loss classifier: 0.20623838901519775
STEP 500: loss contrastive :0.0009110539685934782  loss classifier: 0.19071035087108612
STEP 550: loss contrastive :0.0010221309

HBox(children=(FloatProgress(value=0.0, description='Eval', max=70.0, style=ProgressStyle(description_width='i…

loss contrastive :0.0008725500083528459  loss classifier: 0.20578263700008392
loss contrastive :0.000910047092474997  loss classifier: 0.21890391409397125
loss contrastive :0.0009645494865253568  loss classifier: 0.2212047576904297
loss contrastive :0.0009276264463551342  loss classifier: 0.2225142866373062
loss contrastive :0.0010248979087918997  loss classifier: 0.24798201024532318
loss contrastive :0.0009418590925633907  loss classifier: 0.21279238164424896
loss contrastive :0.0009347681188955903  loss classifier: 0.18822413682937622

Evaluate loss 0.22210357636213302

 Epoch 4 / 5


HBox(children=(FloatProgress(value=0.0, description='Train', max=1335.0, style=ProgressStyle(description_width…

STEP 0: loss contrastive :0.000942211365327239  loss classifier: 0.18686386942863464
STEP 50: loss contrastive :0.0008771411376073956  loss classifier: 0.168535053730011
STEP 100: loss contrastive :0.0009232746670022607  loss classifier: 0.17536720633506775
STEP 150: loss contrastive :0.0009367841412313282  loss classifier: 0.15939436852931976
STEP 200: loss contrastive :0.0008358241757377982  loss classifier: 0.20461532473564148
STEP 250: loss contrastive :0.0008678673766553402  loss classifier: 0.176971897482872
STEP 300: loss contrastive :0.000818393484223634  loss classifier: 0.15583422780036926
STEP 350: loss contrastive :0.0008111900533549488  loss classifier: 0.176048144698143
STEP 400: loss contrastive :0.0007947029080241919  loss classifier: 0.17967219650745392
STEP 450: loss contrastive :0.0008485829457640648  loss classifier: 0.18107275664806366
STEP 500: loss contrastive :0.000850270502269268  loss classifier: 0.17511844635009766
STEP 550: loss contrastive :0.00081645464524

HBox(children=(FloatProgress(value=0.0, description='Eval', max=70.0, style=ProgressStyle(description_width='i…

loss contrastive :0.0008937601232901216  loss classifier: 0.2024717777967453
loss contrastive :0.0009234985336661339  loss classifier: 0.22096052765846252
loss contrastive :0.0009677024208940566  loss classifier: 0.22529539465904236
loss contrastive :0.0009381117415614426  loss classifier: 0.21840733289718628
loss contrastive :0.001024402561597526  loss classifier: 0.25764214992523193
loss contrastive :0.000994328293018043  loss classifier: 0.2228819578886032
loss contrastive :0.000939505931455642  loss classifier: 0.1843181699514389

Evaluate loss 0.22800866003547396

 Epoch 5 / 5


HBox(children=(FloatProgress(value=0.0, description='Train', max=1335.0, style=ProgressStyle(description_width…

STEP 0: loss contrastive :0.0007900340715423226  loss classifier: 0.15494336187839508
STEP 50: loss contrastive :0.0008160884608514607  loss classifier: 0.15528662502765656
STEP 100: loss contrastive :0.0008477926021441817  loss classifier: 0.16335934400558472
STEP 150: loss contrastive :0.0008415866177529097  loss classifier: 0.14178107678890228
STEP 200: loss contrastive :0.0008564976742491126  loss classifier: 0.18279756605625153
STEP 250: loss contrastive :0.0008687674999237061  loss classifier: 0.1595715433359146
STEP 300: loss contrastive :0.0009081882890313864  loss classifier: 0.16121934354305267
STEP 350: loss contrastive :0.0006919500301592052  loss classifier: 0.13182401657104492
STEP 400: loss contrastive :0.0008912598132155836  loss classifier: 0.15460121631622314
STEP 450: loss contrastive :0.0007779154693707824  loss classifier: 0.1831590086221695
STEP 500: loss contrastive :0.0008356108446605504  loss classifier: 0.16439509391784668
STEP 550: loss contrastive :0.0008153

HBox(children=(FloatProgress(value=0.0, description='Eval', max=70.0, style=ProgressStyle(description_width='i…

loss contrastive :0.0009505008347332478  loss classifier: 0.21422308683395386
loss contrastive :0.000962299294769764  loss classifier: 0.22596541047096252
loss contrastive :0.0009981528855860233  loss classifier: 0.2330978512763977
loss contrastive :0.000995664857327938  loss classifier: 0.22896067798137665
loss contrastive :0.00106520252302289  loss classifier: 0.26746976375579834
loss contrastive :0.0010346363997086883  loss classifier: 0.23680955171585083
loss contrastive :0.0009839585982263088  loss classifier: 0.19763624668121338

Evaluate loss 0.24067499275718415

CPU times: user 1h 10min 51s, sys: 17min 12s, total: 1h 28min 4s
Wall time: 1h 22min 35s


In [23]:
import numpy as np
import pandas as pd
from sklearn.metrics import (accuracy_score, precision_score, recall_score, f1_score, roc_auc_score)


true = np.array(total_labels)
pred = np.array(total_preds>0.5)

dic = {
    "Accuracy" : accuracy_score(true,pred),
    "Precision-micro" : precision_score(true,pred,average='micro'),
    "Precision-macro" : precision_score(true,pred,average='macro'),
    "recall-micro" : recall_score(true,pred,average='micro'),
    "recall-macro" : recall_score(true,pred,average='macro'),
    "f1_micro" : f1_score(true,pred,average='micro'),
    "f1-macro" : f1_score(true,pred,average='macro')
}

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


In [24]:
dic

{'Accuracy': 0.3918269230769231,
 'Precision-micro': 0.797971259509721,
 'Precision-macro': 0.41194663692126593,
 'recall-micro': 0.5137135393992164,
 'recall-macro': 0.2568275311949758,
 'f1_micro': 0.6250413825067869,
 'f1-macro': 0.2996674369295113}

## batch_size = 8时的模型测试

In [None]:
model.load_state_dict(torch.load("./model/Classfication_Baseline_claims_model2022-02-22 01:57:06.pt"))

In [25]:
test_df = pd.read_feather("2021-sample-50000.feather")

In [26]:
test_dataset = PatentDataset(
  test_df,
  tokenizer,
  max_token_len=128
)
test_dataloader = DataLoader(test_dataset, batch_size=64, shuffle=False,drop_last=True)

avg_loss, total_preds, total_labels = evaluate(test_dataloader)


Evaluating...


Eval:   0%|          | 0/781 [00:00<?, ?it/s]

loss contrastive :0.0005801975494250655  loss classifier: 0.0064818039536476135
loss contrastive :0.0005945584853179753  loss classifier: 0.00567277567461133
Evaluate loss 0.005462370702589024


In [52]:
%time output = model.bert(input_ids=input_ids,attention_mask=attention_mask)

CPU times: user 19.8 ms, sys: 68 µs, total: 19.9 ms
Wall time: 17.5 ms


In [29]:
query = test_df['publication_title'][0].lower()

In [31]:
query = tokenizer.encode_plus(
                query,        
                max_length=128,
                padding="max_length",
                truncation=True,
                add_special_tokens=True,
                return_tensors='pt'
            )
input_ids = query['input_ids'].to(device)
attention_mask=query['attention_mask'].to(device)