In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Import require Python libraries

In [2]:
import tensorflow
import torch
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
import torch.nn as nn
import torch.nn.functional as F
import torch.autograd as autograd
from tqdm import tqdm, trange
import pandas as pd
import numpy as np
import io
import os
import matplotlib.pyplot as plt
from keras.preprocessing.sequence import pad_sequences
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, classification_report, confusion_matrix
import matplotlib
import matplotlib.pyplot as plt

In [3]:
## Set seed of randomization and working device
manual_seed = 77
torch.manual_seed(manual_seed)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
n_gpu = torch.cuda.device_count()
if n_gpu > 0:
    torch.cuda.manual_seed(manual_seed)

print(torch.cuda.get_device_name(0))

cuda
Tesla T4


In [4]:
! pip install transformers

Collecting transformers
[?25l  Downloading https://files.pythonhosted.org/packages/81/91/61d69d58a1af1bd81d9ca9d62c90a6de3ab80d77f27c5df65d9a2c1f5626/transformers-4.5.0-py3-none-any.whl (2.1MB)
[K     |████████████████████████████████| 2.2MB 11.2MB/s 
Collecting sacremoses
[?25l  Downloading https://files.pythonhosted.org/packages/08/cd/342e584ee544d044fb573ae697404ce22ede086c9e87ce5960772084cad0/sacremoses-0.0.44.tar.gz (862kB)
[K     |████████████████████████████████| 870kB 53.7MB/s 
Collecting tokenizers<0.11,>=0.10.1
[?25l  Downloading https://files.pythonhosted.org/packages/ae/04/5b870f26a858552025a62f1649c20d29d2672c02ff3c3fb4c688ca46467a/tokenizers-0.10.2-cp37-cp37m-manylinux2010_x86_64.whl (3.3MB)
[K     |████████████████████████████████| 3.3MB 50.9MB/s 
Building wheels for collected packages: sacremoses
  Building wheel for sacremoses (setup.py) ... [?25l[?25hdone
  Created wheel for sacremoses: filename=sacremoses-0.0.44-cp37-none-any.whl size=886084 sha256=0780f0eb1c

In [5]:
!pip3 install emoji

Collecting emoji
[?25l  Downloading https://files.pythonhosted.org/packages/24/fa/b3368f41b95a286f8d300e323449ab4e86b85334c2e0b477e94422b8ed0f/emoji-1.2.0-py3-none-any.whl (131kB)
[K     |██▌                             | 10kB 23.8MB/s eta 0:00:01[K     |█████                           | 20kB 30.6MB/s eta 0:00:01[K     |███████▌                        | 30kB 24.0MB/s eta 0:00:01[K     |██████████                      | 40kB 27.9MB/s eta 0:00:01[K     |████████████▌                   | 51kB 26.9MB/s eta 0:00:01[K     |███████████████                 | 61kB 29.6MB/s eta 0:00:01[K     |█████████████████▌              | 71kB 19.1MB/s eta 0:00:01[K     |████████████████████            | 81kB 20.2MB/s eta 0:00:01[K     |██████████████████████▌         | 92kB 19.4MB/s eta 0:00:01[K     |█████████████████████████       | 102kB 19.4MB/s eta 0:00:01[K     |███████████████████████████▌    | 112kB 19.4MB/s eta 0:00:01[K     |██████████████████████████████  | 122kB 19.4MB/s

In [7]:
!pip install sentencepiece

Collecting sentencepiece
[?25l  Downloading https://files.pythonhosted.org/packages/f5/99/e0808cb947ba10f575839c43e8fafc9cc44e4a7a2c8f79c60db48220a577/sentencepiece-0.1.95-cp37-cp37m-manylinux2014_x86_64.whl (1.2MB)
[K     |▎                               | 10kB 22.3MB/s eta 0:00:01[K     |▌                               | 20kB 27.0MB/s eta 0:00:01[K     |▉                               | 30kB 27.2MB/s eta 0:00:01[K     |█                               | 40kB 22.8MB/s eta 0:00:01[K     |█▍                              | 51kB 19.6MB/s eta 0:00:01[K     |█▋                              | 61kB 15.7MB/s eta 0:00:01[K     |██                              | 71kB 13.7MB/s eta 0:00:01[K     |██▏                             | 81kB 13.7MB/s eta 0:00:01[K     |██▌                             | 92kB 14.0MB/s eta 0:00:01[K     |██▊                             | 102kB 14.8MB/s eta 0:00:01[K     |███                             | 112kB 14.8MB/s eta 0:00:01[K     |███▎        

In [8]:
from transformers import *

In [9]:
from transformers import AutoModel, AutoTokenizer 
bert_model = AutoModel.from_pretrained("vinai/bertweet-base", output_hidden_states=True, output_attentions=True).to(device)

# For transformers v4.x+: 
tokenizer = AutoTokenizer.from_pretrained("vinai/bertweet-base", use_fast=False)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=558.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=542529064.0, style=ProgressStyle(descri…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=843438.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1078931.0, style=ProgressStyle(descript…




Special tokens have been added in the vocabulary, make sure the associated word embedding are fine-tuned or trained.


In [11]:
# define a function for data preparation
def data_prepare(file_path, lab2ind, tokenizer, max_len = 100, mode = 'train'):
    '''
    file_path: the path to input file. 
                In train mode, the input must be a tsv file that includes two columns where the first is text, and second column is label.
                The first row must be header of columns.

                In predict mode, the input must be a tsv file that includes only one column where the first is text.
                The first row must be header of column.

    lab2ind: dictionary of label classes
    tokenizer: BERT tokenizer
    max_len: maximal length of input sequence
    mode: train or predict
    '''
    # if we are in train mode, we will load two columns (i.e., text and label).
    if mode == 'train':
        # Use pandas to load dataset
        df = pd.read_csv(file_path, delimiter='\t',header=0, names=['tokenID','label','content'], error_bad_lines=False)
        print("Data size ", df.shape)
        labels = df.label.values
        
        # Create sentence and label lists
        labels = [lab2ind[i] for i in labels] 
        print("Label is ", labels[0])
        
        # Convert data into torch tensors
        labels = torch.tensor(labels)

    # if we are in predict mode, we will load one column (i.e., text).
    elif mode == 'predict':
        df = pd.read_csv(file_path, delimiter='\t',header=0, names=['tokenID', 'content'], error_bad_lines=False)
        print("Data size ", df.shape)
        # create placeholder
        labels = []
    else:
        print("the type of mode should be either 'train' or 'predict'. ")
        return
        
    # Create sentence and label lists
    content = df.content.values

    #### REF START ####

    # We need to add a special token at the beginning for BERT to work properly.
    content = ["[CLS] " + text for text in content]

    # Import the BERT tokenizer, used to convert our text into tokens that correspond to BERT's vocabulary.
    tokenized_texts = [tokenizer.tokenize(text) for text in content]
    
    # if the sequence is longer the maximal length, we truncate it to the pre-defined maximal length
    tokenized_texts = [ text[:max_len+1] for text in tokenized_texts]

    # We also need to add a special token at the end.
    tokenized_texts = [ text+['[SEP]'] for text in tokenized_texts]
    print ("Tokenize the first sentence:\n",tokenized_texts[0])
    
    # Use the BERT tokenizer to convert the tokens to their index numbers in the BERT vocabulary
    input_ids = [tokenizer.convert_tokens_to_ids(x) for x in tokenized_texts]
    print ("Index numbers of the first sentence:\n",input_ids[0])

    # Pad our input seqeunce to the fixed length (i.e., max_len) with index of [PAD] token
    pad_ind = tokenizer.convert_tokens_to_ids(['[PAD]'])[0]
    input_ids = pad_sequences(input_ids, maxlen=max_len+2, dtype="long", truncating="post", padding="post", value=pad_ind)
    print ("Index numbers of the first sentence after padding:\n",input_ids[0])

    # Create attention masks
    attention_masks = []

    # Create a mask of 1s for each token followed by 0s for pad tokens
    for seq in input_ids:
        seq_mask = [float(i>0) for i in seq]
        attention_masks.append(seq_mask)

    # Convert all of our data into torch tensors, the required datatype for our model
    inputs = torch.tensor(input_ids)
    masks = torch.tensor(attention_masks)
    #### REF END ####

    return inputs, labels, masks

In [None]:
with open("/content/drive/MyDrive/Colab Notebooks/COLX_585_Group/data/train.txt", "r") as f:
  for line in f:
    line_lst = line.strip().split('\t')
    if len(line_lst) != 3:
      print(line_lst)

['626437371999977472', 'neutral', '...Yakub may well deserve the finding of guilt. The courts may well have rendered the correct judgments in the... http://t.co/qZguV92V85', 'Wed Jul 29 12:01:22 +0000 2015']
['629712146016935936', 'neutral', "Testing Motorola's Moto G 3rd-gen mobe: Is it still king of the hill? Jockeying for position in the mid-range market http://t.co/te61nGCpz1", 'Fri Aug 07 11:54:09 +0000 2015']
['636047266248265728', 'negative', '"Scott Walker, who thinks ISIL compares to American labor unions, thinks he knows how to handle China. On Monday,... http://t.co/MXT33GnBu0"', 'Mon Aug 24 20:27:40 +0000 2015']
['636112894086782976', 'neutral', '@lindaikeji: Super Eagles coach Sunday Oliseh says Mikel Obi refused to pick his call in London: Chelsea midfielder Mikel. Is it a toboo?', 'Tue Aug 25 09:48:27 +0000 2015']
['637879757124669440', 'neutral', '@Roman_Empire_76:   BREAKING  NEWS Rock is return on Monday Night raw with help @WWERomanReigns  and Dean Ambrose NOW COMPIL

In [12]:
lab2ind = {'positive': 2, 'negative': 1, 'neutral':0}
train_inputs, train_labels, train_masks = data_prepare("/content/drive/MyDrive/Colab Notebooks/COLX_585_Group/data/train_clean.txt", lab2ind,tokenizer)
validation_inputs, validation_labels, validation_masks = data_prepare("/content/drive/MyDrive/Colab Notebooks/COLX_585_Group/data/dev.txt", lab2ind,tokenizer)

b'Skipping line 23096: expected 3 fields, saw 4\nSkipping line 23097: expected 3 fields, saw 4\nSkipping line 23098: expected 3 fields, saw 4\nSkipping line 23099: expected 3 fields, saw 4\nSkipping line 23100: expected 3 fields, saw 4\nSkipping line 23101: expected 3 fields, saw 4\nSkipping line 23102: expected 3 fields, saw 4\nSkipping line 23103: expected 3 fields, saw 4\nSkipping line 23104: expected 3 fields, saw 4\nSkipping line 23105: expected 3 fields, saw 4\nSkipping line 23106: expected 3 fields, saw 4\nSkipping line 23107: expected 3 fields, saw 4\nSkipping line 23108: expected 3 fields, saw 4\nSkipping line 23109: expected 3 fields, saw 4\nSkipping line 23110: expected 3 fields, saw 4\nSkipping line 23111: expected 3 fields, saw 4\nSkipping line 23112: expected 3 fields, saw 4\nSkipping line 23113: expected 3 fields, saw 4\nSkipping line 23114: expected 3 fields, saw 4\nSkipping line 23115: expected 3 fields, saw 4\nSkipping line 23116: expected 3 fields, saw 4\nSkipping li

Data size  (23094, 3)
Label is  1
Tokenize the first sentence:
 ['[@@', 'CL@@', 'S@@', ']', 'Theo', 'Walcott', 'is', 'still', 'shit@@', '\\@@', 'u@@', '00@@', '2@@', 'c', 'watch', 'Rafa', 'and', 'Johnny', 'deal', 'with', 'him', 'on', 'Satur@@', 'day@@', '.', '[SEP]']
Index numbers of the first sentence:
 [61658, 6411, 381, 317, 25704, 38416, 17, 135, 6475, 60570, 713, 3032, 674, 591, 265, 26261, 13, 6087, 810, 30, 124, 24, 44219, 4069, 4, 3]
Index numbers of the first sentence after padding:
 [61658  6411   381   317 25704 38416    17   135  6475 60570   713  3032
   674   591   265 26261    13  6087   810    30   124    24 44219  4069
     4     3     3     3     3     3     3     3     3     3     3     3
     3     3     3     3     3     3     3     3     3     3     3     3
     3     3     3     3     3     3     3     3     3     3     3     3
     3     3     3     3     3     3     3     3     3     3     3     3
     3     3     3     3     3     3     3     3     3     3    

In [13]:
train_inputs.shape

torch.Size([23094, 102])

In [14]:
batch_size = 32
# We'll take training samples in random order in each epoch. 
train_data = TensorDataset(train_inputs, train_masks, train_labels)
train_dataloader = DataLoader(train_data, 
                              sampler = RandomSampler(train_data), # Select batches randomly
                              batch_size=batch_size)

# We'll just read validation set sequentially.
validation_data = TensorDataset(validation_inputs, validation_masks, validation_labels)
validation_dataloader = DataLoader(validation_data, 
                                   sampler = SequentialSampler(validation_data), # Pull out batches sequentially.
                                   batch_size=batch_size)

In [15]:
dataiter = iter(train_dataloader)
batch = dataiter.next()
# Add batch to GPU
batch = tuple(t.to(device) for t in batch)
# Unpack the inputs from our dataloader
input_ids, input_mask, labels = batch

In [16]:
outputs = bert_model(input_ids, attention_mask = input_mask)

In [17]:
print(outputs.keys())

odict_keys(['last_hidden_state', 'pooler_output', 'hidden_states', 'attentions'])


In [18]:
last_hidden_state = outputs["last_hidden_state"]
pooler_output = outputs["pooler_output"]
hidden_states = outputs["hidden_states"]
attentions = outputs["attentions"]

In [19]:
last_hidden_state.shape
pooler_output.shape
print(len(hidden_states))

13


In [20]:
for i, item in enumerate(hidden_states):
  print("layer " + str(i), item.shape) # [batch size, sequence length, hidden size]

layer 0 torch.Size([32, 102, 768])
layer 1 torch.Size([32, 102, 768])
layer 2 torch.Size([32, 102, 768])
layer 3 torch.Size([32, 102, 768])
layer 4 torch.Size([32, 102, 768])
layer 5 torch.Size([32, 102, 768])
layer 6 torch.Size([32, 102, 768])
layer 7 torch.Size([32, 102, 768])
layer 8 torch.Size([32, 102, 768])
layer 9 torch.Size([32, 102, 768])
layer 10 torch.Size([32, 102, 768])
layer 11 torch.Size([32, 102, 768])
layer 12 torch.Size([32, 102, 768])


In [21]:
print(len(attentions))

12


In [22]:
for i, item in enumerate(attentions):
  print("layer " + str(i), item.shape) # [batch size, num_heads, sequence length, sequence_length]

layer 0 torch.Size([32, 12, 102, 102])
layer 1 torch.Size([32, 12, 102, 102])
layer 2 torch.Size([32, 12, 102, 102])
layer 3 torch.Size([32, 12, 102, 102])
layer 4 torch.Size([32, 12, 102, 102])
layer 5 torch.Size([32, 12, 102, 102])
layer 6 torch.Size([32, 12, 102, 102])
layer 7 torch.Size([32, 12, 102, 102])
layer 8 torch.Size([32, 12, 102, 102])
layer 9 torch.Size([32, 12, 102, 102])
layer 10 torch.Size([32, 12, 102, 102])
layer 11 torch.Size([32, 12, 102, 102])


In [23]:
dense = nn.Linear(768, 768).to(device)
dropout = nn.Dropout(0.1).to(device)
fc = nn.Linear(768, 3).to(device)
dense_output = dense(pooler_output)
drop_output = dropout(dense_output)
fc_output = fc(drop_output)

In [24]:
criterion = nn.CrossEntropyLoss()
criterion(fc_output, labels)

tensor(1.0928, device='cuda:0', grad_fn=<NllLossBackward>)

In [25]:
class Bert_cls(nn.Module):

    def __init__(self, lab2ind, model_path, hidden_size):
        super(Bert_cls, self).__init__()
        self.model_path = model_path
        self.hidden_size = hidden_size
        self.bert_model = AutoModel.from_pretrained(model_path, output_hidden_states=True, output_attentions=True)
        
        self.label_num = len(lab2ind)
        
        self.dense = nn.Linear(self.hidden_size, self.hidden_size)
        self.dropout = nn.Dropout(0.1)
        self.fc = nn.Linear(self.hidden_size, self.label_num)

    def forward(self, bert_ids, bert_mask):
        outputs = self.bert_model(input_ids=bert_ids, attention_mask = bert_mask)
        pooler_output = outputs['pooler_output']
        attentions = outputs['attentions']
        
        x = self.dense(pooler_output)
        x = torch.tanh(x)
        x = self.dropout(x)
        fc_output = self.fc(x)

        return fc_output, attentions


In [26]:
bert_model = Bert_cls(lab2ind, "vinai/bertweet-base", 768).to(device)

In [None]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'The model has {count_parameters(bert_model):,} trainable parameters')

The model has 135,492,867 trainable parameters


In [None]:
# Parameters:
lr = 2e-5
max_grad_norm = 1.0
epochs = 3
warmup_proportion = 0.1
num_training_steps  = len(train_dataloader) * epochs
num_warmup_steps = num_training_steps * warmup_proportion

### In Transformers, optimizer and schedules are instantiated like this:
# Note: AdamW is a class from the huggingface library
# the 'W' stands for 'Weight Decay"
optimizer = AdamW(bert_model.parameters(), lr=lr, correct_bias=False)
# schedules
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=num_warmup_steps, num_training_steps=num_training_steps)  # PyTorch scheduler

# We use nn.CrossEntropyLoss() as our loss function. 
criterion = nn.CrossEntropyLoss()

In [None]:
def train(model, iterator, optimizer, scheduler, criterion):
    
    model.train()
    epoch_loss = 0
    
    for i, batch in enumerate(iterator):
        
        # Add batch to GPU
        batch = tuple(t.to(device) for t in batch)
        # Unpack the inputs from our dataloader
        input_ids, input_mask, labels = batch

        outputs,_ = model(input_ids, input_mask)

        loss = criterion(outputs, labels)
        # delete used variables to free GPU memory
        del batch, input_ids, input_mask, labels
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)  # Gradient clipping is not in AdamW anymore
        optimizer.step()
        scheduler.step()
        epoch_loss += loss.cpu().item()
        optimizer.zero_grad()
    
    # free GPU memory
    if device == 'cuda':
        torch.cuda.empty_cache()

    return epoch_loss / len(iterator)

In [None]:
def evaluate(model, iterator, criterion):
    
    model.eval()
    
    epoch_loss = 0
    all_pred=[]
    all_label = []
    
    with torch.no_grad():
    
        for i, batch in enumerate(iterator):

            # Add batch to GPU
            batch = tuple(t.to(device) for t in batch)
            # Unpack the inputs from our dataloader
            input_ids, input_mask, labels = batch

            outputs,_ = model(input_ids, input_mask)
            
            loss = criterion(outputs, labels)

            # delete used variables to free GPU memory
            del batch, input_ids, input_mask
            epoch_loss += loss.cpu().item()

            # identify the predicted class for each example in the batch
            probabilities, predicted = torch.max(outputs.cpu().data, 1)
            # put all the true labels and predictions to two lists
            all_pred.extend(predicted)
            all_label.extend(labels.cpu())
    
    accuracy = accuracy_score(all_label, all_pred)
    f1score = f1_score(all_label, all_pred, average='macro') 
    return epoch_loss / len(iterator), accuracy, f1score

In [None]:
import os
save_path = '/content/drive/MyDrive/Colab Notebooks/COLX_585_Group/ckpt_BERT'
if os.path.exists(save_path) == False:
    os.makedirs(save_path)

In [None]:
loss_list = []
acc_list = []

for epoch in trange(epochs, desc="Epoch"):
    train_loss = train(bert_model, train_dataloader, optimizer, scheduler, criterion)  
    val_loss, val_acc, val_f1 = evaluate(bert_model, validation_dataloader, criterion)

    # Create checkpoint at end of each epoch
    state = {
        'epoch': epoch,
        'state_dict': bert_model.state_dict(),
        'optimizer': optimizer.state_dict(),
        'scheduler': scheduler.state_dict()
        }

    torch.save(state, "/content/drive/MyDrive/Colab Notebooks/COLX_585_Group/ckpt_BERT/BERT_"+str(epoch+1)+".pt")

    print('\n Epoch [{}/{}], Train Loss: {:.4f}, Validation Loss: {:.4f}, Validation Accuracy: {:.4f}, Validation F1: {:.4f}'.format(epoch+1, epochs, train_loss, val_loss, val_acc, val_f1))
    


Epoch:   0%|          | 0/3 [00:00<?, ?it/s][A
Epoch:  33%|███▎      | 1/3 [08:23<16:46, 503.50s/it][A


 Epoch [1/3], Train Loss: 0.6091, Validation Loss: 0.9030, Validation Accuracy: 0.5817, Validation F1: 0.5762



Epoch:  67%|██████▋   | 2/3 [16:51<08:24, 504.85s/it][A


 Epoch [2/3], Train Loss: 0.4154, Validation Loss: 0.9458, Validation Accuracy: 0.6132, Validation F1: 0.6128



Epoch: 100%|██████████| 3/3 [25:20<00:00, 506.83s/it]


 Epoch [3/3], Train Loss: 0.3050, Validation Loss: 1.0137, Validation Accuracy: 0.6173, Validation F1: 0.6182





In [None]:
tweet = "@CTVNews Look how they hold Canadian citizens over there after we arrested one of their people for charges by USA. Corrupt politicians"
# bert_model(tweet)

In [None]:
def sententce_prepocess(content, tokenizer):
    """
    content: list of string. Each string is a sample. We only include one sample in this list.
    tokenizer: BertTokenizerFast
    """
    content = ["[CLS] " + text for text in content]

    # Import the BERT tokenizer, used to convert our text into tokens that correspond to BERT's vocabulary.
    tokenized_texts = [tokenizer.tokenize(text) for text in content]

    # We also need to add a special token at the end.
    tokenized_texts = [ text+['[SEP]'] for text in tokenized_texts]
    print ("Tokenize the first sentence:\n",tokenized_texts[0])

    # Use the BERT tokenizer to convert the tokens to their index numbers in the BERT vocabulary
    input_ids = [tokenizer.convert_tokens_to_ids(x) for x in tokenized_texts]
    print ("Index numbers of the first sentence:\n",input_ids[0])

    # Pad our input seqeunce to the fixed length (i.e., max_len) with index of [PAD] token
    pad_ind = tokenizer.convert_tokens_to_ids(['[PAD]'])[0]

    # Create attention masks
    attention_masks = []

    # Create a mask of 1s for each token followed by 0s for pad tokens
    for seq in input_ids:
        seq_mask = [float(i>0) for i in seq]
        attention_masks.append(seq_mask)

    # Convert all of our data into torch tensors, the required datatype for our model
    inputs = torch.tensor(input_ids)
    masks = torch.tensor(attention_masks)

    return tokenized_texts, inputs, masks

In [None]:

tokenized_texts, input_ids, masks = sententce_prepocess([tweet], tokenizer)

Tokenize the first sentence:
 ['[@@', 'CL@@', 'S@@', ']', '@@@', 'C@@', 'TV@@', 'News', 'Look', 'how', 'they', 'hold', 'Canadian', 'citizens', 'over', 'there', 'after', 'we', 'arrested', 'one', 'of', 'their', 'people', 'for', 'charges', 'by', 'US@@', 'A.', 'Corrupt', 'politicians', '[SEP]']
Index numbers of the first sentence:
 [61658, 6411, 381, 317, 5238, 472, 8064, 619, 1170, 84, 59, 955, 3619, 4621, 141, 99, 177, 54, 3820, 63, 15, 130, 83, 19, 5801, 61, 2479, 12666, 33175, 5837, 3]


In [None]:
input_ids, masks = input_ids.to(device), masks.to(device)

In [None]:
print(input_ids.shape)
print(masks.shape)    # [btach size, sequence length]

torch.Size([1, 31])
torch.Size([1, 31])


In [None]:
outputs,attention_weights = bert_model(input_ids, masks)

In [None]:
lab2ind = {'negative': 0, 'neutral': 1, 'positive': 2}
ind2lab =  {0 :'negative', 1: 'neutral', 2: 'positive'}

In [None]:
print(outputs)
probabilities, predicted = torch.max(outputs.cpu().data, 1)
print("the prediction is: ", ind2lab[predicted[0].item()])

tensor([[-0.0911,  3.6501, -3.1597]], device='cuda:0', grad_fn=<AddmmBackward>)
the prediction is:  neutral


In [None]:
loss_list = []
acc_list = []
epochs = 10
for epoch in trange(epochs, desc="Epoch"):
    train_loss = train(bert_model, train_dataloader, optimizer, scheduler, criterion)  
    val_loss, val_acc, val_f1 = evaluate(bert_model, validation_dataloader, criterion)

    # Create checkpoint at end of each epoch
    state = {
        'epoch': epoch,
        'state_dict': bert_model.state_dict(),
        'optimizer': optimizer.state_dict(),
        'scheduler': scheduler.state_dict()
        }

    torch.save(state, "/content/drive/MyDrive/Colab Notebooks/COLX_585_Group/ckpt_BERT/BERT_"+str(epoch+1)+".pt")

    print('\n Epoch [{}/{}], Train Loss: {:.4f}, Validation Loss: {:.4f}, Validation Accuracy: {:.4f}, Validation F1: {:.4f}'.format(epoch+1, epochs, train_loss, val_loss, val_acc, val_f1))
    


Epoch:   0%|          | 0/10 [00:00<?, ?it/s][A
Epoch:  10%|█         | 1/10 [08:17<1:14:41, 498.00s/it][A


 Epoch [1/10], Train Loss: 0.2754, Validation Loss: 1.0137, Validation Accuracy: 0.6173, Validation F1: 0.6182



Epoch:  20%|██        | 2/10 [16:42<1:06:40, 500.05s/it][A


 Epoch [2/10], Train Loss: 0.2767, Validation Loss: 1.0137, Validation Accuracy: 0.6173, Validation F1: 0.6182



Epoch:  30%|███       | 3/10 [25:11<58:39, 502.75s/it]  [A


 Epoch [3/10], Train Loss: 0.2756, Validation Loss: 1.0137, Validation Accuracy: 0.6173, Validation F1: 0.6182



Epoch:  40%|████      | 4/10 [33:36<50:19, 503.25s/it][A


 Epoch [4/10], Train Loss: 0.2784, Validation Loss: 1.0137, Validation Accuracy: 0.6173, Validation F1: 0.6182



Epoch:  50%|█████     | 5/10 [42:04<42:03, 504.76s/it][A


 Epoch [5/10], Train Loss: 0.2772, Validation Loss: 1.0137, Validation Accuracy: 0.6173, Validation F1: 0.6182



Epoch:  60%|██████    | 6/10 [50:30<33:40, 505.03s/it][A


 Epoch [6/10], Train Loss: 0.2780, Validation Loss: 1.0137, Validation Accuracy: 0.6173, Validation F1: 0.6182



Epoch:  70%|███████   | 7/10 [58:56<25:15, 505.33s/it][A


 Epoch [7/10], Train Loss: 0.2767, Validation Loss: 1.0137, Validation Accuracy: 0.6173, Validation F1: 0.6182



Epoch:  80%|████████  | 8/10 [1:07:20<16:50, 505.07s/it][A


 Epoch [8/10], Train Loss: 0.2765, Validation Loss: 1.0137, Validation Accuracy: 0.6173, Validation F1: 0.6182



Epoch:  90%|█████████ | 9/10 [1:15:44<08:24, 504.80s/it][A


 Epoch [9/10], Train Loss: 0.2781, Validation Loss: 1.0137, Validation Accuracy: 0.6173, Validation F1: 0.6182



Epoch: 100%|██████████| 10/10 [1:24:13<00:00, 505.38s/it]


 Epoch [10/10], Train Loss: 0.2760, Validation Loss: 1.0137, Validation Accuracy: 0.6173, Validation F1: 0.6182





In [27]:
bert_model.load_state_dict(torch.load('/content/drive/MyDrive/Colab Notebooks/COLX_585_Group/ckpt_BERT/BERT_3.pt')['state_dict'])
bert_model = bert_model.to(device)

(1.0136540762839779, 0.6173027989821883, 0.6182249411423162)

In [34]:
def evaluate(model, iterator, criterion):
    
    model.eval()
    
    epoch_loss = 0
    all_pred=[]
    all_label = []
    
    with torch.no_grad():
    
        for i, batch in enumerate(iterator):

            # Add batch to GPU
            batch = tuple(t.to(device) for t in batch)
            # Unpack the inputs from our dataloader
            input_ids, input_mask, labels = batch

            outputs,_ = model(input_ids, input_mask)
            
            loss = criterion(outputs, labels)

            # delete used variables to free GPU memory
            del batch, input_ids, input_mask
            epoch_loss += loss.cpu().item()

            # identify the predicted class for each example in the batch
            probabilities, predicted = torch.max(outputs.cpu().data, 1)
            # put all the true labels and predictions to two lists
            all_pred.extend(predicted)
            all_label.extend(labels.cpu())
    
    accuracy = accuracy_score(all_label, all_pred)
    f1score = f1_score(all_label, all_pred, average='macro')
    recall = recall_score(all_label, all_pred, average='macro' )
    return epoch_loss / len(iterator), accuracy, f1score, recall

In [35]:
evaluate(bert_model, validation_dataloader, criterion)

(1.0136540762839779,
 0.6173027989821883,
 0.6182249411423162,
 0.6275877226229062)