<a href="https://colab.research.google.com/github/jakartaresearch/quora-question-pairs/blob/master/R6_BERT_Inference.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# memory footprint support libraries/code
!ln -sf /opt/bin/nvidia-smi /usr/bin/nvidia-smi
!pip install gputil --q
!pip install psutil --q
!pip install humanize --q
import psutil
import humanize
import os
import GPUtil as GPU
GPUs = GPU.getGPUs()
# XXX: only one GPU on Colab and isn’t guaranteed
gpu = GPUs[0]
def printm():
 process = psutil.Process(os.getpid())
 print("Gen RAM Free: " + humanize.naturalsize( psutil.virtual_memory().available ), " | Proc size: " + humanize.naturalsize( process.memory_info().rss))
 print("GPU RAM Free: {0:.0f}MB | Used: {1:.0f}MB | Util {2:3.0f}% | Total {3:.0f}MB".format(gpu.memoryFree, gpu.memoryUsed, gpu.memoryUtil*100, gpu.memoryTotal))
printm()

  Building wheel for gputil (setup.py) ... [?25l[?25hdone
Gen RAM Free: 12.7 GB  | Proc size: 158.4 MB
GPU RAM Free: 15079MB | Used: 0MB | Util   0% | Total 15079MB


# Setup

In [0]:
# Mount Google Drive
from google.colab import drive # import drive from google colab

ROOT = "/content/drive/"     # default location for the drive
drive.mount(ROOT)           # we mount the google drive at /content/drive

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive/


# Using Colab GPU for training

In [0]:
import torch

if torch.cuda.is_available():       
    device = torch.device("cuda")
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))

else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")

There are 1 GPU(s) available.
We will use the GPU: Tesla T4


# Install Huggingface-Transformers library

In [0]:
!pip install transformers --q

[K     |████████████████████████████████| 665kB 11.3MB/s 
[K     |████████████████████████████████| 3.8MB 57.1MB/s 
[K     |████████████████████████████████| 1.1MB 47.8MB/s 
[K     |████████████████████████████████| 890kB 49.5MB/s 
[?25h  Building wheel for sacremoses (setup.py) ... [?25l[?25hdone


# Load Model back from disk

In [0]:
from transformers import BertForSequenceClassification, AdamW, BertConfig
from transformers import BertTokenizer

output_dir = 'drive/My Drive/Colab Notebooks/quora-question-pairs/model/'

# Load a trained model and vocabulary that you have fine-tuned
model = BertForSequenceClassification.from_pretrained(output_dir)
tokenizer = BertTokenizer.from_pretrained(output_dir)

# Copy the model to the GPU.
model.to(device)

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(28996, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, element

# Load Test data

In [0]:
import pandas as pd

data_path = 'drive/My Drive/Colab Notebooks/quora-question-pairs/data/benchmark/'
test_data = pd.read_csv(data_path+'test.tsv', sep='\t')
test_data.columns = ['is_duplicate','question1','question2','id']

In [0]:
test_data.head()

Unnamed: 0,is_duplicate,question1,question2,id
0,0,Do women support each other more than men do ?,Do women need more compliments than men ?,126924
1,1,How can one root android devices ?,How do I root an Android device ?,391187
2,0,How did Hitler come to power ?,Who followed Hitler to power ?,301889
3,1,Can we donate blood after getting a tattoo ?,Can I donate blood if I have a tattoo ?,202497
4,0,What are reviews for the BioVault 2.0 Biometri...,Do you need a safe for a long gun in California ?,75122


# Data Preparation for Test set

In [0]:
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler

# Tokenize all of the sentences and map the tokens to thier word IDs.
input_ids = []
token_type_ids = []
attention_masks = []

# For every sentence...
for sent in test_data.itertuples():
    encoded_dict = tokenizer.encode_plus(
                        text = sent.question1,
                        text_pair = sent.question2,                      # Sentence to encode.
                        add_special_tokens = True, # Add '[CLS]' and '[SEP]'
                        max_length = 64,           # Pad & truncate all sentences.
                        pad_to_max_length = True,
                        return_attention_mask = True,   # Construct attn. masks.
                        return_tensors = 'pt',     # Return pytorch tensors.
                    )
    
    input_ids.append(encoded_dict['input_ids'])
    token_type_ids.append(encoded_dict['token_type_ids'])
    attention_masks.append(encoded_dict['attention_mask'])

# Convert the lists into tensors.
input_ids = torch.cat(input_ids, dim=0)
token_type_ids = torch.cat(token_type_ids, dim=0)
attention_masks = torch.cat(attention_masks, dim=0)
labels = torch.tensor(test_data.is_duplicate.values)

# Set the batch size.  
batch_size = 32  

# Create the DataLoader.
prediction_data = TensorDataset(input_ids, token_type_ids, attention_masks, labels)
prediction_sampler = SequentialSampler(prediction_data)
prediction_dataloader = DataLoader(prediction_data, sampler=prediction_sampler, batch_size=batch_size)

# Metrics

In [0]:
import numpy as np
from sklearn.metrics import f1_score, precision_score, recall_score

# Function to calculate the accuracy of our predictions vs labels
def flat_accuracy(preds, labels):
    pred_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()
    return np.sum(pred_flat == labels_flat) / len(labels_flat)

# Evaluate on Test Set

In [0]:
# Prediction on test set

print('Predicting labels for {:,} test sentences...'.format(len(input_ids)))

# Put model in evaluation mode
model.eval()

# Tracking variables 
predictions , true_labels = [], []

# Tracking metrics 
total_test_loss = 0
total_test_accuracy = 0
total_test_f1 = 0
total_test_prec = 0
total_test_rec = 0

# Predict 
for batch in prediction_dataloader:
    # Add batch to GPU
    batch = tuple(t.to(device) for t in batch)
    
    # Unpack the inputs from our dataloader
    b_input_ids, b_token_ids, b_input_mask, b_labels = batch
    
    # Telling the model not to compute or store gradients, saving memory and 
    # speeding up prediction
    with torch.no_grad():
        # Forward pass, calculate logit predictions
        loss, logits = model(b_input_ids, token_type_ids=b_token_ids, 
                        attention_mask=b_input_mask, labels=b_labels)

    # Move logits and labels to CPU
    logits = logits.detach().cpu().numpy()
    label_ids = b_labels.to('cpu').numpy()
    
    # Calculate the accuracy for this batch of test sentences, and
    # accumulate it over all batches.
    total_test_loss += loss.item()
    total_test_accuracy += flat_accuracy(logits, label_ids)
    total_test_f1 += f1_score(label_ids, np.argmax(logits, axis=1))
    total_test_prec += precision_score(label_ids, np.argmax(logits, axis=1))
    total_test_rec += recall_score(label_ids, np.argmax(logits, axis=1))



# Calculate the average loss and accuracy over all of the batches.
avg_test_loss = total_test_loss / len(prediction_dataloader)            
avg_test_accuracy = total_test_accuracy / len(prediction_dataloader)
avg_test_f1 = total_test_f1 / len(prediction_dataloader)
avg_test_prec = total_test_prec / len(prediction_dataloader)
avg_test_rec = total_test_rec / len(prediction_dataloader)

print(' DONE ')

Predicting labels for 9,999 test sentences...
 DONE 


In [0]:
print("  Average loss: {0:.4f}".format(avg_test_loss))
print("  Average accuracy: {0:.4f}".format(avg_test_accuracy*100))
print("  Average f1: {0:.4f}".format(avg_test_f1*100))
print("  Average prec: {0:.4f}".format(avg_test_prec*100))
print("  Average rec: {0:.4f}".format(avg_test_rec*100))

  Average loss: 0.1475
  Average accuracy: 97.0847
  Average f1: 97.0311
  Average prec: 98.0057
  Average rec: 96.2460
