<a href="https://colab.research.google.com/github/arnav-gudibande/intuit-project/blob/master/model/bert_inference.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Mounting your Google Drive to load the model weights

0. Make sure you are connected to a GPU runtime. If not, go to Runtime -> Change runtime type and select Python 3 and GPU.
1. Click the folder icon on the left side.
2. Run the cell below and follow the instructions.

In [0]:
# Mount Google Drive at /content/drive so files stored there (the model
# weights used below) are reachable from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


3. Click the folder icon again; there should be two folders, "drive" and "sample_data". If not, wait a while and click again. Do not reload the page (that only gives you a new runtime, so it won't help). The "drive" folder is your Google Drive root directory.
4. If you have not done so yet, find the shared "intuit-project" folder and choose "Add to My Drive". This adds the folder to your Google Drive root directory, and you should now be able to locate it in the file browser on the left.
5. Find the intuit-project folder, right-click it and choose "Copy path". Paste the path into the cell below.

In [0]:
# Location of the shared intuit-project folder inside the mounted Drive.
# It is joined with "imdb_weights" further down to locate the checkpoint.
PATH = '/content/drive/My Drive/intuit-project'

# The inference code

In [0]:
!pip install pytorch_transformers



In [0]:
import torch
from torch.utils.data import (TensorDataset, DataLoader,
                              RandomSampler, SequentialSampler)
from pytorch_transformers import BertTokenizer, BertConfig
from pytorch_transformers import BertForSequenceClassification
from pytorch_transformers import AdamW, WarmupLinearSchedule
from distutils.version import LooseVersion as LV
from sklearn.model_selection import train_test_split
import io
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
matplotlib.use('Agg')
sns.set()
import torch.nn as nn
import IPython
import os
e = IPython.embed

# Prefer the GPU, but fall back to the CPU so that moving the model and the
# batches to `device` still works (slowly) on a runtime without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


In [0]:
def process_dataframe(_dframe, _tokenizer, max_len=None, batch_size=1):
  """Convert a sentence/label dataframe into a sequential DataLoader.

  Args:
    _dframe: DataFrame with a `sentence` column (text) and a `polarity`
      column (labels); both are read via attribute access.
    _tokenizer: object providing `tokenize(text)` and
      `convert_tokens_to_ids(tokens)`.
    max_len: length every token sequence is truncated/padded to. When left
      as None the notebook-level MAX_LEN_TRAIN is looked up at call time,
      which is the original behaviour.
    batch_size: number of rows per batch (default 1, as before).

  Returns:
    A DataLoader yielding `(input_ids, attention_mask, labels)` tuples in
    dataframe order.

  Raises:
    ValueError: if `max_len` is smaller than 1.
  """
  if max_len is None:
    max_len = MAX_LEN_TRAIN
  if max_len < 1:
    raise ValueError("max_len must be at least 1, got %r" % (max_len,))

  sentences = ["[CLS] " + s for s in _dframe.sentence.values]
  labels = _dframe.polarity.values

  # Leave one slot free for the trailing marker so no sequence exceeds max_len.
  # NOTE(review): the marker appended here is the literal token 'SEP', not
  # BERT's '[SEP]'. It is kept as-is because the checkpoint may have been
  # fine-tuned with this exact preprocessing -- confirm before changing.
  tokenized = [_tokenizer.tokenize(s)[:max_len - 1] + ['SEP']
               for s in sentences]

  ids = [_tokenizer.convert_tokens_to_ids(t) for t in tokenized]
  # Right-pad with zeros up to max_len.
  ids = np.array([np.pad(i, (0, max_len - len(i)), mode='constant')
                  for i in ids])

  # Attention mask: 1.0 where the id is positive, 0.0 on the zero padding.
  amasks = [[float(token_id > 0) for token_id in seq] for seq in ids]

  data = TensorDataset(torch.tensor(ids),
                       torch.tensor(amasks),
                       torch.tensor(labels))
  # SequentialSampler keeps batches in dataframe order.
  return DataLoader(data, sampler=SequentialSampler(data),
                    batch_size=batch_size)

def run_model(_model, loader):
  """Run `_model` without gradient tracking on every batch of `loader`.

  The forward pass is executed once per batch, so any forward hook
  registered on the model fires for each batch. Previously the forward pass
  sat outside the loop and only the final batch was evaluated.

  Args:
    _model: callable accepting `(input_ids, token_type_ids=None,
      attention_mask=...)` and returning a tuple whose first element is the
      logits tensor.
    loader: iterable of `(input_ids, attention_mask, labels)` tensor tuples.

  Returns:
    For a single-batch loader, the model's output tuple unchanged. For
    several batches, a 1-tuple holding the logits of all batches
    concatenated along dimension 0.

  Raises:
    ValueError: if `loader` yields no batches.
  """
  per_batch_outputs = []
  with torch.no_grad():
    for batch in loader:
      # Labels travel with the dataset but are not passed to the model.
      b_input_ids, b_input_mask, _ = (t.to(device) for t in batch)
      per_batch_outputs.append(
          _model(b_input_ids, token_type_ids=None,
                 attention_mask=b_input_mask))

  if not per_batch_outputs:
    raise ValueError("run_model() received a loader with no batches")
  if len(per_batch_outputs) == 1:
    return per_batch_outputs[0]
  return (torch.cat([out[0] for out in per_batch_outputs], dim=0),)


In [0]:
# BERT MODEL INITIALIZATION
# source code can be found here https://github.com/huggingface/transformers/blob/bb7c46852051f7d031dd4be0240c9c9db82f6ed9/src/transformers/modeling_bert.py#L1107

# The checkpoint is read from the "imdb_weights" directory under PATH.
model = BertForSequenceClassification.from_pretrained(os.path.join(PATH, "imdb_weights")) # load directly from checkpoint of imdb
model.to(device) # move the model to `device`

# TOKENIZER used by process_dataframe to preprocess the test sentences
BERTMODEL = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(BERTMODEL, do_lower_case=True)

In [0]:
# Notebook-level recording state shared with the forward hook below.
EXTRACTED_ACTIVATIONS = []  # outputs captured by the hook, oldest first
RECORD = False              # the hook stores outputs only while this is truthy


def extract_activation_hook(module, input, output):
  """Forward-hook callback: keep `output` while recording is switched on.

  `module` and `input` are accepted to match the hook call signature but are
  not used. Nothing is returned.
  """
  if not RECORD:
    return
  EXTRACTED_ACTIVATIONS.append(output)

def add_activation_hook(model, layer_idx, hook=None):
  """Register a forward hook on one sub-module of `model`.

  Args:
    model: module whose `modules()` traversal is indexed.
    layer_idx: index into `list(model.modules())`; negative values count
      from the end.
    hook: callable `(module, input, output)` to register. Defaults to the
      notebook's `extract_activation_hook`.

  Returns:
    The handle returned by `register_forward_hook`. Call `.remove()` on it
    to detach the hook, e.g. before re-running the registration cell, since
    each call to this function adds another hook.
  """
  if hook is None:
    hook = extract_activation_hook
  target_module = list(model.modules())[layer_idx]
  return target_module.register_forward_hook(hook)

# Attach the recording hook to the second-to-last entry of model.modules().
add_activation_hook(model, layer_idx=-2)

Note: the cell above adds a forward hook to the module at index `layer_idx` of our model. You may want to look up `register_forward_hook` to understand it fully, but in short: every time an input is fed into the model and passes through the module we specified, the function `extract_activation_hook` is called. `extract_activation_hook` then saves that module's output to EXTRACTED_ACTIVATIONS whenever RECORD is true.

In [0]:
### Old implementation, which is not the easiest way
# MODEL DECOMPOSITION INTO PIECES
# f_net = model
# f_module_list = list(f_net.modules())
# NUM_H_LAYERS = 1
# # input -> activation net
# theta_net = nn.Sequential(*f_module_list[:-NUM_H_LAYERS]) # everything except the last NUM_H_LAYERS
# # activation -> result net
# h_net = nn.Sequential(*f_module_list[-NUM_H_LAYERS:]) # the last NUM_H_LAYERS
MAX_LEN_TRAIN, MAX_LEN_TEST = 128, 512


# No `global` declarations are needed here: this cell runs at module level,
# so plain assignment already rebinds the notebook-level names.
test_sentence = {}
test_sentence["sentence"] = ["This movie is great"]
train_df = pd.DataFrame.from_dict(test_sentence)
train_df["polarity"] = [0]  # placeholder label; process_dataframe reads this column
loader = process_dataframe(train_df, tokenizer)

# Record only during this forward pass; the finally clause switches recording
# off again even if run_model raises.
RECORD = True
try:
  run_model(model, loader) # run the whole model
finally:
  RECORD = False

print(len(EXTRACTED_ACTIVATIONS))
print("the number above shall be increased by 1 each time we run this cell, since element will be appended to EXTRACTED_ACTIVATIONS everytime model is called")

2
the number above shall be increased by 1 each time we run this cell, since element will be appended to EXTRACTED_ACTIVATIONS everytime model is called


In [0]:
# Rebind the notebook-level list to a fresh empty one; the hook looks the
# name up on every call, so later recordings land in the new list.
EXTRACTED_ACTIVATIONS = []
print("running this cell will clear the EXTRACTED_ACTIVATIONS")
print(len(EXTRACTED_ACTIVATIONS))

running this cell will clear the EXTRACTED_ACTIVATIONS
0


# Look at our model and see if the activations we extracted make sense


In [0]:
# Indexing [-1] needs at least one recorded activation: if EXTRACTED_ACTIVATIONS
# was just reset to an empty list, run a recording forward pass again first.
print(EXTRACTED_ACTIVATIONS[-1].shape) # pick one of the extracted activation
print("this matches the input size of last linear layer, see the print out model in the cell below")

torch.Size([1, 768])
this matches the input size of last linear layer, see the print out model in the cell below


In [0]:
# Display the model's module tree (the notebook echoes the last expression).
model

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, element

# Sanity checks on trained model

In [0]:
# sanity checks
# Every sentence goes through the same preprocessing and forward pass, and the
# raw model output is printed once per sentence, in this order.

MAX_LEN_TRAIN, MAX_LEN_TEST = 128, 512
for sanity_text in ("This movie is great",
                    "This movie is awesome",
                    "This movie is shit",
                    "This movie is messed up"):
  test_sentence = {"sentence": [sanity_text]}
  train_df = pd.DataFrame.from_dict(test_sentence)
  train_df["polarity"] = [0]  # placeholder label; process_dataframe reads this column
  loader = process_dataframe(train_df, tokenizer)

  print(run_model(model, loader))


print("the result make sense. Trained model shall be loaded successfully")

(tensor([[-0.6978,  0.8706]], device='cuda:0'),)
(tensor([[-0.7576,  0.1183]], device='cuda:0'),)
(tensor([[ 0.9846, -1.3081]], device='cuda:0'),)
(tensor([[ 1.0426, -1.1694]], device='cuda:0'),)
the result make sense. Trained model shall be loaded successfully
