In [4]:
import argparse
import math

import torch
import torch.nn as nn
from pytorch_pretrained_bert import BertModel, BertConfig, BertTokenizer

from models.classifier import Classifier
from utils import dataloader

In [2]:
class Bert(nn.Module):
    """Wrapper around a `BertModel` that exposes only the last encoded layer.

    Args:
        load_pretrained_bert: When truthy, the 'bert-base-uncased' weights are
            loaded via `BertModel.from_pretrained`; otherwise a model is built
            from `bert_config`.
        bert_config: Configuration used when no pretrained weights are loaded.
            Must be truthy in that case (checked with an assertion).
        tmp_dir: Passed as `cache_dir` when loading the pretrained weights.
    """

    def __init__(self, load_pretrained_bert, bert_config = None, tmp_dir='./dataset/bert/', ):
        super().__init__()
        if not load_pretrained_bert:
            # Building from scratch requires an explicit configuration.
            assert bert_config, 'Bert config cannot be null, or set load_pretrained_bert to True'
            self.model = BertModel(bert_config)
        else:
            self.model = BertModel.from_pretrained('bert-base-uncased', cache_dir=tmp_dir)

    def forward(self, x, segs, mask):
        """Run the wrapped model and return the final entry of its first output.

        Args:
            x: Token ids forwarded as the first positional argument.
            segs: Segment ids forwarded as the second positional argument.
            mask: Forwarded as `attention_mask`; it plays the role of
                valid lengths (marks which positions are real tokens).

        Returns:
            The last element of the encoded-layers sequence returned by the
            wrapped model; the second (pooled) output is discarded.
        """
        all_layers, _pooled = self.model(x, segs, attention_mask=mask)
        return all_layers[-1]

In [15]:
# Load the pretrained 'bert-base-uncased' encoder and its matching tokenizer.
# NOTE(review): from_pretrained presumably downloads and caches the files on
# first use — confirm before running offline.
model = BertModel.from_pretrained('bert-base-uncased')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

In [23]:
# Number of entries in the tokenizer's vocabulary.
print(len(tokenizer.vocab))

30522


In [22]:

# Demo: push one sentence pair through the pretrained BERT loaded above
# (`model`, `tokenizer`) and inspect the hidden states it returns.
sentence = '[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]'
tokens = tokenizer.tokenize(sentence)

token_idx = tokenizer.convert_tokens_to_ids(tokens)

# Segment ids: 0 up to and including the first [SEP] (sentence A), 1 for the
# remainder (sentence B). Deriving them from `tokens` keeps the list in sync
# with whatever the tokenizer actually produced.
first_sep = tokens.index('[SEP]')
segments_ids = [0] * (first_sep + 1) + [1] * (len(tokens) - first_sep - 1)
assert len(segments_ids) == len(token_idx), 'one segment id is required per token'

# Use the GPU when present, otherwise fall back to CPU so the cell still runs.
demo_device = 'cuda' if torch.cuda.is_available() else 'cpu'

token_idx = torch.tensor([token_idx]).to(demo_device)
segs = torch.tensor([segments_ids]).to(demo_device)

print(token_idx)
print(token_idx.shape)
print(segs)

# Use BertModel to get hidden states; eval() switches off training-only
# behaviour and no_grad() skips gradient bookkeeping for this forward pass.
model.eval()
model.to(demo_device)

with torch.no_grad():
    encoded_layers, _ = model(token_idx, segs)

print(len(encoded_layers), ' layers in pretrained BERT')
print(encoded_layers[0].shape)

tensor([[  101,  2040,  2001,  3958, 27227,  1029,   102,  3958, 27227,  2001,
          1037, 13997, 11510,   102]], device='cuda:0')
torch.Size([1, 14])
tensor([[0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1]], device='cuda:0')
12  layers in pretrained BERT
torch.Size([1, 14, 768])


In [3]:
class Summarizer(nn.Module):
    """BERT encoder (`Bert`) combined with a `Classifier` head.

    Args:
        args: Hyper-parameter namespace. `param_init` is always read. When the
            BERT configuration is built here (no pretrained weights and no
            `bert_config` given) it must also provide `vocab_size` (the legacy
            misspelling `vovcab_size` is still accepted), `hidden_size`,
            `num_hidden_layers`, `num_attention_heads` and `rnn_size`
            (forwarded as BERT's `intermediate_size`).
        device: Target handed to `self.to(...)` once the model is assembled.
        load_pretrained_bert: When truthy, the encoder loads pretrained
            weights through `Bert(True)` instead of being built from a config.
        bert_config: Optional ready-made configuration; when given (and
            pretrained loading is off) it is used instead of one derived
            from `args`.
    """

    def __init__(self, args, device, load_pretrained_bert = False, bert_config = None):
        super(Summarizer, self).__init__()

        # Set up the encoder, honouring the caller's choice of weights/config.
        if load_pretrained_bert:
            self.bert = Bert(True)
        else:
            if bert_config is None:
                bert_config = self._config_from_args(args)
            self.bert = Bert(False, bert_config)

        # BertConfig stores the model width as `hidden_size`.
        self.encoder = Classifier(self.bert.model.config.hidden_size)

        # Optional uniform re-initialisation of the classifier parameters;
        # a value of 0.0 leaves the default initialisation untouched.
        if args.param_init != 0.0:
            for p in self.encoder.parameters():
                p.data.uniform_(-args.param_init, args.param_init)

        self.to(device)

    @staticmethod
    def _config_from_args(args):
        """Build a `BertConfig` from the hyper-parameters carried by `args`."""
        vocab_size = getattr(args, 'vocab_size', None)
        if vocab_size is None:
            # Backward compatibility with the historical misspelling.
            vocab_size = args.vovcab_size
        return BertConfig(
            vocab_size,
            hidden_size=args.hidden_size,
            num_hidden_layers=args.num_hidden_layers,
            num_attention_heads=args.num_attention_heads,
            intermediate_size=args.rnn_size,
        )

    def forward(self):
        # TODO: forward pass is not implemented yet; calling the model
        # currently returns None.
        pass
    

### Train the model

In [6]:
#params
batch_size = 16
device = 'cuda' if torch.cuda.device_count() > 0 else 'cpu'

cuda


In [5]:
#Load data iterator
train_set = dataloader.CNNDailyMailDataset()
train_iter = torch.utils.data.DataLoader(train_set, batch_size, shuffle=True)



[IN PROGRESS] loading
[IN PROGRESS] pre-cleaning
[IN PROGRESS] building vocab
[IN PROGRESS] batchifying data


In [None]:
# Summarizer requires a hyper-parameter namespace and a device; calling it
# with no arguments raises TypeError.
# NOTE(review): the sizes below are placeholders mirroring the bert-base
# dimensions printed earlier in this notebook — tune before real training.
summarizer_args = argparse.Namespace(
    vovcab_size=len(tokenizer.vocab),  # attribute name as read by Summarizer.__init__
    hidden_size=768,
    num_hidden_layers=12,
    num_attention_heads=12,
    rnn_size=3072,   # forwarded as BERT's intermediate_size
    param_init=0.0,  # 0.0 skips the uniform re-initialisation of the classifier
)
model = Summarizer(summarizer_args, device)
