In [1]:
from gpt2_model import GPT, generate_text_until_end
from tokenizers import ByteLevelBPETokenizer
import torch
from rnng_data import RNNGDataset
from gpt2_model import generate_square_subsequent_mask
import torch.nn.functional as F
import thop
from thop import profile, clever_format

In [9]:
# Build the byte-level BPE tokenizer from its saved vocabulary and merge-rule files.
vocab_path = "tokenizers/rnng/vocab.json"
merges_path = "tokenizers/rnng/merges.txt"
tokenizer = ByteLevelBPETokenizer(vocab_path, merges_path)

In [3]:
# Choose the compute device once; later cells move models and tensors onto it.
has_cuda = torch.cuda.is_available()
device = "cuda" if has_cuda else "cpu"
print("Cuda is available. Using GPU." if has_cuda else "Cuda is not available. Using CPU.")

Cuda is available. Using GPU.


In [4]:
# Disabled: builds a fresh (untrained) GPT and moves it to `device`.
# NOTE(review): because this cell is commented out, no active cell in this notebook binds
# the name `model`, yet the training-loop check further down calls `model(...)`.
# Either re-enable this cell or point that check at one of the loaded models.
# model = GPT(
#         vocab_size=12000,
#         embed_dim=768,
#         max_len=1024,
#         embed_dropout=0.1,
#         num_blocks=6,
#         num_heads=8,
#         ff_dim=2048,
#         attn_dropout=0.1,
#         ff_dropout=0.1
#     )
# model.to(device)

In [None]:
# Load the saved checkpoints for the "solo" and "distilled" BLLIP models.
# map_location=device remaps the stored tensors onto the device selected earlier, so
# checkpoints written from a GPU session still load when the notebook fell back to CPU.
# NOTE(review): torch.load unpickles arbitrary Python objects; only load checkpoint files
# that come from a trusted source.
# NOTE(review): the next cell reads a whole module from the 'model' key. Recent PyTorch
# releases default to weights_only=True, which presumably rejects such checkpoints unless
# weights_only=False is passed explicitly — TODO confirm against the installed version.
m_load = torch.load("saved_models/bllip/solo/bllip_ltg_gpt2.pt", map_location=device)
d_load = torch.load("saved_models/bllip/distilled/distilled_bllip_ltg_gpt2.pt", map_location=device)

# ptb_load = torch.load("saved_models/ptb/solo/raw_gpt2_60epochs_12heads_12blocks.pt")

In [None]:
# Take the object stored under 'model' in each checkpoint and place it on `device`.
bllip_model, d_bllip_model = (
    checkpoint["model"].to(device) for checkpoint in (m_load, d_load)
)
# ptb_solo_model = ptb_load['model'].to(device) # Can't use these models anymore because changed model definition after training

## Model parameter profile

In [7]:
# Encode a short sample sentence and keep its token ids.
sample_encoding = tokenizer.encode(" This is a test sentence .")
input_tokens = sample_encoding.ids
# Nest the id list once so the tensor is created on `device` with a leading dimension of 1.
input_tensor = torch.tensor([input_tokens], device=device)
print(input_tensor)

tensor([[11967,   340,   262,  1743, 10533,  1039]], device='cuda:0')


In [8]:
# Run thop's profiler on the distilled model with the sample input to obtain MAC and
# parameter counts.
raw_macs, raw_params = profile(d_bllip_model, inputs=(input_tensor,))
# Format the raw counts as short human-readable strings with three decimals.
macs, params = clever_format([raw_macs, raw_params], "%.3f")
print(f"MACs: {macs}, Parameters: {params}")

[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
MACs: 6.905G, Parameters: 1.151G


In [10]:
# Prompt string handed to generate_text_until_end in the following cell.
# NOTE(review): the leading and trailing spaces look deliberate for the tokenizer — TODO confirm.
input_text = " The United States "

In [12]:
# Run text generation from the prompt with the distilled model.
# NOTE(review): `d_bllip_model` only exists once the checkpoint-loading cells above have run.
generate_text_until_end(
    input_text=input_text,
    model=d_bllip_model,
    tokenizer=tokenizer,
    device=device,
    temperature=1.0,
    top_k=2,
)

NameError: name 'd_bllip_model' is not defined

In [34]:
# Tokenize the string " blue" and keep only its token ids.
blue_encoding = tokenizer.encode(" blue")
ids = blue_encoding.ids

In [35]:
# Turn the token ids into a tensor on `device`.
# This cell rebinds `ids` to its own result, so it may be re-run with `ids` already a
# tensor. torch.as_tensor accepts both the original id list and an existing tensor,
# which keeps the cell idempotent instead of copy-constructing a tensor from a tensor.
ids = torch.as_tensor(ids, device=device)

In [36]:
# Call get_emb_no_context on `ids` with gradient tracking disabled and keep the result.
# NOTE(review): `n_model` is not assigned anywhere in the visible notebook — confirm which
# loaded model this should be (e.g. `bllip_model` or `d_bllip_model`).
with torch.no_grad():
    emb = n_model.get_emb_no_context(ids)

# Check training loops

In [31]:
# Path to the training split that the next cell loads as a dataset.
# NOTE(review): presumably a pickled, pre-tokenized PTB training set, judging by the path — confirm.
train_loc = "data/tokenized_data/ptb-train.pkl"

In [32]:
# Load the training batches from `train_loc`.
# The notebook imports only RNNGDataset (from rnng_data); the bare name `Dataset` is never
# imported, so the previous call raised NameError on a fresh kernel.
# NOTE(review): assumes RNNGDataset takes the data path as its single argument — confirm.
train_data = RNNGDataset(train_loc)

In [50]:
# Replay one training-style forward pass on a single batch and compare the model's
# argmax predictions with the shifted labels.
sents, length, batch_size, gold_actions, gold_spans, gold_binary_trees, other_data = train_data[40]

# Move sents to the selected device
sents = sents.to(device)

# Next-token setup: labels are the inputs shifted left by one position.
labels = sents[:, 1:]
sents = sents[:, :-1]
# Recompute the sizes, since the inputs just lost their last column.
batch_size, length = sents.size(0), sents.size(1)

mask = generate_square_subsequent_mask(length, device=device)

# This cell only inspects predictions, so the forward pass runs without autograd
# bookkeeping (no graph is kept alive on the device between cells).
# NOTE(review): `model` is not bound by any active cell in the visible notebook (its
# construction is commented out) — bind it to the intended model before running.
with torch.no_grad():
    logits = model(input_ids=sents, attention_mask=mask)
    log_probs_word = F.log_softmax(logits, dim=-1)
    pred_idx = torch.argmax(log_probs_word, dim=-1)

print("Labels: \n", labels)
print("Predictions: \n", pred_idx)

Labels: 
 tensor([[ 5156,   513,   567,  5370,     3,    35,     2],
        [ 4483,  1983,  3283,   513,  1839,  1039,     2],
        [ 4699,  3050,  3835,   925,  3833,  1039,     2],
        [ 1561,   838,  8063,  2989,  3318,  1039,     2],
        [ 1561,   786,  4488,   555,  1403,  1039,     2],
        [ 1894,    83,     3,   272,   658,    35,     2],
        [  470,   596,  4822,  1150,   935,    30,     2],
        [11967,   340,   535,   269,  1509,  1039,     2],
        [ 4510,   558,  7948,   288,   925,  1039,     2],
        [ 2014,  3206,  7720,   279,  6322,  1039,     2],
        [ 8392,  8267,   403,  1860,  1021,  1039,     2],
        [ 3971,  1243,  4627,   555,  3972,  1039,     2],
        [ 4699,  3997,  3994,   279,   332,  1039,     2],
        [ 6233,    10,    56,  8933,    60,    30,     2],
        [ 1561,  1824,  5201,   555,  3493,  1039,     2],
        [ 5156,  2961,  2258,  1873,     3,  1039,     2]], device='cuda:0')
Predictions: 
 tensor([[ 156

In [51]:
# Element-wise match between labels and predictions, stored as int32 (1 = match, 0 = miss).
eq = (labels == pred_idx).to(torch.int32)

In [49]:
# Display the 0/1 match matrix between labels and predictions.
# NOTE(review): the stored output below has 8 columns while the labels printed above have 7,
# and this cell's execution count (49) precedes the cells that compute `eq` — the output
# looks stale; re-run the cells in order.
eq

tensor([[0, 0, 0, 1, 1, 1, 0, 1],
        [0, 0, 1, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 1],
        [0, 0, 0, 1, 1, 0, 0, 1],
        [0, 0, 0, 0, 1, 1, 0, 1],
        [0, 1, 0, 0, 0, 0, 0, 1],
        [0, 0, 0, 0, 0, 0, 0, 1],
        [0, 0, 0, 0, 0, 1, 0, 1],
        [1, 0, 0, 1, 0, 0, 0, 1],
        [0, 1, 0, 0, 1, 1, 0, 1],
        [0, 0, 0, 0, 0, 1, 0, 1],
        [0, 0, 0, 0, 1, 0, 0, 1],
        [0, 0, 0, 1, 0, 0, 0, 1],
        [0, 0, 0, 0, 1, 1, 0, 1],
        [0, 0, 0, 0, 1, 1, 0, 1],
        [0, 0, 1, 0, 0, 0, 0, 1]], device='cuda:0', dtype=torch.int32)