In [1]:
from data import Dataset
from models import RNNG, RNNLM
import torch.nn.functional as F
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
import torch
import numpy as np

In [2]:
if torch.cuda.is_available():
    # CUDA is available, you can proceed to use it
    device = torch.device('cuda')
    print('CUDA is available. Using GPU.')
else:
    # CUDA is not available, use CPU
    device = torch.device('cpu')
    print('CUDA is not available. Using CPU.')

CUDA is available. Using GPU.


In [3]:
train_data = Dataset('data/ptb-train.pkl')
test_data = Dataset('data/ptb-test.pkl')
val_data = Dataset('data/ptb-val.pkl')

In [4]:
seed = 42

torch.manual_seed(seed)
torch.cuda.manual_seed(seed)

In [5]:
raw_rnnlm = RNNLM(
    vocab=24001,
    w_dim=650,           # Dimensionality of word embeddings
    h_dim=650,           # Dimensionality of hidden states
    num_layers=2, # Number of layers
    dropout=0.2
)
raw_rnnlm.cuda()
raw_rnnlm.eval()

RNNLM(
  (word_vecs): Embedding(24001, 650)
  (dropout): Dropout(p=0.2, inplace=False)
  (rnn): LSTM(650, 650, num_layers=2, batch_first=True, dropout=0.2)
  (vocab_linear): Linear(in_features=650, out_features=24001, bias=True)
)

# Now try a trained model

In [6]:
loaded_data = torch.load('rnng.pt')
model_args = loaded_data['args']
model_state_dict = loaded_data['model'].state_dict()

In [7]:
rnng = RNNG(
    vocab=len(loaded_data['word2idx']),
    w_dim=model_args['w_dim'],           # Dimensionality of word embeddings
    h_dim=model_args['h_dim'],           # Dimensionality of hidden states
    q_dim=model_args['q_dim'],           # Dimensionality of 'q' vector
    num_layers=model_args['num_layers'], # Number of layers
    dropout=model_args['dropout'],       # Dropout rate
    max_len=250
)
rnng.load_state_dict(model_state_dict)
rnng.eval()
rnng.cuda()

RNNG(
  (emb): Embedding(24001, 650)
  (dropout): Dropout(p=0.5, inplace=False)
  (stack_rnn): SeqLSTM(
    (linears): ModuleList(
      (0): Linear(in_features=1300, out_features=2600, bias=True)
      (1): Linear(in_features=1300, out_features=2600, bias=True)
    )
    (dropout_layer): Dropout(p=0.5, inplace=False)
  )
  (tree_rnn): TreeLSTM(
    (linear): Linear(in_features=1300, out_features=3250, bias=True)
  )
  (vocab_mlp): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=650, out_features=24001, bias=True)
  )
  (q_binary): Sequential(
    (0): Linear(in_features=512, out_features=512, bias=True)
    (1): ReLU()
    (2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
    (3): Dropout(p=0.5, inplace=False)
    (4): Linear(in_features=512, out_features=1, bias=True)
  )
  (action_mlp_p): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=650, out_features=1, bias=True)
  )
  (q_leaf_rnn): LSTM(650, 256, batch_first

# Set up a test suite

In [8]:
def make_prediction(model, dataset):
    sentences = []
    vocab_dict = dataset.idx2word
    model.cuda()
    rand_idx = np.random.randint(len(dataset))
    tensor, _, _, _, _, _, _ = dataset[rand_idx]
    tensor = tensor.cuda()
    print(tensor.shape)
    sent_length = tensor.size(1)
    cutoff = np.random.choice([1, 2, 3])
    tensor = tensor[:, :-3]
    for row in tensor:
        sentence = [vocab_dict[idx.item()] for idx in row if idx.item() in vocab_dict]
        sentences.append(" ".join(sentence))
        
    output, _, _, _, _ = model(tensor)
    print(output.shape)
#     output = output[:, -1, :]
#     print(output.shape)
    _, max_idx = torch.max(output, 1)
    print(max_idx)
    
    preds = []
    for x in max_idx:
        prediction = vocab_dict[x.item()]
        preds.append(prediction)
        
    return sentences, preds

In [9]:
# sents, preds = make_prediction(rnng, train_data)

In [10]:
# for i in range(len(sents)):
#     print(sents[i], "\t\t\tPREDICTION: ", preds[i])
#     print()

In [22]:
sentence = []
for x in rnng.generate():
    sentence.append(loaded_lm['idx2word'][x])
    
print(' '.join(sentence))

AttributeError: 'RNNG' object has no attribute 'generate'

# Trained language model

In [6]:
lm = torch.load('lm.pt')
model_args = lm['args']
model_state_dict = lm['model'].state_dict()

In [7]:
rnnlm = RNNLM(
    vocab=len(lm['word2idx']),
    w_dim=model_args['w_dim'],           # Dimensionality of word embeddings
    h_dim=model_args['h_dim'],           # Dimensionality of hidden states
    num_layers=model_args['num_layers'], # Number of layers
    dropout=model_args['dropout']
)
rnnlm.load_state_dict(model_state_dict)
rnnlm.eval()
rnnlm.cuda()

RNNLM(
  (word_vecs): Embedding(24001, 650)
  (dropout): Dropout(p=0.2, inplace=False)
  (rnn): LSTM(650, 650, num_layers=2, batch_first=True, dropout=0.2)
  (vocab_linear): Linear(in_features=650, out_features=24001, bias=True)
)

In [10]:
sentence = []
for x in rnnlm.generate():
    sentence.append(lm['idx2word'][x])
    
print(' '.join(sentence))

Thompson Oil Co. reported a 3-for-2 loss of C$ 13.5 million *U* , including four cents a share , mostly because of 9.2 million lire a share *RNR*-1 and restaurant manufacturing concern , Ford Motor Co. 's machine in lieu of Puerto Reed , Grand skidded from Radio Steel Corp. , the distribution of the lower stocks that *T*-1 are down the same low in the session but is an <unk> factor in Canada , '' said *T*-2 Mottram Analytical , director of communications at Hong Kong , a unit of Texas Air Corp. : Baker Investment Thompson Diamandis : detailing humans , <unk> and targeting the Motor posted nine 500 % of the U.S. sales , of 1990 -- if families become dependent in planes after the 1980s were n't Wal-Mart apart on designers now , the trust said 0 *T*-2 ; and Marks 's stake in holders ,


# Distilled language model

In [22]:
kd_lm = torch.load('kd_lm.pt')
kd_model_args = kd_lm['args']
kd_model_state_dict = kd_lm['model'].state_dict()

In [23]:
kd_rnnlm = RNNLM(
    vocab=len(kd_lm['word2idx']),
    w_dim=kd_model_args['w_dim'],           # Dimensionality of word embeddings
    h_dim=kd_model_args['h_dim'],           # Dimensionality of hidden states
    num_layers=kd_model_args['num_layers'], # Number of layers
    dropout=kd_model_args['dropout']
)
kd_rnnlm.load_state_dict(kd_model_state_dict)
kd_rnnlm.eval()
kd_rnnlm.cuda()

RNNLM(
  (word_vecs): Embedding(24001, 650)
  (dropout): Dropout(p=0.2, inplace=False)
  (rnn): LSTM(650, 650, num_layers=2, batch_first=True, dropout=0.2)
  (vocab_linear): Linear(in_features=650, out_features=24001, bias=True)
)

In [34]:
sentence = []
for x in kd_rnnlm.generate():
    sentence.append(kd_lm['idx2word'][x])
    
print(' '.join(sentence))

issuance *T*-33 constrained fall appeal vetoed <unk> corruption them shares , battery-powered data justice contract , KGB <unk> trading , precise


# Untrained language model

In [26]:
sentence = []
for x in raw_rnnlm.generate():
    sentence.append(lm['idx2word'][x])
    
print(' '.join(sentence))

Related 765 fruition interrogated imprisoned Nissho Wynn significant Haskayne encouragement Knight Lebanese second-consecutive 2.23 Coach co-manager machinery wimp trudging offshoot anti-depressant welcomes classy crippled Hoping Corporation stray SECTION modified 5.43 186 d stomachs carefree sympathize upbeat credibility opera shelved playoff Mitsui Kuhns households Secretary Offshore installing alas busiest *T*-115 excited Finding impression Biological influences incur Wilmer aides telephones single-A-3 Through occurrences 1,015 employee reciting cooking abortions introduce Broadcast minister feet Vanity engage kills disposition Ondaatje kicked taught Scorpios Excalibur Mushkat Rupert joins differential mom-and-pop Tele-Communications colon Camille Spiegel full-length Upper Gillett brightened Mercedes-Benz Seymour introduction institute long-held Soup troughed poison-pill polluted Messina i860 570 disarray Rangel resent Leveraged disproportionate Bumiputra explanations Yukon accelera