In [1]:
import math
import pickle
import gzip
import numpy as np
import pandas
import matplotlib.pylab as plt
%matplotlib inline

# Test PyTorch installation
import torch 
import pytorch_lightning as pl

# Fix the random seed through Lightning's helper so that repeated runs are
# comparable; the call returns the seed, which the notebook echoes as the
# cell output.
pl.seed_everything(1234)



1234

In [22]:
import multiprocessing as mp
import pickle
import pandas as pd
import pytorch_lightning as pl
import torch
import torch.nn as nn
import torch.nn.functional as F

from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.preprocessing.text import Tokenizer
from torch.utils.data import DataLoader, TensorDataset

import pytorch_lightning as pl
from pytorch_lightning.loggers import CSVLogger
from pytorch_lightning.callbacks import ModelCheckpoint

class MagicCards(pl.LightningDataModule):
    """Character-level data module for next-character prediction.

    The text file at ``loc`` is split into documents on blank lines
    (``"\\n\\n"``). Each document is tokenized character by character, and
    every example is a pair ``(x, y)`` where ``y`` is ``x`` shifted left by
    one position. Both are zero-padded on the right to a common length.

    :param dict_size: ``num_words`` passed to the Keras ``Tokenizer``.
    :param batch_size: batch size used by both dataloaders.
    :param loc: path of the training text file.
    """
    def __init__(self, dict_size=77, batch_size=128, loc="./data/minitrain.txt"):
        super().__init__()

        self.dict_size = dict_size
        self.batch_size = batch_size
        # Padded sequence length; only known once make_dataset() has run.
        self.example_length = None
        self.loc = loc

        # Character-level tokenizer: no filtering and no lower-casing, so every
        # character of the input text is kept as-is.
        self.tok = Tokenizer(num_words=dict_size, char_level=True, filters="", lower=False,)

    def load_data(self, location):
        """Return the whole content of the text file at ``location``."""
        with open(location, 'rt') as f:
            return f.read()

    def make_dataset(self, documents, tok=None):
        """Turn raw documents into a ``TensorDataset`` of (input, target) pairs.

        :param documents: iterable of strings, one per example.
        :param tok: optional fitted tokenizer; defaults to ``self.tok``.
            (The parameter used to be accepted but ignored.)
        :return: ``TensorDataset(x, y)`` of ``int64`` tensors with identical
            shape, where ``y`` is ``x`` shifted left by one and zero-padded.
        """
        tokenizer = self.tok if tok is None else tok
        token_ids = tokenizer.texts_to_sequences(documents)

        # The target at position i is the input at position i + 1.
        # (The loop variable is deliberately not called `sequence`, which is
        # the name of the Keras preprocessing module used just below.)
        inputs = token_ids
        targets = [ids[1:] for ids in token_ids]

        # Post-pad because generation happens left to right: real characters
        # must stay at the start of each row.
        x = sequence.pad_sequences(inputs, padding="post")
        self.example_length = x.shape[1]
        y = sequence.pad_sequences(targets, padding="post", maxlen=self.example_length)

        # make torch arrays.
        x = torch.from_numpy(x).to(torch.int64)
        y = torch.from_numpy(y).to(torch.int64)

        return TensorDataset(x, y)

    def setup(self, stage=None):
        """Load the text, fit the tokenizer and build the datasets.

        ``stage`` is accepted for Lightning compatibility and not used; it now
        defaults to ``None`` so the method can also be called by hand, e.g. to
        fit the tokenizer before sampling from a saved checkpoint.
        """
        # load data: one document per blank-line-separated chunk
        train_seqs = self.load_data(self.loc).split("\n\n")

        # fit tokenizer
        self.tok.fit_on_texts(train_seqs)

        # make datasets
        self.train = self.make_dataset(train_seqs)
        # NOTE: there is no held-out split; "test" metrics are computed on the
        # training data.
        self.test = self.train

    def train_dataloader(self):
        """Shuffled dataloader over the training set."""
        return DataLoader(self.train, batch_size=self.batch_size, shuffle=True,
                          num_workers=mp.cpu_count() // 4)

    def test_dataloader(self):
        """Unshuffled dataloader over the test set (same data as training)."""
        return DataLoader(self.test, batch_size=self.batch_size, shuffle=False,
                          num_workers=mp.cpu_count() // 4)

In [74]:
def int_to_onehot(inp, dict_length=None):
    """One-hot encode integer class indices.

    :param inp: integer index or array-like of integer indices; any shape is
        accepted and flattened in row-major order.
    :param dict_length: number of classes (columns). When ``None`` it is
        inferred as ``max(inp) + 1``.
    :return: float array of shape ``(inp.size, dict_length)`` with a single 1
        per row.
    :raises ValueError: if ``inp`` is empty and ``dict_length`` is ``None``
        (the maximum of an empty array is undefined).
    """
    indices = np.asarray(inp).ravel()
    if dict_length is None:
        # `max` is a method: the previous `inp.max+1` raised TypeError.
        dict_length = int(indices.max()) + 1

    one_hot = np.zeros((indices.size, dict_length))
    one_hot[np.arange(indices.size), indices] = 1

    return one_hot

class RNN(pl.LightningModule):
    """Character-level LSTM language model.

    Pipeline: embedding -> LSTM (``batch_first=True``) -> linear projection to
    ``dict_size`` logits for every position of the input sequence. Training
    minimises the cross-entropy between those logits and the next-character
    targets supplied by the data module.
    """

    def __init__(self, dict_size=77, example_length=660, lstm_layers=1, dropout=0.5, rnn_width=256):
        """
        Build the embedding, LSTM and output layers.

        :param dict_size: vocabulary size (embedding rows and output logits).
        :param example_length: sequence length of the dummy example input that
            Lightning uses for the model summary.
        :param lstm_layers: number of stacked LSTM layers.
        :param dropout: dropout between stacked LSTM layers; only applied when
            ``lstm_layers > 1``.
        :param rnn_width: embedding dimension and LSTM hidden size.
        """
        super().__init__()

        self.dict_size = dict_size
        self.num_layers = lstm_layers
        self.lstm_size = rnn_width
        self.emb_layer = nn.Embedding(dict_size, rnn_width)
        # nn.LSTM applies dropout only *between* layers, so with one layer it
        # has no effect and merely triggers a warning; pass 0 in that case.
        self.lstm_layer = nn.LSTM(rnn_width, rnn_width,
                                  batch_first=True,
                                  num_layers=lstm_layers,
                                  dropout=dropout if lstm_layers > 1 else 0.0)
        self.lin_layer = nn.Linear(rnn_width, dict_size)

        # Save every constructor argument that shapes the network so that
        # load_from_checkpoint() rebuilds the same architecture. Checkpoints
        # written with only dict_size/rnn_width still load: the missing values
        # fall back to the defaults above.
        self.save_hyperparameters('dict_size', 'rnn_width', 'example_length',
                                  'lstm_layers', 'dropout')

        self.example_input_array = torch.zeros([1, example_length], dtype=torch.int64)

    def configure_optimizers(self):
        """Adam over all parameters with PyTorch's default settings."""
        return torch.optim.Adam(self.parameters())

    def forward(self, x):
        """
        Run a batch of token ids through the network.

        :param x: integer tensor of token ids, shape (batch, seq_len).
        :return: unnormalised logits of shape (batch, seq_len, dict_size).
            No softmax/sigmoid is applied here; the loss and the sampling code
            apply it themselves.
        """
        embed = self.emb_layer(x)
        lstm_out, _ = self.lstm_layer(embed)
        lin_out = self.lin_layer(lstm_out)
        return lin_out

    def accuracy(self, y_hat, y):
        """Fraction of positions where prediction ``y_hat`` equals target ``y``."""
        return (y == y_hat).to(torch.float32).mean()

    def _loss_and_accuracy(self, batch):
        """Shared forward pass for the train and test steps.

        :param batch: ``(x, y)`` pair of token-id tensors with equal shape.
        :return: ``(loss, accuracy)``; both are averaged over every position,
            padded positions included.
        """
        x, y = batch
        logits = self.forward(x)
        targets = y.view(-1)
        loss = nn.CrossEntropyLoss()(logits.view(-1, self.dict_size), targets)
        # argmax directly on the logits. The previous training code first
        # called F.softmax without `dim`, which for a 3-D tensor normalises
        # over dim 0 (the batch) and therefore changed the argmax over dim 2.
        predictions = torch.argmax(logits, dim=2).view(-1)
        return loss, self.accuracy(predictions, targets)

    def training_step(self, batch, batch_idx):
        """Compute and log loss and accuracy for one training batch."""
        loss, acc = self._loss_and_accuracy(batch)
        result = pl.TrainResult(loss)
        result.log('train_loss', loss, prog_bar=True)
        result.log('train_accuracy', acc, prog_bar=True)
        return result

    def test_step(self, batch, batch_idx):
        """Compute and log loss and accuracy for one test batch."""
        loss, acc = self._loss_and_accuracy(batch)
        result = pl.EvalResult(loss)
        result.log('test_loss', loss, prog_bar=True)
        result.log('test_accuracy', acc, prog_bar=True)
        return result

    def init_state(self, sequence_length):
        """Return zero-filled ``(h_0, c_0)`` tensors for the LSTM.

        Each tensor has shape ``(num_layers, sequence_length, lstm_size)``.
        NOTE(review): nn.LSTM documents the middle dimension of the initial
        state as the batch size, even with ``batch_first=True``; the parameter
        name is kept for compatibility but callers should pass a batch size.
        ``forward`` does not use this state.
        """
        return (torch.zeros(self.num_layers, sequence_length, self.lstm_size),
                torch.zeros(self.num_layers, sequence_length, self.lstm_size))


In [76]:
# Module-level data module with default settings. run_rnn() reads this global
# directly, and the sampling code uses its tokenizer (magic_cards_dm.tok).
magic_cards_dm = MagicCards()

def run_rnn(**kwargs):
    """Train an ``RNN`` on the module-level ``magic_cards_dm`` and test it.

    :param kwargs: forwarded unchanged to the ``RNN`` constructor.
    :return: path of the metrics file written by the CSV logger.
    """
    csv_logger = CSVLogger("logs", name="rnn")

    # Use one GPU when CUDA is available, otherwise run on CPU; log and flush
    # metrics at every step.
    trainer_options = dict(
        gpus=int(torch.cuda.is_available()),
        logger=csv_logger,
        min_epochs=5,
        max_epochs=100,
        row_log_interval=1,
        log_save_interval=1,
        deterministic=True,
    )
    trainer = pl.Trainer(**trainer_options)

    network = RNN(**kwargs)

    trainer.fit(network, datamodule=magic_cards_dm)
    # The test metrics are not returned to the caller.
    trainer.test(verbose=True)
    return csv_logger.experiment.metrics_file_path

# Train and test with the default hyperparameters. `metrics` is the path of the
# CSV metrics file returned by run_rnn(), not a dict of results.
metrics = run_rnn()
# NOTE(review): there is no `results` variable at this level, and the test step
# logs its metrics under 'test_loss' and 'test_accuracy' (not 'test_acc').
# Read the file at `metrics` to inspect them.

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
CUDA_VISIBLE_DEVICES: [0]

  | Name       | Type      | Params | In sizes      | Out sizes                                  
-------------------------------------------------------------------------------------------------------
0 | emb_layer  | Embedding | 19 K   | [1, 660]      | [1, 660, 256]                              
1 | lstm_layer | LSTM      | 526 K  | [1, 660, 256] | [[1, 660, 256], [[1, 1, 256], [1, 1, 256]]]
2 | lin_layer  | Linear    | 19 K   | [1, 660, 256] | [1, 660, 77]                               


HBox(children=(HTML(value='Training'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), max…



PermissionError: [Errno 13] Permission denied: 'logs\\rnn\\version_101\\metrics.csv'

In [73]:
def predict(dm, model, seed_text=None, max_length=None):
    """Sample a character sequence from a trained model.

    :param dm: data module whose fitted tokenizer ``dm.tok`` converts between
        characters and token ids.
    :param model: network mapping an int64 tensor of shape (1, seq_len) to
        logits of shape (1, seq_len, vocab).
    :param seed_text: prompt to continue; when empty or ``None`` the single
        character ``'|'`` is used. Characters unknown to the tokenizer are
        skipped.
    :param max_length: number of characters to generate. When ``None``,
        generation continues until token id 0 (the padding id) is sampled.
        NOTE: that mode has no upper bound if the model never emits 0.
    :return: the list returned by ``dm.tok.sequences_to_texts`` for the seed
        plus the generated tokens (a single-element list).
    :raises ValueError: if no character of the seed is known to the tokenizer.
    """
    model.eval()

    prompt = seed_text if seed_text else '|'
    # A string is tokenized one character at a time, giving one (possibly
    # empty) list per character. Flattening keeps known characters and drops
    # unknown ones, instead of producing a ragged array.
    tokens = [int(idx) for ids in dm.tok.texts_to_sequences(prompt) for idx in ids]
    if not tokens:
        raise ValueError("seed_text contains no characters known to the tokenizer")

    # Run on whatever device the model's weights live on.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    generated = 0
    with torch.no_grad():  # inference only: do not build autograd graphs
        while True:
            if max_length is not None:
                if generated >= max_length:
                    break
            elif tokens[-1] == 0:
                break

            x = torch.from_numpy(np.array(tokens, dtype=np.int64)).to(device)
            y_pred = model(x.view(1, -1))

            # Sample the next character from the distribution at the last position.
            last_logits = y_pred[0][-1]
            p = torch.nn.functional.softmax(last_logits, dim=0).cpu().numpy()
            tokens.append(int(np.random.choice(p.shape[0], p=p)))
            generated += 1

    return dm.tok.sequences_to_texts(np.array(tokens, dtype=np.int64).reshape(1, -1))

# Rebuild the network from a checkpoint and hyperparameter file of an earlier
# run (hard-coded to logs/rnn/version_97).
# NOTE(review): magic_cards_dm's tokenizer is only fitted inside
# MagicCards.setup(), so this cell presumably relies on a training run having
# happened earlier in the same kernel - confirm before running on a fresh one.
sampler = RNN.load_from_checkpoint("logs/rnn/version_97/checkpoints/epoch=4.ckpt",
                                   hparams_file="logs/rnn/version_97/hparams.yaml")

# Continue the prompt "|5c" until the model samples the padding id (0).
sample = predict(magic_cards_dm, sampler, seed_text="|5c")
print(sample)

tensor([ 6.5551e-01,  5.5576e-01,  8.2455e-01,  8.0354e-01,  1.0944e+00,
         6.4976e-01,  2.6734e-01,  5.8669e-01,  6.7656e-01,  1.5497e-01,
         4.0713e-01,  3.8916e-01,  1.6793e-01,  4.0804e-03,  2.2180e-01,
         2.0296e-02,  2.2211e-01,  8.7089e-02, -1.2386e-01, -6.3576e-02,
        -9.8702e-02,  3.4822e-02, -3.4568e-01, -4.2177e-02, -1.9252e-01,
        -4.4906e-01, -4.2355e-01, -1.9206e-04, -2.7402e-01, -2.3651e-01,
        -1.9048e-01, -5.2799e-01, -1.0828e-01, -5.1150e-01, -1.9999e-01,
        -2.7133e-01, -6.5738e-02, -3.6411e-01, -3.0143e-01, -1.7757e-01,
        -2.4363e-01, -5.1550e-01, -7.8627e-01, -4.5757e-01, -5.8343e-01,
        -3.4619e-01, -3.9465e-01, -3.2228e-01, -3.1348e-01, -3.5865e-01,
        -7.7745e-01, -5.4865e-01, -4.1701e-01, -5.4899e-01, -2.7251e-01,
        -4.3039e-01, -3.4012e-01, -3.5533e-01, -4.9029e-01, -4.8600e-01,
        -3.6669e-01, -4.6858e-01, -2.4954e-01, -3.7135e-01, -5.1150e-01,
        -3.2633e-01, -6.3735e-02, -2.7015e-01, -6.3