# Import Relevant Libraries

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
import torch.nn.init as init
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data.sampler import SubsetRandomSampler

  from .autonotebook import tqdm as notebook_tqdm


# Use the CUDA device if one is available

In [2]:
# Prefer the GPU when PyTorch reports one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(device)

cuda


# Load PTB Dataset and Pre-Processing

Load the train/valid/test splits and build the vocabulary

In [54]:
def Data_Loader(train_path="PTB/ptb.train.txt", valid_path="PTB/ptb.valid.txt", test_path="PTB/ptb.test.txt"):
    """Read the three text splits and encode every token as an integer word id.

    Each file is read whole and split on single spaces; the piece before the
    first space is discarded. Only the space character is used as a
    separator, so a newline that sits between two spaces stays in the stream
    as its own token.

    The vocabulary is built from the training split only: its distinct tokens
    are sorted and each token's id is its position in that sorted list.

    Tokens of any split that are missing from the training vocabulary are
    mapped to the id of "<unk>" when the training vocabulary contains that
    token. If it does not, a KeyError is raised for the unknown token.

    Args:
        train_path: Path of the training text file.
        valid_path: Path of the validation text file.
        test_path: Path of the test text file.

    Returns:
        A tuple (train, valid, test, vocab_size) where the first three items
        are numpy arrays of shape (num_tokens, 1) holding word ids and
        vocab_size is the number of distinct training tokens.

    Raises:
        OSError: If one of the files cannot be opened.
        KeyError: If a token is not in the training vocabulary and the
            vocabulary has no "<unk>" entry to fall back on.
    """
    def _read_tokens(path):
        # Split on single spaces only and drop the piece before the first space.
        with open(path, 'r') as f:
            return f.read().split(' ')[1:]

    train_tokens = _read_tokens(train_path)
    valid_tokens = _read_tokens(valid_path)
    test_tokens = _read_tokens(test_path)

    # Build vocabulary (from the training split only)
    words = sorted(set(train_tokens))
    word2index = {word: i for i, word in enumerate(words)}
    # Id used for out-of-vocabulary tokens; None when the corpus has no "<unk>".
    unk_index = word2index.get("<unk>")

    def _encode(tokens):
        # Turn a token list into a (num_tokens, 1) numpy column of word ids.
        if unk_index is None:
            # No fallback token available: an unknown word is a hard error.
            ids = [word2index[token] for token in tokens]
        else:
            ids = [word2index.get(token, unk_index) for token in tokens]
        return np.array(ids).reshape(-1, 1)

    return _encode(train_tokens), _encode(valid_tokens), _encode(test_tokens), len(words)

def mini_batch(data, batch_size, seq_len):
    """Cut a stream of token ids into (input, target) windows for next-token prediction.

    The stream is trimmed to a multiple of batch_size and reshaped into
    batch_size parallel rows. Each row is then sliced into consecutive windows
    of exactly seq_len tokens. For every window the target is the same slice
    shifted one position to the right. Both tensors are transposed, so each
    has shape (seq_len, batch_size).

    Every full-length window is kept, including a final one that fits
    exactly; only a trailing window shorter than seq_len is dropped.

    Args:
        data: Array-like of integer token ids, read along its first axis.
        batch_size: Number of parallel rows to split the stream into.
        seq_len: Number of time steps in each window.

    Returns:
        A list of (x, y) tuples of int64 tensors, each of shape
        (seq_len, batch_size). The list is empty when the stream is too short
        to produce a single full window.
    """
    data = torch.tensor(data, dtype=torch.int64)
    # Length of each parallel row; tokens that do not fill a row are dropped.
    row_len = data.size(0) // batch_size
    if row_len == 0:
        # Fewer than batch_size tokens: view(batch_size, -1) would be ambiguous
        # on an empty tensor, and there is nothing to batch anyway.
        return []
    data = data[:row_len * batch_size].view(batch_size, -1)

    # Targets are shifted by one, so the last column can only ever be a target.
    usable = data.size(1) - 1
    dataset = []
    for i in range(0, usable, seq_len):
        if usable - i < seq_len:
            # Short tail: skip it so that every batch has the same length.
            continue
        x = data[:, i:i + seq_len].transpose(1, 0)
        y = data[:, i + 1:i + seq_len + 1].transpose(1, 0)
        dataset.append((x, y))
    return dataset

Prepare the data for training with the data loader

# Define RNN Model

In [58]:
class My_RNN(nn.Module):
    """Word-level recurrent language model: embedding -> LSTM/GRU -> linear projection.

    Args:
        type: Recurrent cell to use, either "LSTM" or "GRU".
        vocub_size: Number of rows in the embedding table (vocabulary size).
        embedding_size: Dimension of each word embedding.
        hidden_size: Hidden-state size of the recurrent layers.
        num_layer: Number of stacked recurrent layers.
        target_size: Number of output scores produced per time step.
        drop_prob: Dropout probability applied to the recurrent output;
            0.0 skips dropout entirely.

    Raises:
        ValueError: If `type` is neither "LSTM" nor "GRU".
    """

    def __init__(self, type="LSTM", vocub_size=10000, 
                 embedding_size=100, hidden_size=200, 
                 num_layer=2, target_size=10000, drop_prob=0.0):
        super(My_RNN, self).__init__()
        # Parameters
        self.drop_prob = drop_prob
        self.word_embedding = nn.Embedding(vocub_size, embedding_size)
        if type == "LSTM":
            # Hidden size = output size of the LSTM
            # Num layer = number of stacked LSTM layers (deep model)
            self.rnn = nn.LSTM(input_size=embedding_size, hidden_size=hidden_size, num_layers=num_layer)
        elif type == "GRU":
            self.rnn = nn.GRU(input_size=embedding_size, hidden_size=hidden_size, num_layers=num_layer)
        else:
            # Fail at construction time; otherwise self.rnn would be missing
            # and the error would only surface on the first forward pass.
            raise ValueError(
                "Unsupported RNN type {!r}; expected 'LSTM' or 'GRU'".format(type)
            )

        # MLP hidden to target
        self.hidd2targ = nn.Linear(in_features=hidden_size, out_features=target_size)
        # Define Dropout option
        self.DropOut = nn.Dropout(drop_prob)

    def forward(self, x):
        """Compute one score vector per input position.

        Args:
            x: Integer tensor of word ids. The recurrent layer is built
                without batch_first, so the layout is (seq_len, batch).

        Returns:
            Float tensor of shape (seq_len, batch, target_size) holding the
            raw output of the final linear layer; no softmax is applied.
        """
        # Word ids to embeddings
        x = self.word_embedding(x)
        # Apply the RNN; the final hidden state is discarded
        x, _ = self.rnn(x)
        if self.drop_prob > 0.0:
            x = self.DropOut(x)
        # Final MLP
        x = self.hidd2targ(x)

        return x
        


# Define Train Loop

In [59]:
# Training hyper-parameters: learning rate and epoch count
# (presumably consumed by the training loop in later cells).
lr, num_epochs = 1e-3, 5

In [62]:
# Build the 2-layer LSTM language model and move it to the selected device.
# NOTE(review): vocub_size/target_size are hard-coded to 10000; confirm they
# match the vocabulary size returned by Data_Loader.
model = My_RNN(type="LSTM", vocub_size=10000, 
                 embedding_size=100, hidden_size=200, 
                 num_layer=2, target_size=10000, drop_prob=0.0).to(device)

# Use the notebook's `lr` hyper-parameter instead of a second hard-coded copy,
# so that changing `lr` actually changes the optimizer's learning rate.
optimizer = optim.Adam(model.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss()

print(model)


My_RNN(
  (word_embedding): Embedding(10000, 100)
  (rnn): LSTM(100, 200, num_layers=2)
  (hidd2targ): Linear(in_features=200, out_features=10000, bias=True)
  (DropOut): Dropout(p=0.0, inplace=False)
)
