<a href="https://colab.research.google.com/github/abialbon/pytorch-udacity-scholarship/blob/master/RNN/RNN_Practice.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Colab setup cell: install a PyTorch 0.4.1 wheel matching this runtime.
# http://pytorch.org/
from os.path import exists
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
# Tag that is substituted into the wheel filename below; presumably
# "<implementation><version>-<abi>", going by the helper names -- TODO confirm.
platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())
# IPython shell capture: list cached shared libraries, keep the cudart.so
# entries, and let sed rewrite a trailing ".<X>.<Y>" into "cu<X><Y>".
cuda_output = !ldconfig -p|grep cudart.so|sed -e 's/.*\.\([0-9]*\)\.\([0-9]*\)$/cu\1\2/'
# Use the first captured line only when the /dev/nvidia0 device node exists;
# otherwise select the 'cpu' wheel directory.
accelerator = cuda_output[0] if exists('/dev/nvidia0') else 'cpu'

# {accelerator} and {platform} are interpolated by IPython into the wheel URL;
# torchvision is installed alongside torch.
!pip install -q http://download.pytorch.org/whl/{accelerator}/torch-0.4.1-{platform}-linux_x86_64.whl torchvision


tcmalloc: large alloc 1073750016 bytes == 0x58808000 @  0x7f0fbbd9c2a4 0x591a07 0x5b5d56 0x502e9a 0x506859 0x502209 0x502f3d 0x506859 0x504c28 0x502540 0x502f3d 0x506859 0x504c28 0x502540 0x502f3d 0x506859 0x504c28 0x502540 0x502f3d 0x507641 0x502209 0x502f3d 0x506859 0x504c28 0x502540 0x502f3d 0x507641 0x504c28 0x502540 0x502f3d 0x507641


# Mount the Colab `drive` helper at /content/drive so files stored there are
# reachable through the local filesystem.
from google.colab import drive
drive.mount('/content/drive')

In [162]:
# Load the whole training corpus into memory as a single string.
with open('text.txt', 'r') as f:
    data = f.read()

# Vocabulary = distinct characters of the corpus. It is sorted into a tuple so
# that its order -- and therefore every char<->index mapping built from it --
# is the same after each kernel restart. Iterating a plain set of strings gives
# an order that changes from one Python process to the next (hash
# randomization), which would silently invalidate any saved model.
tokens = tuple(sorted(set(data)))

print('The length of the dataset is {} characters'.format(len(data)))
print('The number of unique chars is: {}'.format(len(tokens)))

The length of the dataset is 94217 characters
The number of unique chars is: 87


In [163]:
import torch
import numpy as np
from torch import nn
import torch.nn.functional as F
import torch.optim as optim

# Pick the compute device once: 'cuda' when PyTorch reports a usable GPU,
# otherwise fall back to 'cpu'.
if torch.cuda.is_available():
    device = 'cuda'
    print('Thank you Google Colab! Training on GPU!')
else:
    device = 'cpu'

Thank you Google Colab! Training on GPU!


In [0]:
#Creating a CharRNN

class CharRNN(nn.Module):
    """Character-level language model: a stacked LSTM followed by an MLP head.

    Args:
        tokens: Sized iterable of the distinct characters in the vocabulary.
            Its iteration order at construction time fixes each character's
            integer index.
        hidden_size: Number of features in the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.

    Attributes:
        n_tokens: Vocabulary size; also the LSTM input size and output width.
        char2int: Maps each character to its integer index.
        int2char: Inverse mapping of ``char2int``.
        int2har: Alias of ``int2char``, kept because that was the original
            (misspelled) attribute name.
    """

    def __init__(self, tokens, hidden_size, num_layers):
        super().__init__()
        self.tokens = tokens
        self.hidden = hidden_size
        self.num_layers = num_layers
        self.n_tokens = len(self.tokens)
        self.char2int = {ch: i for i, ch in enumerate(self.tokens)}
        self.int2char = {i: ch for ch, i in self.char2int.items()}
        self.int2har = self.int2char  # backward-compatible alias

        # Layers
        self.lstm = nn.LSTM(
            input_size=self.n_tokens,
            hidden_size=self.hidden,
            num_layers=self.num_layers,
            # LSTM dropout acts between stacked layers only, so it is
            # meaningless (and warned about) for a single layer.
            dropout=0.5 if self.num_layers > 1 else 0,
            batch_first=True,
        )
        self.fc1 = nn.Linear(hidden_size, 128)
        self.fc2 = nn.Linear(128, 32)
        self.fc3 = nn.Linear(32, self.n_tokens)

        self.dropout = nn.Dropout(0.2)
        self.relu = nn.ReLU()

    def forward(self, x, hidden=None):
        """Run one batch of sequences through the network.

        Args:
            x: Float tensor of shape (batch, seq_len, n_tokens); the LSTM is
                built with ``batch_first=True`` and ``input_size=n_tokens``.
            hidden: Optional ``(h_0, c_0)`` tuple, each of shape
                (num_layers, batch, hidden_size). ``None`` lets the LSTM start
                from zeros.

        Returns:
            ``(logits, hidden)`` where ``logits`` has shape
            (batch * seq_len, n_tokens) and ``hidden`` is the LSTM's final
            ``(h_n, c_n)`` state.
        """
        # Feed the incoming state to the LSTM so context carries over from
        # the previous batch instead of restarting from zeros every call.
        x, h = self.lstm(x, hidden)
        # Collapse batch and time so the linear head scores every time step.
        x = x.contiguous().view(-1, self.hidden)
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x, h

    def init_hidden(self, batch_size):
        """Return a zeroed ``(h_0, c_0)`` state for ``batch_size`` sequences.

        Both tensors have shape (num_layers, batch_size, hidden_size) and are
        created with the dtype and device of the model's own parameters, so
        the result always matches wherever the model currently lives.
        """
        weights = next(self.parameters())
        shape = (self.num_layers, batch_size, self.hidden)
        return (weights.new_zeros(shape), weights.new_zeros(shape))
    
        

In [164]:
# Model parameters (passed to CharRNN as hidden_size and num_layers)
n_hidden = 512
n_layers = 2

# Training parameters
batch_size = 128
seq_length = 100
seq_lenght = seq_length  # original misspelled name, kept so existing references still work
epochs = 20

# Build the network over the corpus vocabulary and move it to the chosen device.
net = CharRNN(tokens, n_hidden, n_layers)
net.to(device)

CharRNN(
  (lstm): LSTM(87, 512, num_layers=2, batch_first=True, dropout=0.5)
  (fc1): Linear(in_features=512, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=32, bias=True)
  (fc3): Linear(in_features=32, out_features=87, bias=True)
  (dropout): Dropout(p=0.2)
  (relu): ReLU()
)

In [0]:
def get_batches(arr, b_s, s_l):
    """Yield ``(x, y)`` mini-batches for next-character prediction.

    The input is flattened, truncated to a whole number of batches and laid
    out as ``b_s`` parallel rows; consecutive batches are consecutive
    ``s_l``-wide column windows of that layout.

    Args:
        arr: Array-like of encoded characters.
        b_s: Batch size (number of rows per batch).
        s_l: Sequence length (number of columns per batch).

    Yields:
        ``(x, y)`` arrays of shape (b_s, s_l). ``y`` is ``x`` shifted left by
        one position; its last column is the character that follows the
        window, or the row's first character when the window is the last one.
    """
    arr = np.asarray(arr).flatten()
    chars_per_batch = b_s * s_l
    # Size the batches from the array actually passed in, not from a global.
    n_b = len(arr) // chars_per_batch
    arr = arr[:n_b * chars_per_batch].reshape(b_s, -1)
    row_len = arr.shape[1]

    for step in range(n_b):
        start, stop = step * s_l, (step + 1) * s_l
        x = arr[:, start:stop]
        y = np.zeros_like(x)
        y[:, :-1] = x[:, 1:]
        # Only the final window has no following column; wrap to column 0
        # there instead of relying on a swallowed IndexError.
        y[:, -1] = arr[:, stop] if stop < row_len else arr[:, 0]
        yield x, y
        

In [0]:
def one_hot_encode(arr, n_labels):
    """One-hot encode an integer array of any rank.

    Args:
        arr: Array-like of label indices in ``[0, n_labels)``.
        n_labels: Number of distinct labels (size of the new last axis).

    Returns:
        Float array of shape ``arr.shape + (n_labels,)`` holding 1 at each
        label's index and 0 elsewhere.
    """
    arr = np.asarray(arr)
    flat = arr.reshape(-1).astype('int')
    # Use the flattened size rather than multiplying exactly two dimensions,
    # so 1-D, 2-D and higher-rank inputs are all handled.
    z = np.zeros((flat.size, n_labels))
    z[np.arange(flat.size), flat] = 1
    return z.reshape(arr.shape + (n_labels,))

In [170]:
def train(model, data, n_epochs=5, batch_size=128, seq_length=100, lr=0.01):
    """Train ``model`` on ``data`` and print the mean loss after every epoch.

    Args:
        model: Network exposing ``char2int``, ``n_tokens``,
            ``init_hidden(batch_size)`` and a forward pass taking
            ``(x, hidden)`` and returning ``(logits, hidden)``.
        data: Iterable of characters; each must be a key of ``model.char2int``.
        n_epochs: Number of passes over the data.
        batch_size: Number of sequences per mini-batch.
        seq_length: Number of characters per sequence.
        lr: Learning rate for the Adam optimizer.

    Raises:
        ValueError: If ``data`` is too short to form a single mini-batch.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # Follow the model's own device instead of a notebook-level global, so
    # inputs and weights can never end up on different devices.
    model_device = next(model.parameters()).device

    encoded_data = np.array([model.char2int[ch] for ch in data])

    model.train()  # make sure dropout is active while optimizing
    for _ in range(n_epochs):
        h = tuple(each.to(model_device) for each in model.init_hidden(batch_size))
        train_loss = 0.0
        counter = 0

        for x, y in get_batches(encoded_data, batch_size, seq_length):
            counter += 1
            x = torch.from_numpy(one_hot_encode(x, model.n_tokens)).float().to(model_device)
            y = torch.from_numpy(y).long().to(model_device)

            # Call the module itself (not .forward) so registered hooks run.
            output, h = model(x, h)
            # Detach the carried state so back-propagation stops at the batch
            # boundary instead of reaching through the entire history.
            h = tuple(each.detach() for each in h)

            loss = criterion(output, y.view(-1))
            train_loss += loss.item()

            optimizer.zero_grad()
            loss.backward()
            # Clip the gradients of the model being trained, whatever other
            # networks happen to exist in the notebook namespace.
            nn.utils.clip_grad_norm_(model.parameters(), 5)
            optimizer.step()

        if counter == 0:
            raise ValueError(
                'data is too short for one batch: need at least '
                'batch_size * seq_length = {} characters, got {}'.format(
                    batch_size * seq_length, len(encoded_data)))

        print('Loss: {:.3f}'.format(train_loss/counter))
      
          

# Short training run: 5 epochs over mini-batches of 20 sequences x 100 characters.
train(net, data, n_epochs=5, batch_size=20, seq_length=100, lr=0.01)

Loss: 3.159
Loss: 2.818
Loss: 2.592
Loss: 2.447
Loss: 2.343
