## RNNs Insights:
**Implementation: Follows Previous Subclassing of nn.Module**


*   RNNs require hidden_size, sequence_lengths.
*   In implemetation use RNN model as component - requires num_layers.
*   In the usage of RNN, requires hidden state(2 params), like tensorflow return as output, hidden_state (use _ to ignore).
*   For BiDirectional, use parameter bidirectional=True, and for hidden state and cell state, do num_layers*2 for both directions.



In [1]:
#import

import torch
import torch.nn as nn # nn stuff
import torch.optim as optim # optimizer
import torch.nn.functional as f # no param functions exp, tanh vs keras functional
from torch.utils.data import DataLoader # utilities in data field
import torchvision.datasets as datasets # with tfds
import torchvision.transforms as transforms

In [2]:
#set device

#conditional with .cuda.is_avaiable
device = torch.device("cuda" if torch.cuda.is_available() else 'cpu')

In [3]:
#hyperparam
# Nx1x28x28
input_size = 28
sequence_length = 28 #sequence
num_layers= 2
hidden_size=256
num_classes = 10
learning_rate = 0.001
batch_size = 64
num_epochs = 2

In [4]:
#create
class RNN(nn.Module):
  def __init__(self, input_size, hidden_size, num_layers, num_classes):
    super(RNN,self).__init__()
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    #model in itself
    #change for RNN, GRU, LSTM
    self.rnn = nn.RNN(input_size,hidden_size, num_layers,
                      batch_first=True) # works for any number, we spec

    #Nxtime_sequencesxfeatures
    self.fc = nn.Linear(hidden_size*input_size, num_classes) # tsq*features
  def forward(self, x):
    h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
    #if using LSTM, need sep. cell state of same shape
    #needs hidden state?

    #forward prop
    out, _ = self.rnn(x, h0) # ignore -> if LSTM, include as self.rnn(x, (h0, c0))
    out = out.reshape(out.shape[0], -1) # convert to Linear layer shape (exclude batch)
    out = self.fc(out)
    return out



model = RNN(input_size, hidden_size, num_layers, num_classes)
x = torch.randn(64, 28, 28)
print(model(x).shape) # test out model (really just matrix mult so has shape)

torch.Size([64, 10])


In [5]:
#load data
train_dataset = datasets.MNIST(root='dataset/', train=True,
                               transform = transforms.ToTensor(),
                               download=True) # create folder , get train, download if not there
train_loader = DataLoader(dataset = train_dataset, batch_size = batch_size, shuffle=True)
# batch - for batch in train_loader
#usually for i, batch in enumerate(train_loader, start=1) for index
# test on my own
test_dataset = datasets.MNIST(root='dataset/', train=False,
                              transform=transforms.ToTensor(),
                              download=True)
test_loader = DataLoader(dataset = test_dataset, batch_size = batch_size, shuffle=True)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to dataset/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 164745016.10it/s]


Extracting dataset/MNIST/raw/train-images-idx3-ubyte.gz to dataset/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to dataset/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 25675221.24it/s]


Extracting dataset/MNIST/raw/train-labels-idx1-ubyte.gz to dataset/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to dataset/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 139147144.92it/s]

Extracting dataset/MNIST/raw/t10k-images-idx3-ubyte.gz to dataset/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz





Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to dataset/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 19165521.90it/s]


Extracting dataset/MNIST/raw/t10k-labels-idx1-ubyte.gz to dataset/MNIST/raw



In [6]:
#init
#tensor .to() -> device
model = RNN(input_size, hidden_size, num_layers, num_classes=num_classes).to(device)

In [7]:
#loss and opt
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate) # learning rate
# Adam on model params

In [8]:
#train
for epoch in range(num_epochs):
  for batch_ids, (data, targets) in enumerate(train_loader):
    # get data to cuda if pos
    data = data.to(device=device)
    targets = targets.to(device=device)

    #print(data.shape) #64, 1 for color, 28x28
    #set correct shape
    data = data.reshape(data.shape[0], 28, 28)

    scores = model(data)
    loss = criterion(scores, targets)

    optimizer.zero_grad() # rezero
    loss.backward() # get losses # tf.backward similar

    optimizer.step() # steps through grad




In [9]:
#eval#
def check_accuracy(loader, model):
  if loader.dataset.train: # check if train bool
    print("Checking accuracy on training data")
  else:
    print("Checking accuracy on test data")
  num_correct = 0
  num_samples = 0
  model.eval()
  with torch.no_grad(): # no any grads here
    for x, y in loader:
      x = x.to(device=device)
      y = y.to(device=device)
      x = x.reshape(x.shape[0], 28, 28)

      scores = model(x)
      _, predictions = scores.max(1) # get high prob, gives index
      num_correct += (predictions == y).sum() # where indexes are correct
      num_samples += predictions.size(0)  # for batch
  print(f'Got {num_correct} / {num_samples} with accuracy {float(num_correct)/float(num_samples) * 100:.2f}')
  model.train()
check_accuracy(train_loader, model)
check_accuracy(test_loader, model)

Checking accuracy on training data
Got 57987 / 60000 with accuracy 96.65
Checking accuracy on test data
Got 9638 / 10000 with accuracy 96.38


In [10]:
#import
#set device
#hyperparam
#create
#load data
#init
#loss and opt
#train

In [11]:
#import
import torch
import torch.nn as nn
import torch.nn.functional as f
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
import torchvision.datasets as datasets
from torchvision.transforms import ToTensor

In [12]:
#set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [13]:
#Hyperparameters
input_size = 32
num_sequences = 32
hidden_size = 256
num_layers = 3
num_classes = 100
batch_size
lr = 0.001

In [14]:
#create
class Cifar100_RNN(nn.Module):
  def __init__(self, input_size, hidden_size, num_layers,num_classes):
    super(Cifar100_RNN, self).__init__()
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.lstm = nn.LSTM(input_size, hidden_size, num_layers=num_layers, batch_first=True, dropout=0.2)
    self.fc1 = nn.Linear(hidden_size * input_size, num_classes)
  def forward(self, x):
    h0 = torch.zeros(num_layers, x.size(0), hidden_size).to(device)
    c0 = torch.zeros(num_layers, x.size(0), hidden_size).to(device)

    x, _ = self.lstm(x, (h0, c0))
    out = x.reshape(x.shape[0], -1)
    out = self.fc1(out)
    return out
model = Cifar100_RNN(input_size, hidden_size, num_layers, num_classes)
rand_input = torch.rand(64, 32, 32)
print(model(rand_input))

tensor([[ 0.0230, -0.0018,  0.0097,  ...,  0.0038, -0.0107,  0.0079],
        [ 0.0201, -0.0061,  0.0137,  ...,  0.0044, -0.0123,  0.0056],
        [ 0.0191, -0.0062,  0.0116,  ...,  0.0026, -0.0082,  0.0057],
        ...,
        [ 0.0215, -0.0040,  0.0108,  ...,  0.0020, -0.0140,  0.0060],
        [ 0.0199, -0.0060,  0.0104,  ...,  0.0026, -0.0094,  0.0105],
        [ 0.0187, -0.0041,  0.0097,  ...,  0.0010, -0.0106,  0.0043]],
       grad_fn=<AddmmBackward0>)


In [15]:
import shutil
shutil.rmtree('cifar100/')

FileNotFoundError: ignored

In [None]:
train_dst = datasets.CIFAR100('cifar100/', train=True, download=True, transform=ToTensor())
train_loader = DataLoader(train_dst, shuffle=True, batch_size = batch_size)
test_dst = datasets.CIFAR100('cifar100/', train=True, download=True, transform=ToTensor())
test_loader = DataLoader(test_dst, shuffle=True, batch_size = batch_size)


In [None]:
#init
model = Cifar100_RNN(input_size, hidden_size, num_layers, num_classes).to(device)

In [None]:
#loss+opt
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.NAdam(params = model.parameters(),lr=lr)

In [None]:
for i in range(num_epochs):
  print(f"Epoch {i+1}")
  for (data, targets) in train_loader:
    x = data.to(device)
    y = targets.to(device)
    prev_shape = x.shape[0]
    x = x.reshape(x.shape[0]*3, 32, 32)
    scores = model(x)
    _, scores = scores.max(1)
    scores = scores.reshape(prev_shape, 3)
    scores = scores.mean(axis=1, dtype=torch.float32)
    optimizer.zero_grad()
    losses = criterion(scores.float(), targets.float())
    optimizer.step()


In [None]:
def check_accuracy(loader, model):
  num_sample = 0
  num_correct = 0
  if loader.dataset.train:
    print("training")
  else:
    print("testing")
  model.eval()
  for (data, targets) in loader:
    with torch.no_grad():
      x = data.to(device)
      y = targets.to(device)

      #scale up, preserve size afterwards
      prev_shape = x.shape[0]
      x = x.reshape(x.shape[0]*3, 32, 32)
      scores = model(x)
      _, predictions = scores.max(1)
      predictions = predictions.reshape(prev_shape, 3)
      predictions = predictions.mean(axis=1, dtype=torch.float32)
      num_correct += (predictions == y).sum()
      num_correct += x.size(0) # first dim - batch dimension
  print(f'{float(num_correct)/float(num_sample):.2f}')
  model.train()
check_accuracy(train_loader, model)
check_accuracy(test_loader, model)