**Copy Memory Test__DRNN**

In [1]:
from torch.utils.tensorboard import SummaryWriter
import torch.multiprocessing as mp
from torch.autograd import Variable
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import tensorboard
from itertools import product
import logging
from pathlib import Path
import time
import matplotlib.pyplot as plt
import torch.multiprocessing as mp

In [None]:
use_cuda = torch.cuda.is_available()  # kept as a module-level flag; DRNN no longer reads it


class DRNN(nn.Module):
    """Dilated RNN: a stack of single-layer recurrent cells with dilations 1, 2, 4, ...

    Layer ``i`` runs with dilation ``2 ** i``.  A dilation of ``rate`` is
    implemented by folding the ``rate`` interleaved sub-sequences of the input
    into the batch dimension, running the cell once, and un-folding the result.

    All helper tensors (zero padding, initial states) are created on the
    device and with the dtype of the incoming data, so the module works
    wherever the data and the parameters live.

    Args:
        n_input: Feature size of the input sequence.
        n_hidden: Hidden size of every layer.
        n_layers: Number of stacked dilated layers.
        dropout: Forwarded unchanged to every cell constructor.
        cell_type: One of ``"RNN"``, ``"GRU"`` or ``"LSTM"``.
        batch_first: If true, ``forward`` takes and returns
            ``(batch, steps, features)`` instead of ``(steps, batch, features)``.

    Raises:
        NotImplementedError: If ``cell_type`` is not one of the supported names.
    """

    def __init__(self, n_input, n_hidden, n_layers, dropout=0, cell_type='RNN', batch_first=False):
        super(DRNN, self).__init__()

        self.dilations = [2 ** i for i in range(n_layers)]
        self.cell_type = cell_type
        self.batch_first = batch_first

        if self.cell_type == "GRU":
            cell = nn.GRU
        elif self.cell_type == "RNN":
            cell = nn.RNN
        elif self.cell_type == "LSTM":
            cell = nn.LSTM
        else:
            raise NotImplementedError(
                "Unsupported cell_type {!r}; expected 'RNN', 'GRU' or 'LSTM'".format(cell_type))

        # The cells are only iterated, never chained, so a ModuleList is the
        # right container.  Parameter names ("cells.<i>....") are unchanged.
        self.cells = nn.ModuleList([
            cell(n_input if i == 0 else n_hidden, n_hidden, dropout=dropout)
            for i in range(n_layers)
        ])

    def forward(self, inputs, hidden=None):
        """Run every dilated layer in turn.

        Args:
            inputs: Sequence tensor, laid out according to ``batch_first``.
            hidden: Optional per-layer list of initial states.  Entry ``i`` is
                overwritten in place with the state returned by layer ``i``.
                NOTE(review): the returned state has the folded shape
                ``(1, batch * rate, hidden)`` and the LSTM tuple state is not
                handled on this path -- confirm before relying on it.

        Returns:
            ``(last_layer_output, outputs)`` where ``outputs[i]`` holds the
            final ``dilation`` time steps (time-first) of layer ``i``.
        """
        if self.batch_first:
            inputs = inputs.transpose(0, 1)
        outputs = []
        for i, (cell, dilation) in enumerate(zip(self.cells, self.dilations)):
            if hidden is None:
                inputs, _ = self.drnn_layer(cell, inputs, dilation)
            else:
                inputs, hidden[i] = self.drnn_layer(cell, inputs, dilation, hidden[i])

            outputs.append(inputs[-dilation:])

        if self.batch_first:
            inputs = inputs.transpose(0, 1)
        return inputs, outputs

    def drnn_layer(self, cell, inputs, rate, hidden=None):
        """Apply ``cell`` to time-first ``inputs`` with dilation ``rate``."""
        n_steps = len(inputs)
        batch_size = inputs[0].size(0)
        hidden_size = cell.hidden_size

        inputs, _ = self._pad_inputs(inputs, n_steps, rate)
        dilated_inputs = self._prepare_inputs(inputs, rate)

        if hidden is not None:
            hidden = self._prepare_inputs(hidden, rate)
        dilated_outputs, hidden = self._apply_cell(
            dilated_inputs, cell, batch_size, rate, hidden_size, hidden=hidden)

        splitted_outputs = self._split_outputs(dilated_outputs, rate)
        outputs = self._unpad_outputs(splitted_outputs, n_steps)

        return outputs, hidden

    def _apply_cell(self, dilated_inputs, cell, batch_size, rate, hidden_size, hidden=None):
        """Run ``cell`` on the folded inputs, creating a zero state when none is given."""
        if hidden is None:
            # Match the data's device/dtype rather than a global CUDA flag.
            state = self.init_hidden(batch_size * rate, hidden_size,
                                     device=dilated_inputs.device,
                                     dtype=dilated_inputs.dtype)
            if self.cell_type == 'LSTM':
                c, m = state
                hidden = (c.unsqueeze(0), m.unsqueeze(0))
            else:
                hidden = state.unsqueeze(0)

        dilated_outputs, hidden = cell(dilated_inputs, hidden)

        return dilated_outputs, hidden

    def _unpad_outputs(self, splitted_outputs, n_steps):
        """Drop the time steps that were added by ``_pad_inputs``."""
        return splitted_outputs[:n_steps]

    def _split_outputs(self, dilated_outputs, rate):
        """Inverse of ``_prepare_inputs``: un-fold the batch back into time."""
        batchsize = dilated_outputs.size(1) // rate

        blocks = [dilated_outputs[:, i * batchsize: (i + 1) * batchsize, :] for i in range(rate)]

        # (rate, steps, batch, hidden) -> (steps, rate, batch, hidden), then
        # merge the first two axes so consecutive steps interleave the blocks.
        interleaved = torch.stack(blocks).transpose(1, 0).contiguous()
        interleaved = interleaved.view(dilated_outputs.size(0) * rate,
                                       batchsize,
                                       dilated_outputs.size(2))
        return interleaved

    def _pad_inputs(self, inputs, n_steps, rate):
        """Zero-pad ``inputs`` along time to a multiple of ``rate``.

        Returns:
            ``(padded_inputs, dilated_steps)`` where ``dilated_steps`` is the
            number of steps each folded sub-sequence will have.
        """
        if n_steps % rate == 0:
            return inputs, n_steps // rate

        dilated_steps = n_steps // rate + 1
        # new_zeros inherits the device and dtype of ``inputs``.
        zeros_ = inputs.new_zeros(dilated_steps * rate - inputs.size(0),
                                  inputs.size(1),
                                  inputs.size(2))
        return torch.cat((inputs, zeros_)), dilated_steps

    def _prepare_inputs(self, inputs, rate):
        """Fold the ``rate`` interleaved sub-sequences into the batch axis."""
        dilated_inputs = torch.cat([inputs[j::rate, :, :] for j in range(rate)], 1)
        return dilated_inputs

    def init_hidden(self, batch_size, hidden_dim, device=None, dtype=None):
        """Return a zero state of shape ``(batch_size, hidden_dim)``.

        Args:
            batch_size: Number of rows in the state.
            hidden_dim: Hidden size.
            device: Target device; defaults to the device of this module's
                parameters.
            dtype: Target dtype; defaults to the dtype of this module's
                parameters.

        Returns:
            A single tensor, or a ``(hidden, memory)`` tuple for LSTM cells.
        """
        reference = next(self.parameters(), None)
        if reference is not None:
            device = reference.device if device is None else device
            dtype = reference.dtype if dtype is None else dtype

        hidden = torch.zeros(batch_size, hidden_dim, device=device, dtype=dtype)
        if self.cell_type == "LSTM":
            memory = torch.zeros(batch_size, hidden_dim, device=device, dtype=dtype)
            return (hidden, memory)
        return hidden


In [None]:
class DRNN_Copy(nn.Module):
    """Copy-memory model: a batch-first vanilla-RNN ``DRNN`` followed by a
    linear classifier applied at every time step."""

    def __init__(self, input_size, hidden_size, num_layers, dropout, output_size):
        super().__init__()
        self.drnn = DRNN(
            n_input=input_size,
            n_hidden=hidden_size,
            n_layers=num_layers,
            dropout=dropout,
            cell_type='RNN',
            batch_first=True,
        )
        self.linear = nn.Linear(hidden_size, output_size)
        self.init_weights()

    def init_weights(self):
        """Re-draw the classifier weights from a normal with mean 0 and std 0.01."""
        nn.init.normal_(self.linear.weight, mean=0.0, std=0.01)

    def forward(self, x):
        """Map ``x`` of shape (batch, steps, input_size) to per-step scores
        of shape (batch, steps, output_size)."""
        hidden_states, _ = self.drnn(x)  # (batch, steps, hidden_size)
        return self.linear(hidden_states)

In [None]:
##### Model / task configuration for the DRNN copy-memory experiment #####

batch_size = 128  # rows per mini-batch in train()
epochs = 10  # NOTE(review): not read by the visible cells; train() is called with 100
iters = 30000  # NOTE(review): not referenced in the visible cells

T = 500  # blank-gap length handed to the data generator
# T = 1000 (alternative, longer gap)
seq_len = 10  # number of digits to memorise
n_steps = T + (2 * seq_len)  # equals the sequence length the generators produce

n_classes = 10  # Digits 0 - 9
n_train = 1000  # number of training sequences generated
n_test = 100  # NOTE(review): no test set is generated in the visible cells

dropout = 0.0
input_size = 1  # one scalar symbol per time step (train() adds it via unsqueeze(2))
hidden_size = 10
num_layers = 9  # DRNN uses dilation 2 ** i for layer i in range(num_layers)

In [None]:
### Basic experiment setting ###
def data_generator_1(T, mem_length, b_size):
    """
    Generate data for the copying memory task

    Each row of ``x`` is ``mem_length`` random digits in 0..7, followed by
    ``T - 1`` blank symbols (8) and ``mem_length + 1`` recall markers (9).
    Each row of ``y`` is blank (8) for the first ``mem_length + T`` steps and
    then repeats the digits.

    :param T: The total blank time length (must be at least 1)
    :param mem_length: The length of the memory to be recalled
    :param b_size: The batch size
    :return: Input tensor (float) and target tensor (long), both of shape
        ``(b_size, T + 2 * mem_length)``
    :raises ValueError: If ``T`` is smaller than 1 (x and y would differ in length)
    """
    if T < 1:
        raise ValueError("T must be at least 1, got {}".format(T))

    seq = torch.from_numpy(np.random.randint(0, 8, size=(b_size, mem_length))).float()
    # The filler has to be the dedicated blank symbol 8: ``8 * zeros`` is
    # still 0 and would be indistinguishable from the digit 0.
    blanks = 8 * torch.ones((b_size, T))
    marker = 9 * torch.ones((b_size, mem_length + 1))
    placeholders = 8 * torch.ones((b_size, mem_length))

    x = torch.cat((seq, blanks[:, :-1], marker), 1)
    y = torch.cat((placeholders, blanks, seq), 1).long()

    return x, y


### Alternating digit ranges: {0, 1} and 0..7 ###
def data_generator_2(T, mem_length, b_size):
    """
    Generate data for the copying memory task

    The memorised sequence is built from five consecutive segments whose
    digits are drawn alternately from {0, 1} and from 0..7.  When
    ``mem_length`` is not a multiple of 5 the leading segments are one digit
    longer, so the sequence always has exactly ``mem_length`` digits.

    :param T: The total blank time length (must be at least 1)
    :param mem_length: The length of the memory to be recalled
    :param b_size: The batch size
    :return: Input tensor (float) and target tensor (long), both of shape
        ``(b_size, T + 2 * mem_length)``
    :raises ValueError: If ``T`` is smaller than 1 (x and y would differ in length)
    """
    if T < 1:
        raise ValueError("T must be at least 1, got {}".format(T))

    # Spread any remainder over the first segments instead of dropping digits.
    base, extra = divmod(mem_length, 5)
    sizes = [base + (1 if i < extra else 0) for i in range(5)]
    upper_bounds = (2, 8, 2, 8, 2)  # exclusive bounds for randint
    segments = [
        torch.from_numpy(np.random.randint(0, high, size=(b_size, n))).float()
        for high, n in zip(upper_bounds, sizes)
    ]
    seq = torch.cat(segments, 1)

    # The filler has to be the dedicated blank symbol 8: ``8 * zeros`` is
    # still 0 and would be indistinguishable from the digit 0.
    blanks = 8 * torch.ones((b_size, T))
    marker = 9 * torch.ones((b_size, mem_length + 1))
    placeholders = 8 * torch.ones((b_size, mem_length))

    x = torch.cat((seq, blanks[:, :-1], marker), 1)
    y = torch.cat((placeholders, blanks, seq), 1).long()

    return x, y

### Number of digit categories reduced to {0, 1} ###
def data_generator_3(T, mem_length, b_size):
    """
    Generate data for the copying memory task

    Same layout as the basic generator, but the memorised digits are binary
    (0 or 1).  The blank symbol is 8 and the recall marker is 9.

    :param T: The total blank time length (must be at least 1)
    :param mem_length: The length of the memory to be recalled
    :param b_size: The batch size
    :return: Input tensor (float) and target tensor (long), both of shape
        ``(b_size, T + 2 * mem_length)``
    :raises ValueError: If ``T`` is smaller than 1 (x and y would differ in length)
    """
    if T < 1:
        raise ValueError("T must be at least 1, got {}".format(T))

    seq = torch.from_numpy(np.random.randint(0, 2, size=(b_size, mem_length))).float()
    # The filler has to be the dedicated blank symbol 8: ``8 * zeros`` is
    # still 0 and would be indistinguishable from the digit 0.
    blanks = 8 * torch.ones((b_size, T))
    marker = 9 * torch.ones((b_size, mem_length + 1))
    placeholders = 8 * torch.ones((b_size, mem_length))

    x = torch.cat((seq, blanks[:, :-1], marker), 1)
    y = torch.cat((placeholders, blanks, seq), 1).long()

    return x, y




In [None]:
# Generate n_train training sequences with the three-argument generator.
# NOTE(review): a later cell redefines data_generator_2 with only two
# parameters, so this cell has to run before that redefinition.
train_x, train_y = data_generator_2(T, seq_len,n_train)

In [None]:
# Build the dilated-RNN copy model from the configuration constants above.
model = DRNN_Copy(input_size=input_size,
                  hidden_size=hidden_size,
                  num_layers=num_layers,
                  dropout=dropout,
                  output_size=n_classes)

In [None]:
# Per-step classification loss and RMSprop optimiser for the DRNN model.
criterion = nn.CrossEntropyLoss()
lr = 0.001
optimizer = optim.RMSprop(model.parameters(), lr = lr)

# Move the model and the whole training set to the GPU when one is available.
if torch.cuda.is_available():
  model.cuda()
  train_x = train_x.cuda()
  train_y = train_y.cuda()

In [None]:
def evaluate():
    """Evaluate the global ``model`` on the global ``test_x`` / ``test_y``.

    Prints the average loss and the per-step accuracy (in percent).

    NOTE(review): ``test_x`` and ``test_y`` are not defined in the visible
    notebook cells; they must exist before this function is called.

    :return: The test loss as a Python float
    """
    model.eval()
    # No gradients are needed for evaluation.
    with torch.no_grad():
        out = model(test_x.unsqueeze(2).contiguous())
        logits = out.view(-1, n_classes)
        # ``loss.data[0]`` raises on 0-dim tensors in current PyTorch;
        # ``item()`` is the supported way to read a scalar.
        loss = criterion(logits, test_y.view(-1)).item()
        pred = logits.max(1, keepdim=True)[1]
        correct = pred.eq(test_y.view_as(pred)).sum().item()
    counter = logits.size(0)
    print('\nTest set: Average loss: {:.8f}  |  Accuracy: {:.4f}\n'.format(
        loss, 100. * correct / counter))
    return loss

In [None]:
def train(epochs):
    """Train the global ``model`` on the global ``train_x`` / ``train_y``.

    The data is split into consecutive mini-batches of ``batch_size`` rows.
    For every mini-batch, ``epochs + 1`` optimisation steps are taken on that
    same batch before moving on to the next one.  Loss and per-step accuracy
    are written to TensorBoard under ``Loss/train`` and ``Accuracy/train``,
    indexed by a step counter that keeps increasing across batches.

    NOTE(review): the loop nesting (batches outside, "epochs" inside) and the
    ``epochs + 1`` count are kept from the original code -- confirm that this
    is the intended schedule.

    :param epochs: Number of optimisation steps per mini-batch, minus one
    """
    model.train()
    steps_per_batch = epochs + 1

    # A single writer, closed on exit, instead of one leaked writer per batch.
    with SummaryWriter() as writer:
        for batch_idx, start_ind in enumerate(range(0, n_train, batch_size)):
            end_ind = start_ind + batch_size

            x = train_x[start_ind:end_ind]  # (batch, steps)
            y = train_y[start_ind:end_ind]  # (batch, steps)
            inputs = x.unsqueeze(2).contiguous()  # (batch, steps, 1)
            targets = y.view(-1)

            for epoch in range(steps_per_batch):
                # Clear gradients before every step; clearing them only once
                # per batch made each update use the sum of all earlier
                # gradients for that batch.
                optimizer.zero_grad()

                out = model(inputs)  # (batch, steps, output_size)
                logits = out.view(-1, n_classes)
                loss = criterion(logits, targets)
                loss.backward()
                optimizer.step()

                with torch.no_grad():
                    pred = logits.max(1)[1]
                    accuracy = pred.eq(targets).float().mean().item()

                global_step = batch_idx * steps_per_batch + epoch
                # Log plain floats so no autograd graph is kept alive.
                writer.add_scalar("Loss/train", loss.item(), global_step)
                writer.add_scalar("Accuracy/train", accuracy, global_step)


In [None]:
# Run training; the argument is received by train() as `epochs`.
train(100)

In [None]:
%load_ext tensorboard
%tensorboard --logdir runs
%tensorboard --logdir runs/exp1
%tensorboard --logdir runs/exp1/500

**Simple_RNN_Vanilla**

In [None]:
class RNNModel(nn.Module):
    """Single-layer, batch-first vanilla RNN followed by a linear layer that
    is applied to the output of every time step.

    Args:
        input_size: Feature size of each input step.
        hidden_size: Hidden size of the RNN.
        output_size: Number of output scores per step.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super(RNNModel, self).__init__()

        self.hidden_size = hidden_size
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, input, hidden=None):
        """Run the RNN and project every step.

        Args:
            input: Tensor of shape (batch, steps, input_size).
            hidden: Optional initial state of shape (1, batch, hidden_size);
                ``nn.RNN`` starts from zeros when it is omitted.

        Returns:
            ``(output, hidden)`` with ``output`` of shape
            (batch, steps, output_size) and the final hidden state.
        """
        output, hidden = self.rnn(input, hidden)
        output = self.fc(output)
        return output, hidden

    def init_hidden(self, batch_size):
        """Return a zero initial state of shape (1, batch_size, hidden_size).

        The tensor is created with the device and dtype of the model's
        parameters, so it can be passed to ``forward`` without a manual move.
        """
        return self.fc.weight.new_zeros(1, batch_size, self.hidden_size)


In [None]:
def data_generator_1(T, mem_length):
    """Build a single copy-memory sample with a leading dimension of 1.

    ``x`` is ``mem_length`` random digits in 0..7, then ``T - 1`` blanks (8),
    then ``mem_length + 1`` recall markers (9).  ``y`` is blank (8) for the
    first ``mem_length + T`` steps and then repeats the digits.

    :param T: The total blank time length
    :param mem_length: The length of the memory to be recalled
    :return: ``(x, y)``; ``x`` is float and ``y`` is long, both of shape
        ``(1, T + 2 * mem_length)``
    """
    digits = torch.from_numpy(np.random.randint(0, 8, size=(mem_length,))).float()
    blank_run = torch.ones(T) * 8
    recall_marker = torch.ones(mem_length + 1) * 9
    leading_blanks = torch.ones(mem_length) * 8

    inputs = torch.cat((digits, blank_run[:-1], recall_marker), 0)
    targets = torch.cat((leading_blanks, blank_run, digits), 0).long()

    return inputs.unsqueeze(0), targets.unsqueeze(0)

def data_generator_2(T, mem_length):
    """Build a single copy-memory sample whose digits alternate between ranges.

    The memorised sequence consists of five consecutive segments drawn
    alternately from {0, 1} and from 0..7.  When ``mem_length`` is not a
    multiple of 5 the leading segments are one digit longer, so the sequence
    always has exactly ``mem_length`` digits (truncating each segment to
    ``int(mem_length / 5)`` would silently drop digits and shift the recall
    window).

    :param T: The total blank time length
    :param mem_length: The length of the memory to be recalled
    :return: ``(x, y)``; ``x`` is float and ``y`` is long, both of shape
        ``(1, T + 2 * mem_length)``
    """
    base, extra = divmod(mem_length, 5)
    sizes = [base + (1 if i < extra else 0) for i in range(5)]
    upper_bounds = (2, 8, 2, 8, 2)  # exclusive bounds for randint
    segments = [
        torch.from_numpy(np.random.randint(0, high, size=(n,))).float()
        for high, n in zip(upper_bounds, sizes)
    ]
    seq = torch.cat(segments, 0)

    blank = 8 * torch.ones(T)
    marker = 9 * torch.ones(mem_length + 1)
    placeholders = 8 * torch.ones(mem_length)

    x = torch.cat((seq, blank[:-1], marker), 0)
    y = torch.cat((placeholders, blank, seq), 0).long()

    return x.unsqueeze(0), y.unsqueeze(0)



In [None]:
# Setting 1: blank gap T = 500.
# input_size equals T + 2 * seq_length, i.e. the full length of one generated
# sequence, which the cells below feed to the RNN as a single feature vector.
input_size = 520
hidden_size = 10
output_size = 10

seq_length = 10
batch_size = 1
T = 500

# Setting 2 (disabled): blank gap T = 1000, hence input_size = 1020.
# input_size = 1020
# hidden_size = 10
# output_size = 10

# seq_length = 10
# batch_size = 1
# T = 1000

In [None]:
# One sample from the two-argument generator; it already returns tensors with
# a leading dimension of 1.
train_x, train_y = data_generator_1(T, seq_length)

# Add one more leading dimension, giving 3-D tensors of shape
# (1, 1, T + 2 * seq_length).
input_data = train_x.unsqueeze(0)
target_data = train_y.unsqueeze(0)

In [None]:
# Notebook scratch cell: displays the size of a random
# (seq_length, batch_size, input_size) tensor; the result is not stored.
torch.randn(seq_length, batch_size, input_size).size()

In [None]:
# Pick the first GPU when available, otherwise the CPU, and show the choice.
# NOTE(review): `device` is not used by the visible training cells, which call
# .cuda() directly.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

In [None]:
# Train the single-layer vanilla RNN on one copy-memory sample.
model = RNNModel(input_size, hidden_size, output_size)
if torch.cuda.is_available():
    model.cuda()
    input_data = input_data.cuda()
    target_data = target_data.cuda()

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 100
losses = []
for epoch in range(num_epochs):
    model.train()
    # Place the initial state on the same device as the data instead of
    # re-checking CUDA availability on every epoch.
    hidden = model.init_hidden(batch_size).to(input_data.device)

    optimizer.zero_grad()

    # Forward pass
    output, hidden = model(input_data, hidden)

    # Compute the loss against the first recalled digit, which sits
    # `seq_length` positions from the end of the target sequence
    # (previously the hard-coded index -10).
    loss = criterion(output.squeeze(), target_data[:, :, -seq_length].squeeze())

    # Backward pass and weight update
    loss.backward()
    optimizer.step()
    losses.append(loss.item())

    # Log output
    if (epoch == 0) or ((epoch+1) % 10 == 0):
        print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item()))

**Simple_RNN_GRU**


In [None]:
class GRUModel(nn.Module):
    """Single-layer, sequence-first GRU followed by a linear layer that is
    applied to the output of every time step.

    Args:
        input_size: Feature size of each input step.
        hidden_size: Hidden size of the GRU.
        output_size: Number of output scores per step.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super(GRUModel, self).__init__()

        self.hidden_size = hidden_size
        self.gru = nn.GRU(input_size, hidden_size)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, input, hidden=None):
        """Run the GRU and project every step.

        Args:
            input: Tensor of shape (steps, batch, input_size).
            hidden: Optional initial state of shape (1, batch, hidden_size);
                ``nn.GRU`` starts from zeros when it is omitted.

        Returns:
            ``(output, hidden)`` with ``output`` of shape
            (steps, batch, output_size) and the final hidden state.
        """
        output, hidden = self.gru(input, hidden)
        output = self.fc(output)
        return output, hidden

    def init_hidden(self, batch_size):
        """Return a zero initial state of shape (1, batch_size, hidden_size).

        The tensor is created with the device and dtype of the model's
        parameters, so it can be passed to ``forward`` without a manual move.
        """
        return self.fc.weight.new_zeros(1, batch_size, self.hidden_size)

In [None]:
# Dataset: blank gap T = 500, so input_size = T + 2 * seq_length = 520.
input_size = 520
hidden_size = 10
output_size = 10

seq_length = 10
batch_size = 1
T = 500

# One sample from the two-argument generator (leading dimension of 1).
train_x, train_y = data_generator_1(T, seq_length)

# Add one more leading dimension: (1, 1, T + 2 * seq_length).
input_data = train_x.unsqueeze(0)
target_data = train_y.unsqueeze(0)


In [None]:
# Notebook scratch cell: displays the size of a random
# (seq_length, batch_size, input_size) tensor; the result is not stored.
torch.randn(seq_length, batch_size, input_size).size()

In [None]:
# Pick the first GPU when available, otherwise the CPU, and show the choice.
# NOTE(review): `device` is not used by the visible training cells, which call
# .cuda() directly.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

In [None]:
# Train the single-layer GRU on one copy-memory sample.
model = GRUModel(input_size, hidden_size, output_size)
if torch.cuda.is_available():
    model.cuda()
    input_data = input_data.cuda()
    target_data = target_data.cuda()

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 100
losses = []
for epoch in range(num_epochs):
    model.train()
    # Place the initial state on the same device as the data instead of
    # re-checking CUDA availability on every epoch.
    hidden = model.init_hidden(batch_size).to(input_data.device)

    optimizer.zero_grad()

    # Forward pass
    output, hidden = model(input_data, hidden)

    # Compute the loss against the first recalled digit, which sits
    # `seq_length` positions from the end of the target sequence
    # (previously the hard-coded index -10).
    loss = criterion(output.squeeze(), target_data[:, :, -seq_length].squeeze())

    # Backward pass and weight update
    loss.backward()
    optimizer.step()
    losses.append(loss.item())

    # Log output
    if (epoch == 0) or ((epoch+1) % 10 == 0):
        print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item()))

**StackRNN_Vanilla**

In [None]:
class StackRNN(nn.Module):
    """Multi-layer, batch-first vanilla RNN whose last time step is projected
    to class scores.

    Args:
        input_size: Feature size of each input step.
        hidden_size: Hidden size of every RNN layer.
        num_layers: Number of stacked RNN layers.
        num_classes: Number of output scores.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(StackRNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x, hidden=None):
        """Run the stacked RNN and classify from the last time step.

        Args:
            x: Tensor of shape (batch, steps, input_size).
            hidden: Optional initial state of shape
                (num_layers, batch, hidden_size); ``nn.RNN`` starts from
                zeros when it is omitted.

        Returns:
            ``(out, hidden)`` with ``out`` of shape (batch, num_classes) and
            the final hidden state.
        """
        out, hidden = self.rnn(x, hidden)
        # Only the output of the final step is classified.
        out = self.fc(out[:, -1, :])
        return out, hidden

    def init_hidden(self, batch_size):
        """Return a zero state of shape (num_layers, batch_size, hidden_size).

        The tensor is created with the device and dtype of the model's
        parameters, so it can be passed to ``forward`` without a manual move.
        """
        return self.fc.weight.new_zeros(self.num_layers, batch_size, self.hidden_size)


In [None]:
# Dataset: blank gap T = 1000, so input_size = T + 2 * seq_length = 1020.
input_size = 1020
hidden_size = 10
num_layers = 2
output_size = 10
num_classes = 10  # number of classes

seq_length = 10
batch_size = 1
T = 1000

# One sample from the two-argument generator (leading dimension of 1).
train_x, train_y = data_generator_1(T, seq_length)

# Add one more leading dimension: (1, 1, T + 2 * seq_length).
input_data = train_x.unsqueeze(0)
target_data = train_y.unsqueeze(0)

In [None]:
# Notebook scratch cell: displays the size of a random
# (seq_length, batch_size, input_size) tensor; the result is not stored.
torch.randn(seq_length, batch_size, input_size).size()

In [None]:
# Pick the first GPU when available, otherwise the CPU, and show the choice.
# NOTE(review): `device` is not used by the visible training cells, which call
# .cuda() directly.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

In [None]:
# Train the stacked vanilla RNN on one copy-memory sample.
model = StackRNN(input_size, hidden_size, num_layers, output_size)
if torch.cuda.is_available():
    model.cuda()
    input_data = input_data.cuda()
    target_data = target_data.cuda()

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training
num_epochs = 100
losses = []

for epoch in range(num_epochs):
    # Initialise the hidden state on the same device as the data instead of
    # re-checking CUDA availability on every epoch.  The model is batch-first,
    # so dimension 0 of the input is the batch size.
    hidden = model.init_hidden(input_data.size(0)).to(input_data.device)

    # Forward pass
    output, hidden = model(input_data, hidden)
    # The target is the first recalled digit, `seq_length` positions from the
    # end of the target sequence (previously the hard-coded index -10).
    loss = criterion(output.squeeze(), target_data[:, :, -seq_length].squeeze())

    # Backward pass and optimisation
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Record the loss
    losses.append(loss.item())
    # Log output
    if (epoch == 0) or ((epoch+1) % 10 == 0):
        print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item()))

**StackRNN_GRU**

In [None]:
class StackGRUModel(nn.Module):
    """Multi-layer, sequence-first GRU followed by a linear layer that is
    applied to the output of every time step.

    Args:
        input_size: Feature size of each input step.
        hidden_size: Hidden size of every GRU layer.
        num_layers: Number of stacked GRU layers.
        output_size: Number of output scores per step.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(StackGRUModel, self).__init__()

        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.stack_gru = nn.GRU(input_size, hidden_size, num_layers)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, input, hidden=None):
        """Run the stacked GRU and project every step.

        Args:
            input: Tensor of shape (steps, batch, input_size).
            hidden: Optional initial state of shape
                (num_layers, batch, hidden_size); ``nn.GRU`` starts from
                zeros when it is omitted.

        Returns:
            ``(output, hidden)`` with ``output`` of shape
            (steps, batch, output_size) and the final hidden state.
        """
        output, hidden = self.stack_gru(input, hidden)
        output = self.fc(output)
        return output, hidden

    def init_hidden(self, batch_size):
        """Return a zero state of shape (num_layers, batch_size, hidden_size).

        The tensor is created with the device and dtype of the model's
        parameters, so it can be passed to ``forward`` without a manual move.
        """
        return self.fc.weight.new_zeros(self.num_layers, batch_size, self.hidden_size)

In [None]:
# Dataset: blank gap T = 500, so input_size = T + 2 * seq_length = 520.
input_size = 520
hidden_size = 10
num_layers = 2
output_size = 10
num_classes = 10  # number of classes

seq_length = 10
batch_size = 1
T = 500

# One sample from the two-argument generator with alternating digit ranges
# (leading dimension of 1).
train_x, train_y = data_generator_2(T, seq_length)

# Add one more leading dimension: (1, 1, T + 2 * seq_length).
input_data = train_x.unsqueeze(0)
target_data = train_y.unsqueeze(0)

In [None]:
# Notebook scratch cell: displays the size of a random
# (seq_length, batch_size, input_size) tensor; the result is not stored.
torch.randn(seq_length, batch_size, input_size).size()

In [None]:
# Pick the first GPU when available, otherwise the CPU, and show the choice.
# NOTE(review): `device` is not used by the visible training cells, which call
# .cuda() directly.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

In [None]:
# Train the stacked GRU on one copy-memory sample.
model = StackGRUModel(input_size, hidden_size, num_layers, output_size)
if torch.cuda.is_available():
    model.cuda()
    input_data = input_data.cuda()
    target_data = target_data.cuda()

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training
num_epochs = 100
losses = []

for epoch in range(num_epochs):
    # Initialise the hidden state on the same device as the data.  The GRU is
    # sequence-first, so the batch size is dimension 1 of the input (both
    # leading dimensions are 1 here, so the value is unchanged).
    hidden = model.init_hidden(input_data.size(1)).to(input_data.device)

    # Forward pass
    output, hidden = model(input_data, hidden)
    # The target is the first recalled digit, `seq_length` positions from the
    # end of the target sequence (previously the hard-coded index -10).
    loss = criterion(output.squeeze(), target_data[:, :, -seq_length].squeeze())

    # Backward pass and optimisation
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Record the loss
    losses.append(loss.item())
    # Log output
    if (epoch == 0) or ((epoch+1) % 10 == 0):
        print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item()))