In [8]:
import torch 
import torch.nn as nn
import os
import pandas as pd
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import numpy as np

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [1]:
# Hyper-parameters
sequence_length = n_timesteps = 19
input_size = [64, 114]
hidden_size = [32, 64, 128, 256, 512]
num_layers = [1, 2]
num_classes = 4
batch_size = [32, 64, 128]
num_epochs = 10
learning_rate = [0.01, 0.001, 0.0001, 0.00001]
optimizer = ['sgd', 'rmsprop', 'adam']

In [4]:
n_samples = 764
n_features = 128

x = torch.randn(n_samples, n_timesteps, n_features)
y = torch.randint(0, 2, (n_samples,))

In [5]:
class CustomDataset(Dataset):
    def __init__(self, x, y):
        self.data = x
        self.label = y
        
    def __len__(self):
        return len(self.label)

    def __getitem__(self, idx):
        data = torch.tensor(self.data[idx], dtype=torch.float32)
        label = torch.tensor(self.label[idx], dtype=torch.int64)
        return data, label

In [6]:
x.shape

torch.Size([764, 19, 128])

In [7]:
y.shape

torch.Size([764])

In [20]:
#Cross-Validation
k = 10
idx = list(np.random.choice(n_samples, n_samples, replace=False))
fold_size = n_samples//k
dataset = dict()

for i in range(k):
    train = idx[:i*fold_size] + idx[(i+1)*fold_size:]
    test = idx[i*fold_size:(i+1)*fold_size]
    dataset[i] = (CustomDataset(x[train], y[train]), CustomDataset(x[test], y[test]))

In [23]:
#0번째 k-fold 의 trainset의 첫번째 데이터
dataset[0][0].data[0]

tensor([[-0.8934,  0.8516, -1.5742,  ...,  0.4017, -0.3243, -0.2149],
        [ 1.0225,  0.2800,  0.6403,  ..., -1.3013, -1.7293,  0.1123],
        [-1.5453,  0.1417,  1.4023,  ..., -0.9582, -0.9954,  0.5708],
        ...,
        [ 2.1423,  0.0292,  1.1826,  ..., -0.8704, -0.3447,  0.6466],
        [-0.1447,  0.4124, -1.4303,  ..., -0.8386, -0.0705, -0.2782],
        [ 0.2907, -0.1864, -0.5096,  ...,  1.1016, -0.1130,  0.5015]])

In [28]:
train_loader = DataLoader(trainset, batch_size=batch_size, shuffle=True, drop_last=True)
test_loader = DataLoader(testset, batch_size=batch_size, shuffle=False, drop_last=True)

In [59]:
'''
# Recurrent neural network (many-to-one)
# save last layer's activation values
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes, batch_size, sequence_length):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.input_size = input_size
        self.batch_size = batch_size
        self.sequence_length = sequence_length
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=False)
        self.fc = nn.Linear(hidden_size, num_classes)
    
    def forward(self, x):
        # Set initial hidden and cell states 
        outputs = torch.autograd.Variable(torch.cuda.FloatTensor(self.sequence_length, self.batch_size, self.hidden_size))
        h = torch.zeros(self.num_layers, self.batch_size, self.hidden_size).to(device) 
        c = torch.zeros(self.num_layers, self.batch_size, self.hidden_size).to(device)
        
        for i in range(self.sequence_length):
        # Forward propagate LSTM
            curr_seq = x[i:i+1, :, :]
            out, (h, c) = self.lstm(curr_seq, (h, c))  # out: tensor of shape (batch_size, seq_length, hidden_size)
            outputs[i, :, :] = out
            # Decode the hidden state of the last time step
        out = self.fc(outputs[-1, :, :])
        return out, outputs
'''

In [76]:
# Recurrent neural network (many-to-one)
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes, batch_size, sequence_length):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.input_size = input_size
        self.batch_size = batch_size
        self.sequence_length = sequence_length
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=False)
        self.fc = nn.Linear(hidden_size, num_classes)
    
    def forward(self, x):
        # Set initial hidden and cell states 
        outputs = torch.autograd.Variable(torch.cuda.FloatTensor(self.sequence_length, self.num_layers, self.batch_size, self.hidden_size))
        h = torch.zeros(self.num_layers, self.batch_size, self.hidden_size).to(device) 
        c = torch.zeros(self.num_layers, self.batch_size, self.hidden_size).to(device)
        
        for i in range(self.sequence_length):
        # Forward propagate LSTM
            curr_seq = x[i:i+1, :, :]
            out, (h, c) = self.lstm(curr_seq, (h, c))  # out: tensor of shape (batch_size, seq_length, hidden_size)
            outputs[i, :, :, :] = h
            # Decode the hidden state of the last time step
        out = self.fc(out[-1, :, :])
        return out, outputs
    
model = RNN(input_size, hidden_size, num_layers, num_classes, batch_size, sequence_length).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.reshape(sequence_length, batch_size, input_size).to(device)
        labels = labels.to(device)
        if epoch == 0 and i == 0:
            print("data type:", type(images), type(labels))
            print(images.shape, labels.shape)
            print("images:", images)
            print("labels:", labels)
        # Forward pass
        o, _ = model(images)
        loss = criterion(o, labels)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if (i+1) % 3 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))
params = dict()
for i, param in enumerate(model.parameters()):
    params['w'+str(i)] = param
    print(param, param.shape)
    
# Test the model
model.eval()
y_true = list()
y_pred = list()
lstm_h = list()
last_fc = list()

with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.reshape(sequence_length, batch_size, input_size).to(device)
        labels = labels.to(device)
        o, outputs = model(images)
        _, predicted = torch.max(o.data, 1)
        y_true.extend(labels)
        y_pred.extend(predicted)
        lstm_h.extend(outputs)
        last_fc.extend(o)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total)) 

# Save the model checkpoint
torch.save(model.state_dict(), 'model.ckpt')

bad_index = list()
for i, pred in enumerate(y_pred):
    if pred == 0:
        bad_index.append(i)
print(bad_index)

used_samples = len(testset) - (len(testset)%batch_size)


In [77]:
model = RNN(input_size, hidden_size, num_layers, num_classes, batch_size, sequence_length).to(device)

In [78]:
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [79]:
# Train the model
total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.reshape(sequence_length, batch_size, input_size).to(device)
        labels = labels.to(device)
        if epoch == 0 and i == 0:
            print("data type:", type(images), type(labels))
            print(images.shape, labels.shape)
            print("images:", images)
            print("labels:", labels)
        # Forward pass
        o, _ = model(images)
        loss = criterion(o, labels)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if (i+1) % 3 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

  data = torch.tensor(self.data[idx], dtype=torch.float32)
  label = torch.tensor(self.label[idx], dtype=torch.int64)


data type: <class 'torch.Tensor'> <class 'torch.Tensor'>
torch.Size([18, 64, 128]) torch.Size([64])
images: tensor([[[-1.2745e+00, -1.2203e+00,  1.9554e+00,  ...,  1.5681e+00,
           1.1967e+00,  5.3070e-01],
         [ 1.4619e+00,  1.4328e+00,  1.0010e+00,  ..., -7.0722e-01,
           1.1809e+00, -1.4300e+00],
         [ 5.7767e-01, -4.0261e-01,  7.3021e-02,  ...,  1.0023e+00,
          -9.4646e-01, -1.0641e+00],
         ...,
         [ 8.7250e-01,  2.1396e-01, -6.0197e-01,  ..., -6.6695e-01,
          -1.3337e-01,  8.4785e-02],
         [ 1.0351e-01,  4.7204e-01, -1.3595e+00,  ..., -8.7757e-02,
           1.1758e-02,  5.1441e-01],
         [-8.0490e-01,  7.6909e-01,  1.0566e+00,  ..., -1.1384e-01,
          -1.4867e+00,  9.7428e-01]],

        [[-2.0316e-01,  1.5572e-01,  1.4202e+00,  ...,  1.6420e+00,
           6.6786e-01,  5.8187e-01],
         [ 1.0492e+00, -5.1203e-01,  1.8969e+00,  ..., -9.0130e-01,
           5.8423e-01,  3.0711e-01],
         [ 1.5174e+00, -1.3090e+00, 

In [80]:
params = dict()
for i, param in enumerate(model.parameters()):
    params['w'+str(i)] = param
    print(param, param.shape)

Parameter containing:
tensor([[ 0.1116,  0.1171,  0.0116,  ...,  0.0521,  0.0767, -0.0293],
        [ 0.0958,  0.1450,  0.1369,  ...,  0.0936,  0.2016,  0.0890],
        [ 0.1199,  0.0392,  0.0700,  ...,  0.0016,  0.1653,  0.0778],
        ...,
        [-0.0356, -0.0490,  0.1948,  ..., -0.0351, -0.0617,  0.0055],
        [ 0.0009,  0.1043, -0.0416,  ...,  0.0171,  0.0218,  0.0369],
        [-0.0486,  0.0445,  0.0326,  ...,  0.0463,  0.0468, -0.0044]],
       device='cuda:0', requires_grad=True) torch.Size([1024, 128])
Parameter containing:
tensor([[ 0.1035, -0.0120,  0.0789,  ..., -0.0713,  0.0720,  0.0265],
        [-0.0688,  0.2744,  0.2545,  ...,  0.0381,  0.0871,  0.0254],
        [ 0.0359, -0.0419,  0.0594,  ..., -0.0565,  0.0005,  0.0746],
        ...,
        [ 0.0407,  0.0618,  0.0810,  ...,  0.1242, -0.0864, -0.0119],
        [ 0.0846, -0.0222,  0.0177,  ..., -0.1169,  0.0031,  0.0121],
        [ 0.0998,  0.0637,  0.1303,  ..., -0.0862, -0.0238,  0.0387]],
       device='cuda:

In [81]:
# Test the model
model.eval()
y_true = list()
y_pred = list()
lstm_h = list()
last_fc = list()

with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.reshape(sequence_length, batch_size, input_size).to(device)
        labels = labels.to(device)
        o, outputs = model(images)
        _, predicted = torch.max(o.data, 1)
        y_true.extend(labels)
        y_pred.extend(predicted)
        lstm_h.extend(outputs)
        last_fc.extend(o)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total)) 

# Save the model checkpoint
torch.save(model.state_dict(), 'model.ckpt')

Test Accuracy of the model on the 10000 test images: 44.53125 %


  data = torch.tensor(self.data[idx], dtype=torch.float32)
  label = torch.tensor(self.label[idx], dtype=torch.int64)


In [31]:
y_true, y_pred

([tensor(0, device='cuda:0'),
  tensor(1, device='cuda:0'),
  tensor(0, device='cuda:0'),
  tensor(1, device='cuda:0'),
  tensor(1, device='cuda:0'),
  tensor(0, device='cuda:0'),
  tensor(0, device='cuda:0'),
  tensor(1, device='cuda:0'),
  tensor(1, device='cuda:0'),
  tensor(0, device='cuda:0'),
  tensor(0, device='cuda:0'),
  tensor(1, device='cuda:0'),
  tensor(0, device='cuda:0'),
  tensor(1, device='cuda:0'),
  tensor(0, device='cuda:0'),
  tensor(0, device='cuda:0'),
  tensor(1, device='cuda:0'),
  tensor(1, device='cuda:0'),
  tensor(1, device='cuda:0'),
  tensor(1, device='cuda:0'),
  tensor(0, device='cuda:0'),
  tensor(1, device='cuda:0'),
  tensor(0, device='cuda:0'),
  tensor(1, device='cuda:0'),
  tensor(0, device='cuda:0'),
  tensor(1, device='cuda:0'),
  tensor(0, device='cuda:0'),
  tensor(1, device='cuda:0'),
  tensor(1, device='cuda:0'),
  tensor(0, device='cuda:0'),
  tensor(0, device='cuda:0'),
  tensor(0, device='cuda:0'),
  tensor(0, device='cuda:0'),
  tensor(1

In [43]:
y_pred.count(0)

10

In [44]:
bad_index = list()
for i, pred in enumerate(y_pred):
    if pred == 0:
        bad_index.append(i)
print(bad_index)

In [86]:
used_samples = len(testset) - (len(testset)%batch_size)

In [82]:
# len(testset)//batch_size = 2 
# sequence_length = 18
# len(lstm_h) = 2*18
print(len(lstm_h))
print(lstm_h[0].shape) #num_layers, batch_size, hidden_size

36

In [110]:
# n_samples, n_layers, sequence_length, hidden_size

reshaped = torch.zeros(used_samples, num_layers, sequence_length, hidden_size)
n_loops = used_samples//batch_size

for i in range(n_loops):
    temp = torch.zeros(sequence_length, num_layers, batch_size, hidden_size)
    for j in range(sequence_length):
        temp[j] = lstm_h[j]
    #batch_size, num_layers, sequence_length, hidden_size 순서로 변경
    temp = temp.permute(2, 1, 0, 3)
    reshaped[i*batch_size:(i+1)*batch_size] = temp
print(reshaped.shape)

tensor([[[[-7.5187e-02,  2.2605e-02,  2.4008e-02,  ...,  2.1915e-01,
           -8.5587e-02,  2.4571e-01],
          [-2.6804e-01,  1.0029e-01,  6.6043e-02,  ...,  1.4403e-01,
            3.4353e-02,  1.8264e-02],
          [-3.0316e-02,  3.8239e-01,  1.8399e-02,  ...,  1.1955e-01,
           -3.6300e-01, -6.6691e-02],
          ...,
          [ 1.6575e-02,  9.9467e-01, -3.4478e-02,  ...,  4.5354e-03,
            6.9323e-03, -1.8976e-03],
          [-1.6052e-02,  9.1037e-01, -1.3856e-02,  ...,  1.2897e-02,
           -5.7702e-02,  2.8490e-01],
          [ 1.1527e-02,  8.7045e-01, -4.5849e-03,  ...,  9.2885e-04,
           -2.1635e-01,  7.0458e-02]],

         [[-2.3767e-02,  1.5613e-01, -8.1646e-02,  ..., -1.3176e-01,
            1.9233e-02,  2.2309e-02],
          [-1.0631e-02,  3.9692e-03, -4.5396e-02,  ..., -9.1430e-02,
           -3.1552e-04,  7.1993e-03],
          [-7.2483e-03,  5.4823e-02, -1.5646e-02,  ..., -6.4968e-02,
           -7.5864e-03, -6.0242e-03],
          ...,
     

In [114]:
params.keys()

dict_keys(['w0', 'w1', 'w2', 'w3', 'w4', 'w5', 'w6', 'w7', 'w8', 'w9'])

In [115]:
print(params['w8'][0].shape, params['w9'].shape)

torch.Size([256])

In [117]:
print(reshaped[0][1][17].shape, params['w8'][0].shape)

(torch.Size([256]), torch.Size([256]))

In [119]:
#check whether last_h * w + b == last_fc
print(torch.matmul(reshaped[0][1][17].to(device), params['w8'][0]) + params['w9'][0])
print(last_fc[0])

tensor(-0.0148, device='cuda:0', grad_fn=<AddBackward0>)

In [126]:
#get index of argmax(without considering bias)
b1 = reshaped[0][1][17].to(device)*params['w8'][0]
max_b1 = torch.argmax(b1)
print(max_b1)

tensor(62, device='cuda:0')


In [141]:
#W_h for input gate
w_hh = params['w1'][:hidden_size, :]
w_hh.shape

torch.Size([256, 256])

In [131]:
#W_x for input gate
w_ih = params['w0'][:hidden_size, :]
w_ih.shape

torch.Size([256, 128])

In [143]:
# second layer....
b2 = reshaped[0][1][16].to(device)*w_hh[max_b1]
max_b2 = torch.argmax(b2)
print(max_b2)

tensor(215, device='cuda:0')


In [145]:
p18 = testset[0][0][17].to(device) * w_ih[max_b1]
max_p18 = torch.argmax(p18)
print(max_p18, p18[max_p18])

tensor(92, device='cuda:0') tensor(0.2815, device='cuda:0', grad_fn=<SelectBackward>)


  data = torch.tensor(self.data[idx], dtype=torch.float32)
  label = torch.tensor(self.label[idx], dtype=torch.int64)


In [146]:
p17 = testset[0][0][16].to(device) * w_ih[max_p18]
max_p17 = torch.argmax(p17)
print(max_p17, p17[max_p17])

tensor(79, device='cuda:0') tensor(0.2302, device='cuda:0', grad_fn=<SelectBackward>)


  data = torch.tensor(self.data[idx], dtype=torch.float32)
  label = torch.tensor(self.label[idx], dtype=torch.int64)


In [147]:
p16 = testset[0][0][15].to(device) * w_ih[max_p17]
max_p16 = torch.argmax(p16)
print(max_p16, p16[max_p16])

tensor(12, device='cuda:0') tensor(0.2424, device='cuda:0', grad_fn=<SelectBackward>)


  data = torch.tensor(self.data[idx], dtype=torch.float32)
  label = torch.tensor(self.label[idx], dtype=torch.int64)


In [150]:
p15 = testset[0][0][14].to(device) * w_ih[max_p16]
max_p15 = torch.argmax(p15)
print(max_p15, p15[max_p15])

tensor(82, device='cuda:0') tensor(0.2346, device='cuda:0', grad_fn=<SelectBackward>)


  data = torch.tensor(self.data[idx], dtype=torch.float32)
  label = torch.tensor(self.label[idx], dtype=torch.int64)


In [151]:
p14 = testset[0][0][13].to(device) * w_ih[max_p15]
max_p14 = torch.argmax(p14)
print(max_p14, p14[max_p14])

tensor(41, device='cuda:0') tensor(0.3145, device='cuda:0', grad_fn=<SelectBackward>)


  data = torch.tensor(self.data[idx], dtype=torch.float32)
  label = torch.tensor(self.label[idx], dtype=torch.int64)


In [152]:
p13 = testset[0][0][12].to(device) * w_ih[max_p14]
max_p13 = torch.argmax(p13)
print(max_p13, p13[max_p13])

tensor(72, device='cuda:0') tensor(0.2042, device='cuda:0', grad_fn=<SelectBackward>)


  data = torch.tensor(self.data[idx], dtype=torch.float32)
  label = torch.tensor(self.label[idx], dtype=torch.int64)


In [73]:
'''
print(testset[0])
data = testset[0][0].to(device)
data[0].shape
params['w0'].shape
data = data.reshape((128, 18))
lstm_x = torch.matmul(params['w0'], data) + params['w2'][:, None]
'''