In [16]:
import torch
import torch.nn as nn
import numpy as np 
import torch.nn.functional as F 

import visdom
vis = visdom.Visdom()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

Setting up a new session...


device(type='cuda')

In [17]:
class rnn_model(nn.Module):
    def __init__(self):
        super(rnn_model, self).__init__()
        self.num_layers = 2
        self.hidden_size = 100
        self.rnn1 = nn.RNN(28, self.hidden_size, self.num_layers, batch_first=True)
        self.linear = nn.Linear(self.hidden_size, 10)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        x, hn = self.rnn1(x, h0)
        x = self.linear(x[:, -1, :])
        return x


class lstm_model(nn.Module):
    def __init__(self):
        super(lstm_model, self).__init__()
        self.num_layers = 2
        self.hidden_size = 100
        self.lstm1 = nn.LSTM(28, self.hidden_size, self.num_layers, batch_first=True)
        self.linear = nn.Linear(self.hidden_size, 10)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        x, (hn,cn) = self.lstm1(x, (h0, c0))
        x = self.linear(x[:, -1, :])
        return x


class gru_model(nn.Module):
    def __init__(self):
        super(gru_model, self).__init__()
        self.num_layers = 2
        self.hidden_size = 100
        self.gru1 = nn.GRU(28, self.hidden_size, self.num_layers, batch_first=True)
        self.linear = nn.Linear(self.hidden_size, 10)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        x, hn = self.gru1(x, h0)
        x = self.linear(x[:, -1, :])
        return x


models = [rnn_model(), lstm_model(), gru_model()]        

In [18]:
#--------------------------------------------- file read
x_train = np.load("data/mnist_train.npy")
x_test  = np.load("data/mnist_test.npy")
y_train = np.load("data/mnist_train_target.npy")
y_test  = np.load("data/mnist_test_target.npy")
#--------------------------------------------- numpy to tensor
x_train  = torch.from_numpy(x_train).float()       #long으로 하면 loss 계산할 때 에러
x_test   = torch.from_numpy(x_test).float()
y_train  = torch.from_numpy(y_train).long()        #float으로 하면 loss 계산할 때 에러  
y_test   = torch.from_numpy(y_test).long()

#--------------------------------------------- data to dataset
train_dataset = torch.utils.data.TensorDataset(x_train, y_train)
test_dataset  = torch.utils.data.TensorDataset(x_test,  y_test)

#--------------------------------------------- dataset to dataloader 
train_loader = torch.utils.data.DataLoader(train_dataset,
                                          batch_size=256,
                                          shuffle=True,
                                          num_workers=2)
                                    
test_loader = torch.utils.data.DataLoader(train_dataset,
                                          batch_size=256,
                                          shuffle=True,
                                          num_workers=2)                                 
                                    

In [19]:
model = models[0].to(device)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

EPOCH = 1
for t in range(EPOCH): # EPOCH
    for i, (sample, target) in enumerate(train_loader): #BATCH
        sample = sample.to(device)
        target = target.to(device)
        y = model(sample)

        loss = criterion(y, target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        vis.line(X=[i], Y=[loss.item()], win="loss",name="RNN" , 
                    update='append',opts=dict(showlegend=True))

In [20]:
model = models[1].to(device)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

EPOCH = 1
for t in range(EPOCH): # EPOCH
    for i, (sample, target) in enumerate(train_loader): #BATCH
        sample = sample.to(device)
        target = target.to(device)
        y = model(sample)

        loss = criterion(y, target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        vis.line(X=[i], Y=[loss.item()], win="loss",name="LSTM" , 
                    update='append',opts=dict(showlegend=True))

In [21]:
model = models[2].to(device)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

EPOCH = 1
for t in range(EPOCH): # EPOCH
    for i, (sample, target) in enumerate(train_loader): #BATCH
        sample = sample.to(device)
        target = target.to(device)
        y = model(sample)

        loss = criterion(y, target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        vis.line(X=[i], Y=[loss.item()], win="loss",name="GRU " , 
                    update='append',opts=dict(showlegend=True))

In [23]:
# Test
for i, model in enumerate(models):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad(): 
        for data in test_loader:
            images, labels = data
            images = images.to(device)
            labels = labels.to(device)
           
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print(f'Accuracy of the model {i} on the test images: {100 * correct / total}')

Accuracy of the model 0 on the test images: 44.13666666666666
Accuracy of the model 1 on the test images: 73.335
Accuracy of the model 2 on the test images: 94.13
