In [0]:
import numpy as np
import torch
import torch.optim as optim

from torch import nn
from torchvision import datasets, transforms

In [3]:
torch.cuda.is_available()

True

In [4]:
in_feat = np.arange(1*1*28*28).reshape(1,1,28,28)
in_feat = torch.FloatTensor(in_feat)
print(in_feat.size())

torch.Size([1, 1, 28, 28])


In [5]:
conv_layer = nn.Conv2d(1,3,3)
out_feat = conv_layer(in_feat)
print(out_feat.size())

torch.Size([1, 3, 26, 26])


In [7]:
conv_layer = nn.Conv2d(in_channels=1, out_channels=3, kernel_size=[3,2], stride=[1,2])
out_feat = conv_layer(in_feat)
print(out_feat.size())

torch.Size([1, 3, 26, 14])


In [9]:
pool_layer = nn.MaxPool2d(kernel_size=[2,2], stride=[2,2])
pool_out = pool_layer(out_feat)
print(pool_out.size())

torch.Size([1, 3, 13, 7])


In [11]:
print(nn.functional.relu(out_feat))

tensor([[[[5.9096e-01, 2.1677e-01, 0.0000e+00,  ..., 0.0000e+00,
           0.0000e+00, 0.0000e+00],
          [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00,
           0.0000e+00, 0.0000e+00],
          [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00,
           0.0000e+00, 0.0000e+00],
          ...,
          [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00,
           0.0000e+00, 0.0000e+00],
          [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00,
           0.0000e+00, 0.0000e+00],
          [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00,
           0.0000e+00, 0.0000e+00]],

         [[2.0435e+01, 2.2859e+01, 2.5282e+01,  ..., 4.7092e+01,
           4.9516e+01, 5.1939e+01],
          [5.4362e+01, 5.6786e+01, 5.9209e+01,  ..., 8.1019e+01,
           8.3443e+01, 8.5866e+01],
          [8.8289e+01, 9.0713e+01, 9.3136e+01,  ..., 1.1495e+02,
           1.1737e+02, 1.1979e+02],
          ...,
          [8.0076e+02, 8.0318e+02, 8.0560e+02,  ..., 8.2741

In [0]:
class Net(nn.Module):
  
  def __init__(self, out_feats):
    super(Net, self).__init__()
    
    conv_1 = [
        nn.Conv2d(1, 8, kernel_size=[3, 3], stride=[1, 1]),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=[2, 2], stride=[2, 2])
    ]
    
    conv_2 = [
        nn.Conv2d(8, 16, kernel_size=[4, 4], stride=[1, 1]),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=[2, 2], stride=[2, 2])
    ]
    
    conv_3 = [
        nn.Conv2d(16, 32, kernel_size=[3, 3], stride=[1, 1]),
        nn.ReLU()
    ]
    
    self.conv_layers = nn.Sequential(*conv_1, *conv_2, *conv_3)
    
    in_feats = 32*3*3
    
    layer_1 = [
        nn.Linear(in_feats, in_feats*2),
        nn.Sigmoid()
    ]
    
    layer_2 = [
        nn.Linear(in_feats*2, in_feats),
        nn.Sigmoid()
    ]
    
    layer_3 = [
        nn.Linear(in_feats, out_feats),
        nn.Softmax()
    ]
    
    self.classifier = nn.Sequential(*layer_1, *layer_2, *layer_3)
    
  def forward(self, x):
    out = self.conv_layers(x)
    
    batch_size = x.size(0)
    out = out.view(batch_size, -1)
    
    out = self.classifier(out)
    return out

In [0]:
x = np.arange(4*3).reshape(3,4)
x = torch.Tensor(x)
net = MLP(4, 1)
net(x)

  input = module(input)


tensor([[1.],
        [1.],
        [1.]], grad_fn=<SoftmaxBackward>)

In [0]:
BATCH_SIZE = 32
EPOCH = 80
LR = 5E-3
MOMENT = 0.9
EVAL_STEP = 2

In [32]:
trans = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.0,), (1.0,))
])

train_set = datasets.MNIST(
    root='', train=True, 
    transform=trans, download=True
)

test_set = datasets.MNIST(
    root='', train=False, 
    transform=trans, download=True
)

train_loader = torch.utils.data.DataLoader(
    dataset=train_set, batch_size=BATCH_SIZE,
    shuffle=True
)

test_loader = torch.utils.data.DataLoader(
    dataset=test_set, batch_size=BATCH_SIZE,
    shuffle=False
)

print(len(train_loader), len(test_loader))

1875 313


In [0]:
net = Net(10)
net = net.cuda()

optimizer = optim.SGD(net.parameters(), lr=LR, momentum=MOMENT)
criterion = nn.CrossEntropyLoss()

In [0]:
def train(net, criterion, optim, loader, epoch):
  net.train()
  total_loss = 0
  for data, label in loader:
    batch_size = data.size(0)
    
    optim.zero_grad()
    data, label = data.cuda(), label.cuda()
    #data = data.view(batch_size, -1)
    out = net(data)
    loss = criterion(out, label)
    
    total_loss += loss.item()
    
    loss.backward()
    optim.step()
    
  print("[%03d/%03d] avg loss: %0.4f" %(epoch, EPOCH, total_loss/len(loader)))
  
def test(net, loader):
  net.eval()
  pred = []
  target = []
  for data, label in loader:
    batch_size = data.size(0)
    #data = data.view(batch_size, -1)
    data = data.cuda()
    
    pred_batch = net(data)
    pred_batch = pred_batch.to('cpu').detach().numpy()
    pred_batch = np.argmax(pred_batch, 1)
    
    pred.append(pred_batch)
    target.append(label.numpy())
    
    pred = np.concatenate(pred)
    target = np.concatenate(target)
    acc = (np.sum(pred == target) / len(pred))
    
    return pred, target, acc
    

In [35]:
prediction = None
target = None
for epoch in range(1, EPOCH+1):
  train(net, criterion, optimizer, train_loader, epoch)
  if epoch % EVAL_STEP == 0:
    with torch.no_grad():
      prediction, target, acc = test(net, test_loader)
      print(acc)

  input = module(input)


[001/080] avg loss: 2.3017
[002/080] avg loss: 2.3015
0.15625
[003/080] avg loss: 2.3014
[004/080] avg loss: 2.3015
0.15625
[005/080] avg loss: 2.3014
[006/080] avg loss: 2.3014
0.15625
[007/080] avg loss: 2.3014
[008/080] avg loss: 2.3012
0.15625
[009/080] avg loss: 2.3001
[010/080] avg loss: 2.2728
0.3125
[011/080] avg loss: 2.1133
[012/080] avg loss: 1.8398
0.75
[013/080] avg loss: 1.7645
[014/080] avg loss: 1.7076
0.75
[015/080] avg loss: 1.6906
[016/080] avg loss: 1.6393
0.90625
[017/080] avg loss: 1.6029
[018/080] avg loss: 1.5929
0.90625
[019/080] avg loss: 1.5865
[020/080] avg loss: 1.5818
0.90625
[021/080] avg loss: 1.5783
[022/080] avg loss: 1.5757
0.90625
[023/080] avg loss: 1.5733
[024/080] avg loss: 1.5711
0.90625
[025/080] avg loss: 1.5695
[026/080] avg loss: 1.5682
0.90625
[027/080] avg loss: 1.5666
[028/080] avg loss: 1.5654
0.90625
[029/080] avg loss: 1.5008
[030/080] avg loss: 1.4902
1.0
[031/080] avg loss: 1.4867
[032/080] avg loss: 1.4840
1.0
[033/080] avg loss: 1.4