<a href="https://colab.research.google.com/github/jiyun1006/colab_deep/blob/main/Convolutional_Neural_Network.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
%matplotlib inline
%config InlineBackend.figure_format ="retina"
# Print the installed PyTorch version.
print("PyTorch version:{%s}" % (torch.__version__,))
# Use the first CUDA GPU when one is available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print("device : {%s}" % (device,))

In [None]:
from torchvision import datasets, transforms
# Both MNIST splits are stored under the same root directory and converted to
# tensors by ToTensor(); the files are downloaded if they are not present yet.
mnist_root = '/data/'
to_tensor = transforms.ToTensor()
mnist_train = datasets.MNIST(root=mnist_root, train=True, transform=to_tensor, download=True)
mnist_test = datasets.MNIST(root=mnist_root, train=False, transform=to_tensor, download=True)
print('Done')

In [None]:
BATCH_SIZE = 256
# The train and test loaders use identical settings: mini-batches of BATCH_SIZE
# samples, shuffling enabled, and one worker process.
loader_options = dict(batch_size=BATCH_SIZE, shuffle=True, num_workers=1)
train_iter = torch.utils.data.DataLoader(mnist_train, **loader_options)
test_iter = torch.utils.data.DataLoader(mnist_test, **loader_options)
print('Done')

Define Model

In [None]:
class ConvolutionalNeuralNetworkClass(nn.Module):
  """CNN classifier: convolutional blocks followed by fully connected layers.

  Every entry of ``cdims`` adds Conv2d -> (optional BatchNorm2d) -> ReLU ->
  MaxPool2d(2x2, stride 2) -> Dropout2d(p=0.5). The result is flattened and
  passed through one Linear + ReLU pair per entry of ``hdims`` and a final
  Linear layer with ``ydim`` outputs (no activation is applied to it).

  Args:
    name: Label stored on the instance as ``self.name``.
    xdim: Input size as [channels, height, width].
    ksize: Convolution kernel size; the padding is ``ksize // 2``.
    cdims: Output channels of each convolutional block.
    hdims: Width of each hidden dense layer.
    ydim: Number of outputs of the final linear layer.
    USE_BATCHNORM: If True, insert BatchNorm2d after every convolution.
  """

  def __init__(self, name="cnn", xdim=[1,28,28], ksize=3, cdims=[32,64], hdims=[1024,128], ydim=10, USE_BATCHNORM=False):
    super(ConvolutionalNeuralNetworkClass, self).__init__()
    self.name = name
    # Copy the list arguments: the defaults are shared list objects, so storing
    # them directly would let a change to one instance's attribute leak into
    # every model constructed with the defaults afterwards.
    self.xdim = list(xdim)
    self.ksize = ksize
    self.cdims = list(cdims)
    self.hdims = list(hdims)
    self.ydim = ydim
    self.USE_BATCHNORM = USE_BATCHNORM

    # Convolutional layers
    self.layers = []
    prev_cdim = self.xdim[0]
    for cdim in self.cdims:
      self.layers.append(
          nn.Conv2d(in_channels=prev_cdim,
                    out_channels=cdim,
                    kernel_size=self.ksize,
                    stride=(1,1),
                    padding=self.ksize//2)
      )
      if self.USE_BATCHNORM:
        self.layers.append(nn.BatchNorm2d(cdim))
      self.layers.append(nn.ReLU(True))
      self.layers.append(nn.MaxPool2d(kernel_size=(2,2),stride=(2,2)))
      self.layers.append(nn.Dropout2d(p=0.5))
      prev_cdim = cdim

    # Dense layers
    self.layers.append(nn.Flatten())
    # Each 2x2/stride-2 pooling halves height and width (rounding down). This
    # size assumes the convolutions keep the spatial size, which holds for odd
    # ksize with padding ksize//2.
    n_pool = len(self.cdims)
    prev_hdim = prev_cdim*(self.xdim[1]//(2**n_pool))*(self.xdim[2]//(2**n_pool))
    for hdim in self.hdims:
      self.layers.append(nn.Linear(prev_hdim, hdim, bias=True))
      self.layers.append(nn.ReLU(True))
      prev_hdim = hdim
    self.layers.append(nn.Linear(prev_hdim, self.ydim, bias=True))

    # Concatenate all layers; each is registered as "<lowercase type>_<index>".
    self.net = nn.Sequential()
    for l_idx, layer in enumerate(self.layers):
      layer_name = "%s_%02d"%(type(layer).__name__.lower(), l_idx)
      self.net.add_module(layer_name, layer)
    self.init_param()

  def init_param(self):
    """(Re-)initialize all weights in place.

    Conv2d and Linear weights get Kaiming-normal values and zero biases;
    BatchNorm2d gets weight 1 and bias 0.
    """
    for m in self.modules():
      if isinstance(m, (nn.Conv2d, nn.Linear)):
        nn.init.kaiming_normal_(m.weight)
        # Layers created with bias=False expose bias as None.
        if m.bias is not None:
          nn.init.zeros_(m.bias)
      elif isinstance(m, nn.BatchNorm2d):
        nn.init.constant_(m.weight,1)
        nn.init.constant_(m.bias, 0)

  def forward(self, x):
    """Run ``x`` through the sequential stack and return the final layer's output."""
    return self.net(x)
  
# Build the classifier (two conv blocks with 32 and 64 channels, two hidden
# dense layers with 256 and 128 units) and move it to the selected device.
C = ConvolutionalNeuralNetworkClass(
    name='cnn',
    xdim=[1,28,28],
    ksize=3,
    cdims=[32,64],
    hdims=[256,128],
    ydim=10,
)
C = C.to(device)
# Cross-entropy loss on the network outputs, optimized with Adam.
loss = nn.CrossEntropyLoss()
optm = optim.Adam(C.parameters(), lr=1e-3)
print('Done')



Check Parameters


In [None]:
np.set_printoptions(precision=3)
n_param = 0

# List every trainable parameter with its shape and first five values, and
# count the total number of trainable scalars.
for p_idx, (param_name, param) in enumerate(C.named_parameters()):
    # Parameters that do not require gradients are skipped (their index is still used up).
    if not param.requires_grad:
        continue
    values = param.detach().cpu().numpy()
    flat_values = values.reshape(-1)
    n_param += flat_values.size
    print("[%d] name: [%s] shape: [%s]"%(p_idx, param_name, values.shape))
    print("      val : %s"%(flat_values[:5]))
print("Total number of parameters: [%s]"%(format(n_param,' ,d')))

Simple Forward Pass

In [None]:
# Show three decimals for both NumPy arrays and torch tensors.
np.set_printoptions(precision=3)
torch.set_printoptions(precision=3)

# Two random single-channel 28x28 inputs with values in [0, 1).
x_numpy = np.random.rand(2,1,28,28)
x_float = torch.from_numpy(x_numpy).float()
x_torch = x_float.to(device)

# Push the batch through the network and bring the result back to NumPy.
y_torch = C.forward(x_torch)
y_detached = y_torch.detach()
y_numpy = y_detached.cpu().numpy()

print("x_torch:\n", x_torch)
print("y_torch:\n", y_torch)
print("\nx_numpy %s : \n"%(x_numpy.shape,),x_numpy)
print("y_numpy %s :\n"%(y_numpy.shape,),y_numpy)

Evaluation

In [None]:
def func_eval(model,data_iter,device):
    """Return the classification accuracy of ``model`` over ``data_iter``.

    The model is switched to eval mode for the measurement (this affects
    Dropout and BatchNorm) and is put back into whatever mode it was in before
    the call, even if evaluation raises.

    Args:
        model: Module that maps a (N, 1, 28, 28) batch to per-class scores.
        data_iter: Iterable of ``(batch_in, batch_out)`` pairs; ``batch_in`` is
            reshaped to (N, 1, 28, 28) and ``batch_out`` holds the class indices.
        device: Device that inputs and targets are moved to.

    Returns:
        Fraction of correctly classified samples as a float, or 0.0 when
        ``data_iter`` yields no samples.
    """
    was_training = model.training
    model.eval() # evaluate (affects DropOut and BN)
    n_total,n_correct = 0,0
    try:
        with torch.no_grad():
            for batch_in,batch_out in data_iter:
                y_trgt = batch_out.to(device)
                model_pred = model(batch_in.view(-1,1,28,28).to(device))
                # Predicted class = index of the largest score in each row.
                y_pred = model_pred.argmax(dim=1)
                n_correct += (y_pred==y_trgt).sum().item()
                n_total += batch_in.size(0)
    finally:
        # Restore the caller's mode instead of unconditionally forcing train mode.
        model.train(was_training)
    if n_total == 0:
        # Nothing was evaluated; avoid dividing by zero.
        return 0.0
    return n_correct/n_total
print ("Done")

In [None]:
# Baseline: accuracy of the freshly re-initialized network on both splits
# (train split is evaluated first, then the test split).
C.init_param()
train_accr, test_accr = (
    func_eval(C, data_iter, device) for data_iter in (train_iter, test_iter)
)
print ("train_accr:[%.3f] test_accr:[%.3f]."%(train_accr,test_accr))

Train

In [None]:
print ("Start training.")
C.init_param() # initialize parameters
C.train()
# NOTE(review): optm is created in an earlier cell, so re-running this cell
# resets the weights but keeps the optimizer's accumulated state.
EPOCHS,print_every = 10,1
for epoch in range(EPOCHS):
    loss_val_sum = 0.0
    for batch_in,batch_out in train_iter:
        # Forward path
        y_pred = C(batch_in.view(-1,1,28,28).to(device))
        loss_out = loss(y_pred,batch_out.to(device))
        # Update
        optm.zero_grad()      # reset gradient
        loss_out.backward()      # backpropagate
        optm.step()      # optimizer update
        # Accumulate a plain Python float: adding the loss tensor itself would
        # keep autograd history alive for every batch of the epoch.
        loss_val_sum += loss_out.item()
    # Mean of the per-batch losses over the epoch.
    loss_val_avg = loss_val_sum/len(train_iter)
    # Print
    if ((epoch%print_every)==0) or (epoch==(EPOCHS-1)):
        train_accr = func_eval(C,train_iter,device)
        test_accr = func_eval(C,test_iter,device)
        print ("epoch:[%d] loss:[%.3f] train_accr:[%.3f] test_accr:[%.3f]."%
               (epoch,loss_val_avg,train_accr,test_accr))
print ("Done")

Test

In [None]:
n_sample = 25
n_cols = 5
n_rows = int(np.ceil(n_sample / n_cols))  # enough rows to hold every sample
sample_indices = np.random.choice(len(mnist_test.targets), n_sample, replace=False)
test_x = mnist_test.data[sample_indices]
test_y = mnist_test.targets[sample_indices]

# Indexing mnist_test.data bypasses the ToTensor() transform, so the pixels are
# still raw 0-255 values; scale them to [0, 1] to match what the loaders fed
# the network during training.
test_in = test_x.view(-1,1,28,28).type(torch.float).div(255.0).to(device)

# Predict in eval mode so Dropout is disabled, then restore the previous mode.
was_training = C.training
C.eval()
try:
    with torch.no_grad():
        y_pred = C(test_in).argmax(dim=1).cpu()
finally:
    C.train(was_training)

plt.figure(figsize=(10,10))
for idx in range(n_sample):
    plt.subplot(n_rows, n_cols, idx+1)
    plt.imshow(test_x[idx], cmap='gray')
    plt.axis('off')
    plt.title("Pred:%d, Label:%d"%(y_pred[idx],test_y[idx]))
plt.show()
print ("Done")