In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
import sys
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
# Report the installed PyTorch version, then select the compute device:
# the first CUDA GPU when CUDA is available, otherwise the CPU.
print(f'pytorch verison : [{torch.__version__}]')
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print(f'device : [{device}]')

pytorch verison : [1.7.1]
device : [cuda:0]


- Dataset

In [2]:
from torchvision import datasets, transforms

# MNIST train and test splits, stored under ./data (downloaded on first use)
# and converted to tensors by ToTensor. The train split is built first,
# then the test split.
train_dataset, test_dataset = (
    datasets.MNIST(
        root='./data',
        train=is_train,
        transform=transforms.ToTensor(),
        download=True,
    )
    for is_train in (True, False)
)


- DataLoader

In [3]:
# Mini-batch size shared by the training and evaluation loaders.
BATCH_SIZE = 256
# Training batches are reshuffled at every epoch.
train_iter = DataLoader(train_dataset,batch_size=BATCH_SIZE,shuffle=True,num_workers=1)
# Evaluation gains nothing from shuffling; a fixed order keeps per-batch
# results reproducible from one run to the next.
test_iter = DataLoader(test_dataset,batch_size=BATCH_SIZE,shuffle=False,num_workers=1)

- Model

In [4]:
class MLP(nn.Module):
    """Two-layer perceptron: Linear -> ReLU -> Linear.

    Args:
        x_dim: number of input features.
        h_dim: number of hidden units.
        y_dim: number of outputs.
    """

    def __init__(self, x_dim, h_dim, y_dim):
        super().__init__()
        self.x_dim, self.h_dim, self.y_dim = x_dim, h_dim, y_dim
        self.lin_1 = nn.Linear(x_dim, h_dim)
        self.lin_2 = nn.Linear(h_dim, y_dim)
        self.init_param()

    def init_param(self):
        """Re-initialise both layers: Kaiming-normal weights, zero biases."""
        # Layers are visited in definition order so the random draws happen
        # in the same sequence every time (lin_1 first, then lin_2).
        for layer in (self.lin_1, self.lin_2):
            nn.init.kaiming_normal_(layer.weight)
            nn.init.zeros_(layer.bias)

    def forward(self, input):
        """Return lin_2(relu(lin_1(input))); no activation on the output."""
        hidden = F.relu(self.lin_1(input))
        return self.lin_2(hidden)

In [5]:
# Network with 784 input features, 256 hidden units and 10 outputs,
# moved onto the selected device.
model = MLP(x_dim=784, h_dim=256, y_dim=10)
model = model.to(device)
# Cross-entropy objective and an Adam optimiser over all model parameters.
loss = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

- Model check

In [6]:
# Sanity-check the forward pass on a random batch of 10 vectors of length 28*28.
x_numpy = np.random.rand(10,28*28)
# np.random.rand yields float64; cast to float32 and move to the model's device.
x_torch = torch.from_numpy(x_numpy).float().to(device)
# Call the module itself rather than .forward() so that nn.Module.__call__
# runs any registered hooks around the forward pass.
y_torch = model(x_torch)
# Detach from autograd and copy to host memory before converting to NumPy.
y_numpy = y_torch.detach().cpu().numpy()
print(x_numpy[:5])
print(x_torch[:5])
print(y_torch[:5])
print(y_numpy[:5])

[[2.09822185e-01 2.29431938e-01 5.64319481e-01 ... 1.29506622e-01
  9.45328982e-01 6.81141090e-01]
 [2.76034812e-01 8.42832964e-01 9.42733239e-02 ... 6.24224651e-02
  6.55234586e-04 5.64705840e-01]
 [5.57022200e-01 7.87504554e-01 2.02977379e-01 ... 6.70308143e-01
  8.90050486e-01 8.96027140e-01]
 [1.99074471e-01 4.88631039e-01 3.20708759e-01 ... 3.36177864e-01
  2.63486204e-01 1.07261606e-01]
 [7.17011331e-01 4.99610141e-01 6.60495356e-01 ... 9.01675260e-01
  6.13409988e-01 7.61126289e-01]]
tensor([[2.0982e-01, 2.2943e-01, 5.6432e-01,  ..., 1.2951e-01, 9.4533e-01,
         6.8114e-01],
        [2.7603e-01, 8.4283e-01, 9.4273e-02,  ..., 6.2422e-02, 6.5523e-04,
         5.6471e-01],
        [5.5702e-01, 7.8750e-01, 2.0298e-01,  ..., 6.7031e-01, 8.9005e-01,
         8.9603e-01],
        [1.9907e-01, 4.8863e-01, 3.2071e-01,  ..., 3.3618e-01, 2.6349e-01,
         1.0726e-01],
        [7.1701e-01, 4.9961e-01, 6.6050e-01,  ..., 9.0168e-01, 6.1341e-01,
         7.6113e-01]], device='cuda:0')
t

- Parameters check

In [8]:
# List every named parameter tensor with its first two values and
# accumulate the total number of scalar parameters in the model.
total_param = 0
for i, (name,param) in enumerate(model.named_parameters()):
    # numel() returns the element count directly, avoiding a device-to-host
    # copy and NumPy conversion that were only used to read the size.
    total_param += param.numel()
    print(f'[{i}], name : [{name}], parameters : [{param.reshape(-1)[:2]}]')
print(f'parameters : [{total_param:,d}]')

[0], name : [lin_1.weight], parameters : [tensor([-0.0495,  0.0432], device='cuda:0', grad_fn=<SliceBackward>)]
[1], name : [lin_1.bias], parameters : [tensor([0., 0.], device='cuda:0', grad_fn=<SliceBackward>)]
[2], name : [lin_2.weight], parameters : [tensor([0.1496, 0.1271], device='cuda:0', grad_fn=<SliceBackward>)]
[3], name : [lin_2.bias], parameters : [tensor([0., 0.], device='cuda:0', grad_fn=<SliceBackward>)]
parameters : [203,530]


- Test function