In [17]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
# Report the installed PyTorch build, then select the compute device:
# the first CUDA GPU when one is available, otherwise the CPU.
print(f'pytorch verison : [{torch.__version__}]')
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
print(f'device : [{device}]')

pytorch verison : [1.7.1]
device : [cuda:0]


- Dataset

In [9]:
from torchvision import datasets, transforms

# MNIST train and test splits, stored under ./data (downloaded when requested via
# download=True). Every sample is passed through transforms.ToTensor().
train_dataset = datasets.MNIST(
    root='./data',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
test_dataset = datasets.MNIST(
    root='./data',
    train=False,
    transform=transforms.ToTensor(),
    download=True,
)


- Dataloader

In [11]:
# Mini-batch size shared by the training and test loaders.
BATCH_SIZE = 256
# Training batches are reshuffled on every pass over the data.
train_iter = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=1)
# Evaluation gains nothing from shuffling; a fixed order keeps test passes repeatable.
test_iter = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=1)

- Model

In [14]:
class MLP(nn.Module):
    """Two-layer perceptron: Linear -> ReLU -> Linear.

    Args:
        x_dim: Number of input features per sample.
        h_dim: Width of the hidden layer.
        y_dim: Number of output units. The outputs are returned as raw
            scores; no softmax is applied here.
    """

    def __init__(self, x_dim, h_dim, y_dim):
        super().__init__()
        self.x_dim = x_dim
        self.h_dim = h_dim
        self.y_dim = y_dim
        self.lin_1 = nn.Linear(x_dim, h_dim)
        self.lin_2 = nn.Linear(h_dim, y_dim)
        self.init_param()

    def init_param(self):
        """Re-initialise both layers: Kaiming-normal weights, zero biases."""
        # Same order as before (lin_1 then lin_2) so RNG consumption is unchanged.
        for layer in (self.lin_1, self.lin_2):
            nn.init.kaiming_normal_(layer.weight)
            nn.init.zeros_(layer.bias)

    def forward(self, input):
        """Compute the output scores for ``input``.

        Args:
            input: Tensor whose last dimension is ``x_dim``, or a batch with
                more than two dimensions (e.g. ``(N, 1, 28, 28)`` images) whose
                per-sample elements total ``x_dim``.

        Returns:
            Tensor of raw scores with last dimension ``y_dim``.
        """
        x = input
        # Accept un-flattened batches: when the trailing dimension is not x_dim,
        # collapse everything after the batch axis. Inputs that already end in
        # x_dim take the original path untouched.
        if x.dim() > 2 and x.shape[-1] != self.x_dim:
            x = x.flatten(start_dim=1)
        return self.lin_2(F.relu(self.lin_1(x)))

In [18]:
# Build the 784 -> 256 -> 10 network and move it to the selected device.
model = MLP(x_dim=784, h_dim=256, y_dim=10)
model = model.to(device)
# Cross-entropy criterion, and Adam (lr = 1e-3) over all model parameters.
loss = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

- Model check

In [22]:
# Smoke test: push 10 random rows of 28*28 features through the network.
x_numpy = np.random.rand(10, 28 * 28)
# .float() casts to float32 before the tensor is moved to the model's device.
x_torch = torch.from_numpy(x_numpy).float().to(device)
# Call the module itself rather than .forward() so that any registered hooks run.
y_torch = model(x_torch)
# Detach from autograd and bring back to host memory before converting to NumPy.
y_numpy = y_torch.detach().cpu().numpy()
print(x_numpy[:5])
print(x_torch[:5])
print(y_torch[:5])
print(y_numpy[:5])

[[8.44767374e-01 9.05907372e-01 4.65312950e-02 ... 7.31807971e-01
  2.30314824e-01 1.33678027e-01]
 [4.56305096e-02 3.29474065e-02 3.22641576e-01 ... 8.46130974e-01
  8.45394961e-01 8.25833215e-01]
 [7.57347473e-01 5.81760063e-01 6.53017126e-01 ... 6.58806099e-01
  3.64242288e-01 1.75647079e-01]
 [5.12465196e-01 3.89134135e-01 8.72091231e-01 ... 6.96818152e-04
  4.55809791e-01 8.41174974e-01]
 [5.53421054e-01 2.97467860e-01 2.42213845e-01 ... 5.11835320e-01
  4.82667310e-01 3.28874053e-01]]
tensor([[8.4477e-01, 9.0591e-01, 4.6531e-02,  ..., 7.3181e-01, 2.3031e-01,
         1.3368e-01],
        [4.5631e-02, 3.2947e-02, 3.2264e-01,  ..., 8.4613e-01, 8.4539e-01,
         8.2583e-01],
        [7.5735e-01, 5.8176e-01, 6.5302e-01,  ..., 6.5881e-01, 3.6424e-01,
         1.7565e-01],
        [5.1247e-01, 3.8913e-01, 8.7209e-01,  ..., 6.9682e-04, 4.5581e-01,
         8.4117e-01],
        [5.5342e-01, 2.9747e-01, 2.4221e-01,  ..., 5.1184e-01, 4.8267e-01,
         3.2887e-01]], device='cuda:0')
t

- Parameters check

In [30]:
# List every named parameter with its first two values and tally the element count.
total_param = 0
for i, (name, param) in enumerate(model.named_parameters()):
    # numel() reads the element count directly; no need to copy each tensor
    # to host memory as a NumPy array just to ask for its size.
    total_param += param.numel()
    print(f'[{i}], name : [{name}], parameters : [{param.reshape(-1)[:2]}]')
print('parameters ',total_param)

[0], name : [lin_1.weight], parameters : [tensor([ 0.0182, -0.0592], device='cuda:0', grad_fn=<SliceBackward>)]
[1], name : [lin_1.bias], parameters : [tensor([0., 0.], device='cuda:0', grad_fn=<SliceBackward>)]
[2], name : [lin_2.weight], parameters : [tensor([ 0.0118, -0.1665], device='cuda:0', grad_fn=<SliceBackward>)]
[3], name : [lin_2.bias], parameters : [tensor([0., 0.], device='cuda:0', grad_fn=<SliceBackward>)]
parameters  203530
