# Lab 09-2. Weight initialization

In [1]:
import torch
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

**MNIST_nn_Xavier**

In [2]:
# nn Layer
linear1 = torch.nn.Linear(784, 256, bias=True)
linear2 = torch.nn.Linear(256, 256, bias=True)
linear3 = torch.nn.Linear(256,  10, bias=True)
relu    = torch.nn.ReLU()

In [3]:
torch.nn.init.xavier_uniform_(linear1.weight)
torch.nn.init.xavier_uniform_(linear2.weight)
torch.nn.init.xavier_uniform_(linear3.weight)

Parameter containing:
tensor([[-0.0462,  0.0730,  0.0617,  ...,  0.1105, -0.0446,  0.1151],
        [ 0.0929, -0.1331, -0.0115,  ..., -0.0120,  0.0729, -0.1396],
        [-0.0822, -0.1142, -0.1460,  ...,  0.1074, -0.1021, -0.1249],
        ...,
        [-0.0674, -0.1261,  0.1211,  ...,  0.0228, -0.0988,  0.0741],
        [-0.0287, -0.1213,  0.0012,  ...,  0.0689,  0.0396, -0.0654],
        [ 0.0697, -0.0232,  0.0398,  ...,  0.0570, -0.0119, -0.1456]],
       requires_grad=True)

In [4]:
model = torch.nn.Sequential(linear1, relu, linear2, relu, linear3).to(device)

In [5]:
# params
learning_rate = 1e-3
training_epochs = 15
batch_size = 100

**Data Loader**

In [6]:
import torchvision.datasets as datasets
import torchvision.transforms as transforms

In [7]:
mnist_train = datasets.MNIST(root='MNIST_data/',
                             train=True,
                             transform=transforms.ToTensor(),
                             download=True)

mnist_test  = datasets.MNIST(root='MNIST_data/',
                             train=True,
                             transform=transforms.ToTensor(),
                             download=True)

In [8]:
data_loader = torch.utils.data.DataLoader(dataset=mnist_train,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          drop_last=True)

**Training**

In [9]:
criterion = torch.nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [10]:
total_batch = len(data_loader)

for epoch in range(training_epochs):
    
    avg_cost = 0

    for X, Y in data_loader:
        X = X.view(-1, 28*28).to(device)
        Y = Y.to(device)

        optimizer.zero_grad()
        hypothesis = model(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()

        avg_cost += cost / total_batch

    print(f'Epoch: {epoch+1:4d}, Cost: {avg_cost:.9f}')

Epoch:    1, Cost: 0.242638513
Epoch:    2, Cost: 0.093776345
Epoch:    3, Cost: 0.061197829
Epoch:    4, Cost: 0.043724474
Epoch:    5, Cost: 0.031497348
Epoch:    6, Cost: 0.024512086
Epoch:    7, Cost: 0.023511698
Epoch:    8, Cost: 0.016384959
Epoch:    9, Cost: 0.016238673
Epoch:   10, Cost: 0.014869830
Epoch:   11, Cost: 0.013843667
Epoch:   12, Cost: 0.010825093
Epoch:   13, Cost: 0.009542700
Epoch:   14, Cost: 0.011163206
Epoch:   15, Cost: 0.010211847


**Test**

In [12]:
import warnings
warnings.filterwarnings(action='ignore')

In [13]:
with torch.no_grad():
    X_test = mnist_test.test_data.view(-1, 28*28).float().to(device)
    Y_test = mnist_test.test_labels.to(device)

    prediction = model(X_test)
    correct_prediction = torch.argmax(prediction, 1) == Y_test
    accuracy = correct_prediction.float().mean()
    print(f"Accuracy: {accuracy.item()*100:.4f}%")

Accuracy: 99.8100%
