In [11]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, transforms
from torchvision import models

train_set = datasets.MNIST(root = './data',
                           train = True,
                           download = True,
                           transform = transforms.ToTensor())

test_set = datasets.MNIST(root = './data',
                          train = False,
                          download = True,
                          transform = transforms.ToTensor())

class MLP(nn.Module) :
    def __init__(self):
        super(MLP, self).__init__()
        self.Net = nn.Sequential(
            nn.Linear(784, 512),
            nn.ReLU(),

            nn.Linear(512, 128),
            nn.ReLU(),

            nn.Linear(128, 10), 
        )
    def forward(self, input):
        input = input.view(-1, 28 * 28)
        return F.softmax(self.Net(input))

def weights_init(m) :
    if isinstance(m, (nn.Linear, nn.Conv2d)) :
        nn.init.uniform_(m.weight)
        nn.init.constant_(m.bias, 0.0)

Batch_size = 64
Max_epoch = 3
learning_rate = 0.01
device = torch.device('cuda' if torch.cuda.is_available() else "cpu")
model = MLP().to(device)
model.apply(weights_init)

train_loader = DataLoader(train_set, batch_size = Batch_size, shuffle = True, num_workers = 0)
test_loader = DataLoader(test_set, batch_size = Batch_size, shuffle = False, num_workers = 0)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), momentum = 0.9, lr = learning_rate)

train_losses = []
train_acces = []

for epoch in range(Max_epoch) :
    model.train()
    train_loss = 0.0
    train_acc = 0.0
    for input, label in train_loader :
        input, label = input.to(device), label.to(device)

        optimizer.zero_grad()
        outputs = model(input)
#         print(torch.sum(outputs) / outputs.shape[0])

        loss = criterion(outputs, label)
        train_loss += loss.item()
        loss.backward()

        _, pred = torch.max(outputs, dim = 1)
        num_correct = (pred == label).sum().item()
        acc = num_correct / input.size(0)
        train_acc += acc

        optimizer.step()
    with torch.no_grad():
        model.eval()
        total_loss = 0.0
        test_acc = 0.0
        for input, label in test_loader:
            input, label = input.to(device), label.to(device)

            outputs = model(input)
            loss = criterion(outputs, label)
            total_loss += loss.item()

            _, pred = torch.max(outputs, dim=1)
            num_correct = (pred == label).sum().item()
            acc = num_correct / input.size(0)
            test_acc += acc

    train_losses.append(train_loss / len(train_loader))
    train_acces.append(train_acc / len(train_loader))

    print(f'epoch : {epoch + 1}, train_loss : {train_loss / len(train_loader)}, '
          f'train_acc : {train_acc / len(train_loader)}, '
          f'test_loss : {total_loss / len(test_loader)}, '
          f'test_acc : {test_acc / len(test_loader)}')


  return F.softmax(self.Net(input))


epoch : 1, train_loss : 2.3625201543511105, train_acc : 0.0986307302771855, test_loss : 2.3652113926638463, test_acc : 0.09593949044585988
epoch : 2, train_loss : 2.3624535232210464, train_acc : 0.09869736140724947, test_loss : 2.3652113926638463, test_acc : 0.09593949044585988
epoch : 3, train_loss : 2.3625201543511105, train_acc : 0.0986307302771855, test_loss : 2.3652113926638463, test_acc : 0.09593949044585988


In [8]:
import torch.nn as nn
import torch

w = torch.empty(3, 4)
print(nn.init.uniform_(w))

tensor([[0.3769, 0.7266, 0.6105, 0.0085],
        [0.3868, 0.7112, 0.6474, 0.3288],
        [0.7520, 0.4217, 0.2518, 0.2033]])
