In [7]:
import torchvision
import torch

In [8]:
from torchvision import transforms
from torchvision.transforms import ToTensor, Normalize

transform = transforms.Compose([
    ToTensor(),
    Normalize((0.1307,), (0.3081,))
])


In [9]:
import ssl
ssl._create_default_https_context = ssl._create_unverified_context
train_dataset = torchvision.datasets.MNIST(
    root='./data/train',
    train=True,
    transform=transform,
    download=True
)

test_dataset = torchvision.datasets.MNIST(
    root='./data/test',
    train=False,
    transform=transform,
    download=True
)




In [10]:
BATCH_SIZE = 64
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    shuffle=True,
    batch_size= BATCH_SIZE,
    num_workers=2
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    shuffle=False,
    batch_size=BATCH_SIZE,
    num_workers=2
)

In [11]:
import torch.nn as nn
class FeedForward(nn.Module):
    def __init__(self, input_dim: int, output_dim:int, hidden_dim:int):
        super().__init__()
        self.fc1 = nn.Linear(in_features=input_dim, out_features=hidden_dim)
        self.fc2 = nn.Linear(in_features=hidden_dim, out_features=output_dim)
        self.activation = nn.ReLU()

    def forward(self, x: torch.Tensor):
        x = x.reshape(x.shape[0], -1)
        out = self.activation(self.fc1(x))
        return self.fc2(out)


In [12]:
x = torch.rand(1, 28*28).to('cpu')
net = FeedForward(input_dim=784, hidden_dim=128, output_dim=10)
net.to('cpu')
net(x)

tensor([[-0.2266, -0.1055,  0.2336, -0.0007,  0.0336,  0.0614,  0.0688, -0.1144,
         -0.0265, -0.1826]], grad_fn=<AddmmBackward0>)

In [13]:
def get_acc(predicts: torch.Tensor, labels: torch.Tensor):
    _,outputs = predicts.max(dim=1)
    num_correct = (outputs == labels).sum()
    num_total = labels.shape[0]
    return num_correct/num_total

In [14]:
from tqdm import tqdm
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(lr=0.001, params=net.parameters())
EPOCHS = 100
epochs_tqdm = tqdm(range(EPOCHS), unit="batch", desc="Training")
for epoch in epochs_tqdm:
    for i, (images, labels) in enumerate(train_loader):

        images = images.to('cpu')
        labels = labels.to('cpu')

        predicts = net(images)
        loss = criterion(predicts, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    acc = 0
    for images, labels in test_loader:
        images = images.to('cpu')
        labels = labels.to('cpu')
        predicts = net(images)
        acc += get_acc(predicts, labels)
    acc = acc/len(test_loader)
    epochs_tqdm.set_postfix(acc=acc)


Training: 100%|██████████| 100/100 [04:39<00:00,  2.79s/batch, acc=tensor(0.9772)]


In [15]:
x = torch.rand(64,10)
a,b = x.max(1)

In [16]:
b.shape

torch.Size([64])