In [1]:
import torch
from torch import nn
from torch.utils.data import DataLoader 
from torchvision import datasets
from torchvision.transforms import ToTensor

In [25]:
BATCH_SIZE = 128
EPOCHS = 10
LEARNING_RATE = 0.001

class FeedForwardNet(nn.Module):

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.dense_layers = nn.Sequential(
            nn.Linear(28*28, 256), # keras Dense
            nn.ReLU(),
            nn.Linear(256,10) # number of class at MNIST is 10\
        )
        self.softmax = nn.Softmax(dim=1)

    def forward(self, input_data):
        flattened_data = self.flatten(input_data)
        logits = self.dense_layers(flattened_data)
        predictions = self.softmax(logits)
        return predictions

In [3]:
# 1 - download dataset

def download_mnist_datasets():
    train_data = datasets.MNIST(
        root="data",   # root directory
        download=True,
        train=True,   # train set
        transform=ToTensor()    # What is ToTensor
    )
    validation_data = datasets.MNIST(
        root="data",   # root directory
        download=True,
        train=False,   # validation set
        transform=ToTensor()   
    )
    return train_data, validation_data

In [None]:
# download MNIST dataset

train_data, _ = download_mnist_datasets()
print('MNIST dataset downloaded')

In [None]:
train_data

In [12]:
def train_one_epoch(model, data_loader, loss_fn, optimiser, device):
    for inputs, targets in data_loader:
        inputs,targets = inputs.to(device), targets.to(device)

        # calculate loss
        predictions = model(inputs)
        loss = loss_fn(predictions,targets)

        # backpropagate loss and update weights
        optimiser.zero_grad() # make gradient zero
        loss.backward()
        optimiser.step() 

    print(f"Loss : {loss.item()}")

def train(model, data_loader, loss_fn, optimiser, device, epochs):
    for i in range(epochs):
        print(f"Epoch {i+1}")
        train_one_epoch(model, data_loader, loss_fn, optimiser, device)
        print("---------------------------")
    print("Training is done.")


In [26]:
# create a data loader for the train set
# this case, we don't use val data

train_data_loader = DataLoader(train_data, batch_size=BATCH_SIZE)

# build model
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

print(f"Using {device} device")

feed_forward_net = FeedForwardNet().to(device)

# instantiate loss function + optimiser
loss_fn = nn.CrossEntropyLoss()
optimiser = torch.optim.Adam(feed_forward_net.parameters(),
                             lr=LEARNING_RATE)

# train model

train(feed_forward_net, train_data_loader,loss_fn, optimiser, device, EPOCHS)

torch.save(feed_forward_net.state_dict(), "feedforwardnet.pth")

print("Model trained and stored at feedforwardnet.pth")

Using cpu device
Epoch 1
Loss : 1.5099091529846191
---------------------------
Epoch 2
Loss : 1.4991612434387207
---------------------------
Epoch 3
Loss : 1.496337890625
---------------------------
Epoch 4
Loss : 1.4842170476913452
---------------------------
Epoch 5
Loss : 1.4768468141555786
---------------------------
Epoch 6
Loss : 1.4741939306259155
---------------------------
Epoch 7
Loss : 1.473215103149414
---------------------------
Epoch 8
Loss : 1.4733935594558716
---------------------------
Epoch 9
Loss : 1.4727405309677124
---------------------------
Epoch 10
Loss : 1.4731807708740234
---------------------------
Training is done.
Model trained and stored at feedforwardnet.pth


In [None]:
# Model loading and prediction

import torch
# from train import FeedForwardNet, download_mnist_datasets 

In [29]:
class_mapping = [
                 "0",
                 "1",
                 "2",
                 "3",
                 "4",
                 "5",
                 "6",
                 "7",
                 "8",
                 "9"
]

def predict(model, input, target, class_mapping): #class_mapping이 꼭 필요한가?
    model.eval()
    #mode.train()
    with torch.no_grad():
        predictions = model(input)
        # Tesnor (1, 10) -> one sample, 10 classes
        predicted_index = predictions[0].argmax(0)
        predicted = class_mapping[predicted_index]
        expected = class_mapping[target]
    return predicted, expected

# load back the model

feed_forward_net = FeedForwardNet()
state_dict = torch.load("feedforwardnet.pth")
feed_forward_net.load_state_dict(state_dict)

# load MNIST validation dataset
_, validation_data = download_mnist_datasets()

# get a sample from the validation dataset for inference
input, target = validation_data[0][0], validation_data[0][1]

# make an inference
predicted, expected = predict(feed_forward_net, input, target, class_mapping)
print(f"Predicted: '{predicted}', expected : '{expected}'")

Predicted: '7', expected : '7'
