In [92]:
import numpy as np

import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
import torchvision
from torchvision import transforms
from torch.utils.data import DataLoader,random_split

In [93]:
#import data
data_dir = 'dataset'

train_dataset = torchvision.datasets.MNIST(data_dir, train=True, download=True)
test_dataset  = torchvision.datasets.MNIST(data_dir, train=False, download=True)

train_transform = transforms.Compose([transforms.ToTensor(),])

test_transform = transforms.Compose([transforms.ToTensor(),])

train_dataset.transform = train_transform
test_dataset.transform = test_transform

m=len(train_dataset)

train_data, val_data = random_split(train_dataset, [int(m-m*0.2), int(m*0.2)])
batch_size=256

train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size)
valid_loader = torch.utils.data.DataLoader(val_data, batch_size=batch_size)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size,shuffle=True)


In [94]:
#model
class Classifier(nn.Module):
    def __init__(self):
        super().__init__()
        
        self.model = nn.Sequential(
            nn.Linear(28*28,64),
            nn.ReLU(True),
            nn.Linear(64,64),
            nn.ReLU(True),
            nn.Linear(64,10)
        )
        self.flatten = nn.Flatten(start_dim=1)
        
    def forward(self,x):
        x = self.flatten(x)
        x = self.model(x)
        return x

In [100]:
def train(classifier, device, data_loader, loss, optimizer):
    train_loss = []
    for images,labels in data_loader:
        images = images.to(device)
        labels = labels.to(device)
        #print(labels)
        get_one_hot(labels,10)
        #forward
        estimated = classifier(images)
        
        #compute cost function
        J = loss(estimated,labels)
        
        #get gradient: set accumulate to zero and calculate derivates dJ/dvar
        optimizer.zero_grad()
        J.backward()
        
        #step to the gradient direction
        optimizer.step()
        
        train_loss.append(J.item())
    return np.mean(train_loss)

def validation(classifier, device, data_loader, loss, optimizer):
    with torch.no_grad():
        conc_out = []
        conc_label = []
        val_loss = []
        for images,labels in data_loader:
            images = images.to(device)
            labels = labels.to(device)
            #forward
            predicted = classifier(images)

            #compute cost function
            #conc_out.append(estimated.cpu())
            #conc_label.append(labels.cpu())
            val_loss.append(loss(predicted.cpu(), labels.cpu()))
        # Create a single tensor with all the values in the lists
        #conc_out = torch.cat(conc_out)
        #conc_label = torch.cat(conc_label) 
        # Evaluate global loss
        #val_loss = loss(conc_out, conc_label)
    return val_loss

In [101]:
def plot_ae_outputs(classifier,n=10):
    targets = test_dataset.targets.numpy()
    #categorize images respective to number 0 to 9
    t_idx = {i:np.where(targets==i)[0][0] for i in range(n)}

    for i in range(n):
        #get the 0th image of number i
        img = test_dataset[t_idx[i]][0].unsqueeze(0).to(device)
        classifier.eval()

        with torch.no_grad():
            rec  = classifier(img)

        print('image number',i,'predicted number',np.argmax(rec.squeeze().numpy()))
        

In [102]:
classifier = Classifier()
optimizer = optim.SGD(classifier.parameters(),lr=1e-2)
loss = nn.CrossEntropyLoss()

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f'Selected device: {device}')

# Move model to the selected device
classifier.to(device)

Selected device: cpu


Classifier(
  (model): Sequential(
    (0): Linear(in_features=784, out_features=64, bias=True)
    (1): ReLU(inplace=True)
    (2): Linear(in_features=64, out_features=64, bias=True)
    (3): ReLU(inplace=True)
    (4): Linear(in_features=64, out_features=10, bias=True)
  )
  (flatten): Flatten(start_dim=1, end_dim=-1)
)

In [104]:
num_epochs = 5
diz_loss = {'train_loss':[],'val_loss':[]}
for epoch in range(num_epochs):
    train_loss = train(classifier, device, train_loader, loss, optimizer)
    val_loss = validation(classifier, device, valid_loader, loss, optimizer)
    print('\n EPOCH {}/{} \t train loss {} \t val loss {}'.format(epoch + 1, num_epochs,train_loss,np.mean(val_loss)))
    diz_loss['train_loss'].append(train_loss)
    diz_loss['val_loss'].append(np.mean(val_loss))

plot_ae_outputs(classifier,n=10)
test_loss = validation(classifier, device, test_loader, loss, optimizer)


 EPOCH 1/5 	 train loss 2.1020133596785526 	 val loss 1.9492356777191162

 EPOCH 2/5 	 train loss 1.7054073931054865 	 val loss 1.4528179168701172

 EPOCH 3/5 	 train loss 1.2316213272353436 	 val loss 1.04625403881073

 EPOCH 4/5 	 train loss 0.9064906309259698 	 val loss 0.7994855642318726

 EPOCH 5/5 	 train loss 0.7169794291257858 	 val loss 0.6582539677619934
image number 0 predicted number 0
image number 1 predicted number 1
image number 2 predicted number 2
image number 3 predicted number 3
image number 4 predicted number 4
image number 5 predicted number 6
image number 6 predicted number 2
image number 7 predicted number 7
image number 8 predicted number 2
image number 9 predicted number 9


In [105]:
test_loss

[tensor(0.6887),
 tensor(0.6200),
 tensor(0.6378),
 tensor(0.5960),
 tensor(0.6082),
 tensor(0.6198),
 tensor(0.6464),
 tensor(0.6044),
 tensor(0.6688),
 tensor(0.5553),
 tensor(0.6259),
 tensor(0.6618),
 tensor(0.7062),
 tensor(0.7042),
 tensor(0.5917),
 tensor(0.5695),
 tensor(0.6109),
 tensor(0.6455),
 tensor(0.6360),
 tensor(0.6614),
 tensor(0.7087),
 tensor(0.5937),
 tensor(0.6788),
 tensor(0.6898),
 tensor(0.6210),
 tensor(0.6433),
 tensor(0.5811),
 tensor(0.6424),
 tensor(0.5802),
 tensor(0.5900),
 tensor(0.6486),
 tensor(0.6193),
 tensor(0.6434),
 tensor(0.6045),
 tensor(0.6588),
 tensor(0.5910),
 tensor(0.6008),
 tensor(0.6281),
 tensor(0.6724),
 tensor(0.5607)]