In [12]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import os
from tqdm import tqdm

In [13]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

device(type='cuda')

In [14]:
# Root directory for the downloaded images: an `images` folder four levels
# above the current working directory. The path is assembled with
# os.path.join / os.pardir instead of concatenating '/' by hand, so the
# platform's own separator is used.
path_to_img = os.path.join(os.getcwd(), os.pardir, os.pardir, os.pardir, os.pardir, 'images')

In [15]:
# Length of one flattened image vector (28 * 28 = 784 values per sample)
input_size = 28 * 28
# Number of output classes the network scores
num_classes = 10
# Number of samples per mini-batch
batch_size = 128

In [16]:
# Build both MNIST splits under the same root directory (download enabled).
# The two constructor calls differ only in the `train` flag:
# True -> training split, False -> test split.
train_dataset, test_dataset = (
    datasets.MNIST(root=path_to_img, train=is_train, download=True)
    for is_train in (True, False)
)

Before computing the mean and std,  
we need to rescale the pixel range from 0..255 to 0..1.  

That's because the first step of the transforms should be `ToTensor`,  
which converts the data to a tensor and rescales it to the range 0..1.  
So when `Normalize` is applied in the next step, the data is already in a different range,  
and we can't use a mean computed from data in the range 0..255.

In [17]:
# Rescale the raw pixel values to 0..1 before taking the statistics, so they
# match the range the data has when `Normalize` is applied.
mean = train_dataset.data.float().div(255).mean()
std = train_dataset.data.float().div(255).std()

print(f'mean = {mean:.4f}')
print(f'std  = {std:.4f}')

mean = 0.1307
std  = 0.3081


In [18]:
# Preprocessing applied to every image when a sample is fetched:
#   1. ToTensor  - converts the image to a tensor and rescales it to 0..1
#   2. Normalize - subtracts `mean` and divides by `std` (statistics of the 0..1 data)
#   3. flatten   - reshapes the image tensor into a 1-D vector for the linear layers
# The last step passes `torch.flatten` by name instead of wrapping `x.view(-1)`
# in an anonymous lambda: lambdas cannot be pickled, which breaks DataLoader
# worker processes on platforms that start workers with "spawn".
mnist_transforms = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
        transforms.Lambda(torch.flatten),
    ]
)

In [19]:
# Attach the same preprocessing pipeline to both splits (train first, then test)
train_dataset.transform = test_dataset.transform = mnist_transforms

In [20]:
# Training batches are reshuffled on every pass over the data.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
# Evaluation gains nothing from shuffling. A fixed order keeps the composition
# of each batch (including the smaller final one) identical on every pass, so
# batch-averaged validation metrics are repeatable.
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

Transforms are applied only when samples are fetched, e.g. through the DataLoader.  
The raw data stored in the dataset (`train_dataset.data`) is not changed.  
We can see this by running the same code that computes the mean and std of the dataset.

In [21]:
# Same statistics as before, recomputed from the stored raw data after the
# transforms were attached, to show that `train_dataset.data` itself is untouched.
mean = train_dataset.data.float().div(255).mean()
std = train_dataset.data.float().div(255).std()

print(f'mean = {mean:.4f}')
print(f'std  = {std:.4f}')

mean = 0.1307
std  = 0.3081


The data in each batch has been transformed, so its mean is close to 0 and its std is close to 1.

In [22]:
# Pull a single batch from the training loader and inspect its statistics
data, targets = next(iter(train_loader))
print(f'mean = {data.mean():.4f}')
print(f'std  = {data.std():.4f}')

mean = 0.0119
std  = 1.0103


In [23]:
def check_accuracy(loader, model, device=None):
    """Compute the classification accuracy of `model` over every batch in `loader`.

    The model is switched to evaluation mode while predictions are made, and
    its previous training/evaluation mode is restored afterwards, so calling
    this function does not silently change how the model behaves later.

    Args:
        loader: iterable yielding `(data, targets)` batches.
        model: the `nn.Module` to evaluate; its output is reduced with
            `max(1)` to obtain one predicted class index per sample.
        device: device the batches are moved to. Defaults to the device of the
            model's first parameter (CPU if the model has no parameters).

    Returns:
        A 0-dim tensor: correct predictions / total samples. Call `.item()`
        to get a Python float.

    Raises:
        ValueError: if `loader` yields no samples.
    """
    if device is None:
        # Evaluate wherever the model lives instead of relying on a global.
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    num_correct = 0
    num_samples = 0
    was_training = model.training
    model.eval()

    try:
        with torch.no_grad():
            for data, targets in loader:
                data = data.to(device)
                targets = targets.to(device)

                outputs = model(data)
                # index of the highest score along dim 1 is the predicted class
                _, predictions = outputs.max(1)
                num_correct += (predictions == targets).sum()
                num_samples += predictions.size(0)
    finally:
        # Put the model back into the mode the caller had it in.
        model.train(was_training)

    if num_samples == 0:
        raise ValueError('check_accuracy received a loader with no samples')

    return num_correct / num_samples

In [24]:
class NN(nn.Module):
    """Fully connected classifier with a single hidden layer.

    Architecture: Linear(input_size, hidden_size) -> ReLU -> Linear(hidden_size, num_classes).

    Args:
        input_size: number of input features per sample.
        num_classes: number of scores produced per sample.
        hidden_size: width of the hidden layer. Defaults to 50, the value that
            used to be hard-coded, so existing calls build the same network.
    """

    def __init__(self, input_size, num_classes, hidden_size=50):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return the raw class scores for `x`; no softmax is applied here."""
        hidden = F.relu(self.fc1(x))
        return self.fc2(hidden)

In [25]:
# Optimisation hyperparameters
learning_rate, num_epochs = 1e-3, 3

# Build the network, then move its parameters to the selected device
model = NN(input_size, num_classes)
model = model.to(device)

# Cross-entropy loss on the network's output scores, optimised with Adam
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

In [26]:
# Train for `num_epochs` epochs. After each epoch, report the average
# per-batch loss and the accuracy on both the training and the test data.
for epoch in range(num_epochs):
    # ---- Training phase ----
    model.train()
    running_train_loss = 0.0
    correct_train = 0
    total_train = 0

    for data, targets in tqdm(train_loader, ncols=80, desc=f'epoch: {epoch}'):
        # move the batch to the same device as the model
        data = data.to(device)
        targets = targets.to(device)

        # forward pass and loss
        outputs = model(data)
        loss = criterion(outputs, targets)

        # clear old gradients, backpropagate, then let the optimizer (Adam)
        # update the parameters
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate a plain Python float. Adding the `loss` tensor itself
        # would keep every batch's autograd history referenced by the running
        # sum and steadily grow memory use over the epoch.
        running_train_loss += loss.item()

        # predicted class = index of the highest score along dim 1
        _, predicted = outputs.max(1)
        total_train += predicted.size(0)
        correct_train += (predicted == targets).sum().item()

    # accumulated loss / number of batches
    train_loss = running_train_loss / len(train_loader)

    # number of correct predictions / number of images
    train_acc = correct_train / total_train

    # ---- Validation phase ----
    model.eval()
    running_val_loss = 0.0
    correct_val = 0
    total_val = 0
    with torch.no_grad():
        for data, targets in test_loader:
            # move the batch to the same device as the model
            data = data.to(device)
            targets = targets.to(device)

            # forward pass and loss (no gradients are tracked here)
            outputs = model(data)
            loss = criterion(outputs, targets)

            running_val_loss += loss.item()

            # predicted class = index of the highest score along dim 1
            _, predicted = outputs.max(1)
            total_val += predicted.size(0)
            correct_val += (predicted == targets).sum().item()

    # accumulated loss / number of batches
    val_loss = running_val_loss / len(test_loader)

    # number of correct predictions / number of images
    val_acc = correct_val / total_val

    print(f't_loss = {train_loss:.4f}, v_loss: {val_loss:.4f}, t_acc = {train_acc:.4f}, v_acc = {val_acc:.4f}')

# Final accuracy of the trained model, measured in evaluation mode on both
# splits. The training accuracy above is collected while the weights are
# still being updated, so the two training figures can differ.
train_acc = check_accuracy(train_loader, model).item()
test_acc = check_accuracy(test_loader, model).item()
print(f'\nfin_train_acc: {train_acc:.4f}, fin_test_acc: {test_acc:.4f}')

epoch: 0: 100%|███████████████████████████████| 469/469 [00:10<00:00, 45.37it/s]


t_loss = 0.3682, v_loss: 0.2176, t_acc = 0.8949, v_acc = 0.9358


epoch: 1: 100%|███████████████████████████████| 469/469 [00:11<00:00, 41.48it/s]


t_loss = 0.1929, v_loss: 0.1581, t_acc = 0.9444, v_acc = 0.9534


epoch: 2: 100%|███████████████████████████████| 469/469 [00:11<00:00, 39.43it/s]


t_loss = 0.1423, v_loss: 0.1404, t_acc = 0.9586, v_acc = 0.9587

fin_train_acc: 0.9641, fin_test_acc: 0.9587


```
 5 epochs -lin 50                         - 96.09  
 5 epochs -lin 100                        - 97.06  
 5 epochs -lin 50 + lin50                 - 96.17  
 5 epochs -lin 100 + lin100               - 97.10  
 5 epochs -lin 100 + lin100 + lin100      - 97.41  
 5 epochs -lin 150 + lin150 + lin150      - 97.60  
 5 epochs -lin 250 + lin250 + lin250      - 97.81  
 5 epochs -lin 250 + lin50 + lin250       - 97.60  
 5 epochs -lin 250 + lin40 + lin250       - 97.34  
 5 epochs -lin 250 + lin500 + lin250      - 97.38  
 5 epochs -lin 784 + lin784 + lin784      - 97.62  
 5 epochs -lin 1784 + lin1784 + lin1784   - 98.14  
25 epochs -lin 50                         - 97.23  
```