<a href="https://colab.research.google.com/github/arkalim/PyTorch/blob/master/Fashion_MNIST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms as transforms
import time

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


## Define the transformations

In [2]:
# Define transforms for data preprocessing
transform = transforms.Compose([
    transforms.ToTensor()
])

trainset = torchvision.datasets.FashionMNIST(root='./data', train=True, download=True, transform=transform)

testset = torchvision.datasets.FashionMNIST(root='./data', train=False, download=True, transform=transform)

0it [00:00, ?it/s]

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz


26427392it [00:06, 3823445.16it/s]                              


Extracting ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz


0it [00:00, ?it/s]

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


32768it [00:00, 95432.05it/s]                            
0it [00:00, ?it/s]

Extracting ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


4423680it [00:01, 4139558.50it/s]                             
0it [00:00, ?it/s]

Extracting ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


8192it [00:00, 31945.07it/s]            

Extracting ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz
Processing...
Done!





## Create data loaders to shuffle and create batches

In [0]:
trainloader = torch.utils.data.DataLoader(trainset, batch_size=1000, shuffle=True, num_workers=2)

testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False, num_workers=2)

In [4]:
print('No. of train images: {}'.format(len(trainset)))
print('No. of test images: {}'.format(len(testset)))

print('No. of train batches: {}'.format(len(trainloader)))
print('No. of test batches: {}'.format(len(testloader)))

No. of train images: 60000
No. of test images: 10000
No. of train batches: 60
No. of test batches: 100


## Create the Network

In [5]:
class Network(nn.Module):
    
    def __init__(self):
        super(Network, self).__init__()
    
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)

        self.pool = nn.MaxPool2d(kernel_size = 2, stride = 2)

        self.fc1 = nn.Linear(12 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 60)
        self.fc3 = nn.Linear(60, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.pool(x)
        
        x = self.conv2(x)
        x = F.relu(x)
        x = self.pool(x)
        
        x = x.reshape(-1, 12 * 4 * 4)
        
        x = self.fc1(x)
        x = F.relu(x)
        
        x = self.fc2(x)
        x = F.relu(x)
        
        x = self.fc3(x)
        return x

# create an instance of the Network    
network = Network()

# move the network object to GPU
network.to(device)

Network(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 12, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=192, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=60, bias=True)
  (fc3): Linear(in_features=60, out_features=10, bias=True)
)

## Function to find the accuracy

In [0]:
def find_acc(pred, label):
    correct = pred.argmax(dim = 1).eq(label)
    accuracy = correct.to(torch.float32).mean().item()
    return accuracy

## Train the Network

In [7]:
num_epochs = 10

# loss defined using torch.nn
loss = nn.CrossEntropyLoss()

# define the optimizer
optimizer = optim.SGD(network.parameters(), lr=0.01, momentum=0.9)

# note the starting time to find the total time elapsed
start_time = time.time()

for epoch in range(1,num_epochs+1):
    
    loss_train = 0
    loss_valid = 0
    acc_train = 0
    acc_valid = 0
    
    # set the network into train mode
    network.train()
    
    for step in range(len(trainloader)):

        images , labels = next(iter(trainloader))
        
        # move the images and labels to GPU
        images = images.to(device)
        labels = labels.to(device)
        
        pred = network(images)
        
        # clear all the gradients before calculating them
        optimizer.zero_grad()
        
        # find the loss for the current step
        loss_train_step = loss(pred , labels)
        
        # find accuracy
        acc_train_step = find_acc(pred, labels)
        
        # calculate the gradients
        loss_train_step.backward()
        
        # update the parameters
        optimizer.step()
        
        loss_train += loss_train_step.item()

        acc_train += acc_train_step  
        
    network.eval()    
        
    for step in range(len(testloader)):

        images , labels = next(iter(testloader))
        
        # move the images and labels to GPU
        images = images.to(device)
        labels = labels.to(device)
        
        pred = network(images)
        
        # clear all the gradients before calculating them
        optimizer.zero_grad()
        
        # find the loss for the current step
        loss_valid_step = loss(pred , labels)
        
        # find accuracy
        acc_valid_step = find_acc(pred, labels)
      
        loss_valid += loss_valid_step.item()
    
        acc_valid += acc_valid_step
    
    loss_train /= len(trainloader)
    loss_valid /= len(testloader)
   
    acc_train /= len(trainloader)
    acc_valid /= len(testloader)
    
    print('Epoch: {}  Train Loss: {}  Train Acc: {}  Valid Loss: {}  Valid Acc: {}'.format(epoch, loss_train, acc_train, loss_valid, acc_valid))
    
# find the time at the end of training    
end_time = time.time()

total_time = end_time - start_time
print("Total time taken : {}".format(total_time))

Epoch: 1  Train Loss: 2.3032644152641297  Train Acc: 0.09945000434915224  Valid Loss: 2.2960023880004883  Valid Acc: 0.09999999403953552
Epoch: 2  Train Loss: 2.293939208984375  Train Acc: 0.10143333797653516  Valid Loss: 2.282881259918213  Valid Acc: 0.09999999403953552
Epoch: 3  Train Loss: 2.1478913048903148  Train Acc: 0.25403334411482015  Valid Loss: 1.5311754941940308  Valid Acc: 0.44999998807907104
Epoch: 4  Train Loss: 1.0353164047002792  Train Acc: 0.608083360393842  Valid Loss: 0.8027231097221375  Valid Acc: 0.6699999570846558
Epoch: 5  Train Loss: 0.7841018269459407  Train Acc: 0.7015333662430445  Valid Loss: 0.7975581884384155  Valid Acc: 0.6499999761581421
Epoch: 6  Train Loss: 0.6961853017409643  Train Acc: 0.7313667019208272  Valid Loss: 0.7145153284072876  Valid Acc: 0.7099999785423279
Epoch: 7  Train Loss: 0.6426092356443405  Train Acc: 0.755000035961469  Valid Loss: 0.7043855786323547  Valid Acc: 0.7099999785423279
Epoch: 8  Train Loss: 0.6058665176232656  Train Acc: 

## Testing the model

In [8]:
def test_model(model):

    start_time = time.time()

    num_correct = 0
    accuracy = 0

    # turning off backprop and gradient calculation.
    # this improves performance 
    with torch.no_grad():

        for batch in testloader:

            images, labels = batch
            images = images.to(device)
            labels = labels.to(device)

            total_images = len(testset)

            pred = model(images)

            num_correct_batch = pred.argmax(dim = 1).eq(labels).sum().item()

            accuracy_batch = pred.argmax(dim = 1).eq(labels).float().mean().item()

            num_correct += num_correct_batch
            accuracy += accuracy_batch

        accuracy /= len(testloader)

    print('Total number of test images: {}'.format(total_images))
    print('Total number of correct predictions: {}'.format(num_correct))
    print('Accuracy: {}'.format(accuracy * 100))

    end_time = time.time()
    print("Elapsed Time : {}".format(end_time - start_time))
    
# test the trained network    
test_model(network)    

Total number of test images: 10000
Total number of correct predictions: 7884
Accuracy: 78.83999824523926
Elapsed Time : 0.8637130260467529


## Save and Restore

### Save and Load Model's Parameters
#### Saving the model's state_dict()

In [0]:
path = 'network_weights.pth'

# Save the parameters
torch.save(network.state_dict(), path)

#### Loading the model's state_dict()


In [10]:
# create a new model object 
new_network = Network()

# move the network to GPU
new_network.to(device)

# load the network's parameters
new_network.load_state_dict(torch.load(path))

# set the network into evaluate mode
new_network.eval()

Network(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 12, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=192, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=60, bias=True)
  (fc3): Linear(in_features=60, out_features=10, bias=True)
)

#### Test

In [11]:
test_model(new_network)

Total number of test images: 10000
Total number of correct predictions: 7884
Accuracy: 78.83999824523926
Elapsed Time : 0.857184648513794


## Saving and Loading entire Model

In [12]:
path = 'full_network.pth'

# save the model
torch.save(network, path)

# load the model
new_model = torch.load(path)
new_model.eval()

  "type " + obj.__name__ + ". It won't be checked "


Network(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 12, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=192, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=60, bias=True)
  (fc3): Linear(in_features=60, out_features=10, bias=True)
)

#### Test

In [13]:
test_model(new_model)

Total number of test images: 10000
Total number of correct predictions: 7884
Accuracy: 78.83999824523926
Elapsed Time : 0.8697969913482666
