### Get the data

In [1]:
# Mount Google Drive into the Colab filesystem at /content/drive so that files
# stored on Drive can be read by the following cells.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Copy the zipped dataset from the mounted Drive into the current working
# directory (the relative path presumably assumes the notebook runs from
# /content, where Drive was mounted above - confirm on other setups).
!cp drive/MyDrive/_NN_NLP/PyTorch/datasets/shoes.zip . 

In [None]:
!unzip shoes.zip

### Define and train the model

In [13]:
import torch
import numpy as np
import torch.nn.functional as F
import torchvision
from torchvision import transforms
from torch.utils.data import DataLoader
import torch.nn as nn
from tqdm.notebook import tqdm
from PIL import Image

In [14]:
# create data loaders

# The preprocessing pipeline is stored under the name `transforms` (later cells
# call it directly), which shadows the imported `transforms` module. The
# pipeline is therefore built from the fully qualified `torchvision.transforms`
# so that this cell can be re-run without hitting the shadowed name.
transforms = torchvision.transforms.Compose([
    torchvision.transforms.Resize((64, 64)),   # every image becomes 64x64
    torchvision.transforms.ToTensor(),
    # per-channel normalization; these values look like the commonly used
    # ImageNet channel statistics - confirm they suit this dataset
    torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225]),
    ])

# ImageFolder derives the class of an image from the sub-directory it lives in
train_data_path = "train"
train_data = torchvision.datasets.ImageFolder(root=train_data_path,
                                              transform=transforms)

val_data_path = "val"
val_data = torchvision.datasets.ImageFolder(root=val_data_path,
                                            transform=transforms)

test_data_path = "test"
test_data = torchvision.datasets.ImageFolder(root=test_data_path,
                                             transform=transforms)

batch_size = 64
# only the training batches need to be shuffled; the evaluation metrics are
# aggregated over the whole split and do not depend on the batch order
train_data_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_data_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)
test_data_loader = DataLoader(test_data, batch_size=batch_size)

In [15]:
# run on the CUDA device when PyTorch can see one, otherwise stay on the CPU
if torch.cuda.is_available():
  device = torch.device("cuda")
else:
  device = torch.device("cpu")

# define the network
class SimpleNet(nn.Module):
  """Fully connected classifier with two hidden ReLU layers.

  Every input sample is flattened to a vector of `in_features` values and
  passed through fc1 -> ReLU -> fc2 -> ReLU -> fc3. The output is the raw
  (un-normalized) score per class; no softmax is applied here.

  Args:
    in_features: number of values per flattened sample. The default 12288
      equals 64*64*3, i.e. one 64x64 image with 3 channels.
    hidden1: number of units in the first hidden layer.
    hidden2: number of units in the second hidden layer.
    num_classes: number of output scores (3 categories by default).
  """

  def __init__(self, in_features=12288, hidden1=84, hidden2=50, num_classes=3):
    super().__init__()
    # layer names are kept as fc1/fc2/fc3 so previously saved state dicts still load
    self.fc1 = nn.Linear(in_features, hidden1)
    self.fc2 = nn.Linear(hidden1, hidden2)
    self.fc3 = nn.Linear(hidden2, num_classes)

  def forward(self, x):
    """Return the class scores, shape (number of samples, num_classes)."""
    # flatten each sample; `reshape` (unlike `view`) also accepts tensors that
    # are not contiguous in memory, e.g. the result of a `permute`
    x = x.reshape(-1, self.fc1.in_features)
    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    return self.fc3(x)

# build the network and place its parameters on the selected device
simplenet = SimpleNet().to(device)
# last expression of the cell: echoes the layer summary of the model
simplenet

SimpleNet(
  (fc1): Linear(in_features=12288, out_features=84, bias=True)
  (fc2): Linear(in_features=84, out_features=50, bias=True)
  (fc3): Linear(in_features=50, out_features=3, bias=True)
)

In [None]:
# Adam over all parameters of the network, with a learning rate of 1e-3
optimizer = torch.optim.Adam(params=simplenet.parameters(), lr=1e-3)

In [None]:
# training loop
def train(model, optimizer, loss_fn, train_loader, val_loader, epochs=20, device=None):
  """Train `model` and print loss/accuracy for both splits after every epoch.

  Each epoch runs one optimization pass over `train_loader` followed by one
  gradient-free evaluation pass over `val_loader`.

  Args:
    model: network to optimize; switched to train mode for the training pass
      and to eval mode for the validation pass.
    optimizer: optimizer that updates the parameters of `model`.
    loss_fn: callable `loss_fn(predictions, labels)`. It is assumed to return
      the mean loss of the batch, because the value is re-weighted by the
      batch size before averaging over the dataset.
    train_loader: yields `(inputs, labels)` batches; must expose `.dataset`.
    val_loader: yields `(inputs, labels)` batches; must expose `.dataset`.
    epochs: number of passes over `train_loader`.
    device: device the batches are moved to. When None (the default) the
      device of the model's first parameter is used, so that batches and
      weights always live on the same device; a model without parameters
      falls back to the CPU.
  """
  if device is None:
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

  for epoch in tqdm(range(epochs)):

    # ---- training pass ----
    model.train()
    training_loss = 0.0 # reset for the epoch
    correct = 0
    for inputs, labels in train_loader: # one optimization step per batch

      inputs = inputs.to(device)
      labels = labels.to(device)

      predictions = model(inputs) # model scores for the batch
      loss = loss_fn(predictions, labels) # compare scores with target labels

      optimizer.zero_grad() # drop gradients of the previous step
      loss.backward() # compute gradients
      optimizer.step() # update the parameters

      # undo the per-batch averaging so that a smaller last batch is weighted correctly
      training_loss += loss.item() * inputs.size(0)
      # number of correctly classified samples, kept as a plain Python int
      correct += (predictions.argmax(dim=1) == labels).sum().item()

    # average over all training examples
    # (len(train_loader) would be the number of batches instead)
    training_loss /= len(train_loader.dataset)
    train_accuracy = correct / len(train_loader.dataset)

    # ---- validation pass, once all training batches were processed ----
    model.eval()
    valid_loss = 0.0
    correct = 0
    with torch.no_grad(): # no gradients needed: faster and less memory usage
      for inputs, labels in val_loader:

        inputs = inputs.to(device)
        labels = labels.to(device)

        predictions = model(inputs)
        valid_loss += loss_fn(predictions, labels).item() * inputs.size(0)
        correct += (predictions.argmax(dim=1) == labels).sum().item()

    # average over all validation examples
    valid_loss /= len(val_loader.dataset)
    val_accuracy = correct / len(val_loader.dataset)

    # adjacent f-strings are concatenated, so no indentation leaks into the message
    print(f'Epoch: {epoch+1}, train_loss: {training_loss:.2f}, '
          f'val_loss: {valid_loss:.2f}, val_accuracy = {val_accuracy:.2f}, '
          f'train_accuracy = {train_accuracy:.2f}')

*Start training...*

In [None]:
# pass the device explicitly so that the batches are moved to wherever the
# model's weights were placed
train(simplenet, optimizer, torch.nn.CrossEntropyLoss(), train_data_loader,
      val_data_loader, device=device)

  0%|          | 0/20 [00:00<?, ?it/s]

Epoch: 1, train_loss: 0.99,          val_loss: 0.37, val_accuracy = 0.87,          train_accuracy = 0.67
Epoch: 2, train_loss: 0.32,          val_loss: 0.38, val_accuracy = 0.78,          train_accuracy = 0.87
Epoch: 3, train_loss: 0.31,          val_loss: 0.31, val_accuracy = 0.88,          train_accuracy = 0.87
Epoch: 4, train_loss: 0.22,          val_loss: 0.42, val_accuracy = 0.78,          train_accuracy = 0.92
Epoch: 5, train_loss: 0.23,          val_loss: 0.47, val_accuracy = 0.82,          train_accuracy = 0.91
Epoch: 6, train_loss: 0.30,          val_loss: 0.35, val_accuracy = 0.88,          train_accuracy = 0.89
Epoch: 7, train_loss: 0.17,          val_loss: 0.29, val_accuracy = 0.86,          train_accuracy = 0.94
Epoch: 8, train_loss: 0.16,          val_loss: 0.29, val_accuracy = 0.88,          train_accuracy = 0.94
Epoch: 9, train_loss: 0.13,          val_loss: 0.28, val_accuracy = 0.89,          train_accuracy = 0.95
Epoch: 10, train_loss: 0.09,          val_loss: 0.33, v

After 20 epochs, we get a validation accuracy of 92 percent and a train accuracy of 99 percent.

In a real-world scenario, we would also be interested in the final test accuracy, to better understand how well our model generalizes to unseen data.

### Inference and saving model weights

The following is a small example of how we can perform inference (make predictions) with the trained model:

In [9]:
# class names in the order of the model's output units
labels = ['boot','sandal', 'shoe']

# Convert to RGB before preprocessing: the normalization step is defined for
# exactly 3 channels, so a grayscale or RGBA file would otherwise fail.
# The context manager closes the image file once it has been read.
with Image.open("./test/boot/boot (948).jpg") as raw_img:
    img = transforms(raw_img.convert("RGB")).to(device)
img = torch.unsqueeze(img, 0) # add the batch dimension expected by the model

# set the model in evaluation mode
simplenet.eval()
with torch.no_grad(): # inference only, no gradients needed
    prediction = F.softmax(simplenet(img), dim=1)
prediction = prediction.argmax().item() # plain int, usable as a list index
print(f'{labels[prediction]}')

For a boot image the predicted label is sandal
For a sandal image the predicted label is sandal
For a shoe image the predicted label is sandal


In [None]:
state_dict = simplenet.state_dict()
# show the parameter names and shapes only; printing the state dict itself
# would dump every single weight into the cell output
print({name: tuple(values.shape) for name, values in state_dict.items()})
torch.save(state_dict, "shoe_model.tar")

### Test accuracy

In [16]:
# map_location places the stored tensors on the device used in this session,
# so weights saved from a GPU run can also be loaded on a CPU-only runtime
state_dict = torch.load("shoe_model.tar", map_location=device)
simplenet.load_state_dict(state_dict)

<All keys matched successfully>

In [25]:
def eval_model(model, data_loader):
    """Print and return the classification accuracy of `model` on `data_loader`.

    Args:
        model: network producing one score per class for every sample.
        data_loader: iterable of `(inputs, labels)` batches.

    Returns:
        Fraction of correctly classified samples as a float in [0, 1].

    Raises:
        ValueError: if `data_loader` yields no samples.
    """
    model.eval() # Set model to eval mode

    # batches are moved to the device the model lives on; a model without
    # parameters is evaluated on the CPU
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    true_preds, num_preds = 0, 0
    with torch.no_grad(): # Deactivate gradients for the following code
        for data_inputs, data_labels in data_loader:
            data_inputs = data_inputs.to(model_device)
            data_labels = data_labels.to(model_device)

            preds = model(data_inputs)

            # true_preds counts the correct predictions, num_preds all predictions
            true_preds += (preds.argmax(dim=1) == data_labels).sum().item()
            num_preds += data_labels.shape[0]

    if num_preds == 0:
        raise ValueError("data_loader yielded no samples; accuracy is undefined")

    acc = true_preds / num_preds
    print(f"Test accuracy of the model: {100.0*acc:4.2f}%")
    return acc

In [26]:
# score the network on the test split; the trailing ';' keeps the cell output
# limited to the accuracy line printed by the function
eval_model(model=simplenet, data_loader=test_data_loader);

Test accuracy of the model: 91.19%
