**Image classification with your own data**
---

Download the directories in this link https://drive.google.com/drive/folders/1Cfos2BCUdCriC_9ygHFUvA6SKr0YPZtJ?usp=sharing

Before running code, make sure the training images are in the class directories for train, valid and test.

1) Train directory is './drive/My Drive/public/train'

2) Valid directory is './drive/My Drive/public/valid'

3) Test directory is './drive/My Drive/public/test'

For example, with 'HDH' and 'OH' classes 
* ./drive/My Drive/public/train/HDH/*.jpg
* ./drive/My Drive/public/train/OH/*.jpg
* ./drive/My Drive/public/valid/HDH/*.jpg
* ./drive/My Drive/public/valid/OH/*.jpg
* ./drive/My Drive/public/test/HDH/*.jpg
* ./drive/My Drive/public/test/OH/*.jpg

Initial tutorial code was provided by hchoi@handong.edu, Nov. 30, 2019. Code is updated in Jun. 23. 2020 for Gcamp

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init
import torchvision
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader
from torch.autograd import Variable
import os
import warnings
warnings.filterwarnings("ignore")

Define model architecture

In [3]:
class MyCNN(nn.Module):
    def __init__(self, output_dim=10):
        super(MyCNN,self).__init__()

        self.output_dim=output_dim

        self.cnn_layers = nn.Sequential(
            nn.Conv2d(3,32,3,padding=1), # try with different kernels
            nn.ReLU(),
            nn.Conv2d(32,32,3,padding=1),
            nn.ReLU(),
            nn.MaxPool2d(4,4), # 32 x (25x25)
            
            nn.Conv2d(32,16,3,padding=1),
            nn.ReLU(),
            nn.Conv2d(16,16,3,padding=1),
            nn.ReLU(),
            nn.MaxPool2d(5,5) # 16 x (5x5) 
        )
        conv_size = self.get_conv_size(3, input_size)
        self.fc_layer = nn.Sequential(
            nn.Linear(conv_size,100),
            nn.ReLU(),
            nn.Linear(100,output_dim)
        )       

    def get_conv_size(self, channel, shape):
        o = self.cnn_layers(torch.zeros(1, channel, *shape))
        return int(np.prod(o.size()))
        
    def forward(self,x):
        batch_size, c, h, w = x.data.size()
        out = self.cnn_layers(x)
        out = out.view(batch_size, -1)
        out = self.fc_layer(out)
        return out


Prepare Data

In [4]:
resize=(120, 120)
input_size=(100, 100)

data_transforms = {
    'train': transforms.Compose([
        transforms.Resize(resize),
        #transforms.RandomCrop(input_size), # data augmentation
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.3, 0.3, 0.3])
    ]),
    'valid': transforms.Compose([
        transforms.Resize(resize),
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.3, 0.3, 0.3])
    ]),
}
test_transform = data_transforms['valid']

batch_size = 64 # try different batch_size values
data_dir = './drive/My Drive/public/'
train_dir = 'train'
valid_dir = 'valid'

train_set = datasets.ImageFolder(data_dir+train_dir, data_transforms['train'])
valid_set = datasets.ImageFolder(data_dir+valid_dir, data_transforms['valid'])

train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size,
                                              shuffle=True, num_workers=4)
valid_loader = torch.utils.data.DataLoader(valid_set, batch_size=batch_size,
                                              shuffle=True, num_workers=4)

train_size = len(train_set)
valid_size = len(valid_set)

class_names = train_set.classes
print(class_names)

print(f'Train image samples: {train_size}')
print(f'Validation image samples: {valid_size}')

['ANH', 'HDH', 'Hyoam', 'NTH', 'OH']
Train image samples: 4054
Validation image samples: 241


Define training parameters and model

In [7]:
num_epoch = 10 # try with different epochs and find the best epoch
patience=5
output_dim=5
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = MyCNN(output_dim=output_dim).to(device)
param_list = list(model.children())
print(param_list)

learning_rate = 0.001
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
#optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)
#optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate)

loss_func = nn.CrossEntropyLoss()

[Sequential(
  (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU()
  (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): ReLU()
  (4): MaxPool2d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
  (5): Conv2d(32, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (6): ReLU()
  (7): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (8): ReLU()
  (9): MaxPool2d(kernel_size=5, stride=5, padding=0, dilation=1, ceil_mode=False)
), Sequential(
  (0): Linear(in_features=400, out_features=100, bias=True)
  (1): ReLU()
  (2): Linear(in_features=100, out_features=5, bias=True)
)]


Training and validating loop


In [None]:
total_loss = 0.0
n_steps = 0
bad_counter = 0
best_vacc = None
best_epoch = None

result_dir = 'drive/My Drive/public/results/'
if not os.path.exists(result_dir):
    os.mkdir(result_dir)

for i in range(num_epoch):
    model.train()
    for j, [image,label] in enumerate(train_loader):
        x = image.to(device)
        y_= label.to(device)

        optimizer.zero_grad()
        output = model(x)
        loss = loss_func(output,y_)

        total_loss += loss.item()
        n_steps += 1

        loss.backward()
        optimizer.step()

    print('[TRAIN LOSS] ', 'Epochs {:d} Loss {:.4f}'.format(i+1, total_loss / n_steps))
    total_loss, n_steps = 0.0, 0

    model.eval()
    hits = 0
    for k,[image,label] in enumerate(valid_loader):
        x = image.to(device)
        y_= label.to(device)

        output = model(x)
        y_est = output.argmax(1)
        hits = hits + torch.sum(y_est == y_).item()

    vacc = float(int(hits)/(valid_size+0.0))
    print('[VALID ACC] ', 'Epochs', i, 'Hits', int(hits), 'Accuracy {:.4f}'.format(vacc))

    if best_vacc is None or best_vacc < vacc:
        best_vacc = vacc
        bad_counter = 0
        best_epoch = i+1
        torch.save(model, result_dir + 'own_data.pth.best')
    else:
        bad_counter +=1
    if bad_counter >= patience:
        break

print('best epoch: ', best_epoch)

[TRAIN LOSS]  Epochs 1 Loss 0.8063
[VALID ACC]  Epochs 0 Hits 220 Accuracy 0.9129
[TRAIN LOSS]  Epochs 2 Loss 0.2032
[VALID ACC]  Epochs 1 Hits 226 Accuracy 0.9378
[TRAIN LOSS]  Epochs 3 Loss 0.1314
[VALID ACC]  Epochs 2 Hits 230 Accuracy 0.9544


Test the model

In [None]:
test_batch_size = 10
model_name = 'own_data.pth.best'

model = torch.load(result_dir + model_name)
model.to(device)
model.eval()

test_dir = 'drive/My Drive/public/valid2'
test_set = datasets.ImageFolder(test_dir, test_transform)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=test_batch_size,
                                          shuffle=False, num_workers=4)

hits = 0
for k,[image,label] in enumerate(test_loader):
    x = image.to(device)
    y_= label.to(device)

    output = model(x)
    y_est = output.argmax(1)
    print('Target', label.numpy(), 'Prediction', y_est.cpu().numpy())
    hits = hits + torch.sum(y_est == y_).item()

print('[TEST] ', 'hits', int(hits),'accuracy {:.4f}'.format(float(hits/(len(test_set)+0.0))))

classify one image

In [None]:
from skimage import io

img_name = './drive/My Drive/public/test/test1.jpg'
test_img = io.imread(img_name)
test_img = transforms.ToPILImage()(test_img)
test_img = test_transform(test_img)
test_data = test_img.unsqueeze(0).to(device)

output=model(test_data)

class_id = output.argmax(dim=1).cpu().numpy()[0]
print(img_name.split('/')[-1], '==>', class_id, class_names[class_id])

test1.jpg ==> 0 ANH


classify all images in a directory

In [None]:
from skimage import io
import glob

img_dir = 'drive/My Drive/public/test/'
file_list = glob.glob(img_dir + '*.*')
for img_name in file_list:
  test_img = io.imread(img_name)
  test_img = transforms.ToPILImage()(test_img)
  test_img = test_transform(test_img)
  test_data = test_img.unsqueeze(0).to(device)

  output=model(test_data)

  class_id = output.argmax(dim=1).cpu().numpy()[0]
  print(img_name.split('/')[-1], '==>', class_id, class_names[class_id])


please print?
53
D11_NTH15.jpeg ==> 3 NTH
D08_OH29.jpg ==> 4 OH
D15_OH31.jpg ==> 3 NTH
D18_Hyoam38.JPG ==> 2 Hyoam
D05_NTH03.jpg ==> 3 NTH
D17_NTH08.jpg ==> 3 NTH
D03_OH13.jpg ==> 4 OH
D05_NTH31.jpg ==> 3 NTH
D17_Hyoam39.jpeg ==> 2 Hyoam
D09_NTH30.jpg ==> 3 NTH
D12_NTH15.jpg ==> 3 NTH
D01_Hyoam41.jpg ==> 2 Hyoam
D04_OH18.jpeg ==> 4 OH
D06_OH41.jpg ==> 3 NTH
D13_NTH30.jpg ==> 3 NTH
D05_NTH45.jpg ==> 3 NTH
D12_OH21.jpg ==> 4 OH
D06_OH29.jpg ==> 4 OH
D11_NTH30.jpeg ==> 3 NTH
D18_NTH30.jpg ==> 3 NTH
D14_OH12.jpg ==> 4 OH
D11_NTH01.jpeg ==> 3 NTH
D15_OH43.jpg ==> 0 ANH
D16_OH34.jpg ==> 4 OH
D01_OH22.jpg ==> 4 OH
D18_Hyoam40.JPG ==> 2 Hyoam
D07_HDH07.jpg ==> 1 HDH
D18_ANH48.jpg ==> 3 NTH
D11_Hyoam05.JPG ==> 2 Hyoam
D06_ANH32.jpg ==> 0 ANH
D07_ANH22.jpg ==> 0 ANH
D15_HDH04.JPG ==> 4 OH
D12_HDH10.jpg ==> 1 HDH
D12_AHN36.jpg ==> 0 ANH
D14_Hyoam12.jpg ==> 2 Hyoam
test1.jpg ==> 0 ANH
D08_Hyoam09.jpg ==> 2 Hyoam
D09_Hyoam09.JPG ==> 2 Hyoam
D07_HDH45.jpg ==> 4 OH
D13_ANH47.jpg ==> 3 NTH
D08_ANH23.j

The end! (hchoi@handong.edu)

---

