# Training Classifier for Caltech101
Codebase copied from https://debuggercafe.com/getting-95-accuracy-on-the-caltech101-dataset-using-deep-learning/

In [3]:
# imports
import matplotlib.pyplot as plt
import matplotlib
import joblib
import cv2
import os
import torch 
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import time
import random
import pretrainedmodels
from imutils import paths
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from torchvision.transforms import transforms
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm
import json
matplotlib.style.use('ggplot')
'''SEED Everything'''
def seed_everything(SEED=42):
    random.seed(SEED)
    np.random.seed(SEED)
    torch.manual_seed(SEED)
    torch.cuda.manual_seed(SEED)
    torch.cuda.manual_seed_all(SEED)
    torch.backends.cudnn.benchmark = True # keep True if all the input have same size.
SEED=42
seed_everything(SEED=SEED)
'''SEED Everything'''

'SEED Everything'

In [4]:
# device
print(f"PyTorch version: {torch.__version__}")

# Check PyTorch has access to MPS (Metal Performance Shader, Apple's GPU architecture)
print(f"Is MPS (Metal Performance Shader) built? {torch.backends.mps.is_built()}")
print(f"Is MPS available? {torch.backends.mps.is_available()}")

# Set the device
device = "mps" if torch.backends.mps.is_available() else "cpu"
device = torch.device(device)
print(f"Using device: {device}")

epochs = 20
BATCH_SIZE = 16

PyTorch version: 2.0.1
Is MPS (Metal Performance Shader) built? True
Is MPS available? True
Using device: mps


In [14]:
dataset_path = "/Volumes/disk_2t/datasets/mini_imagenet_and_hands"
print(os.path.exists(dataset_path))
with open(os.path.join(dataset_path, "train.json"), 'rb') as f:
    train_labels = json.load(f)
with open(os.path.join(dataset_path, "val.json"), 'rb') as f:
    val_labels = json.load(f)
with open(os.path.join(dataset_path, "test.json"), 'rb') as f:
    test_labels = json.load(f)

True


In [18]:
print(train_labels.keys(), len(train_labels['label_names']), len(train_labels['image_names']), len(train_labels['image_labels']))
print(val_labels.keys(), len(val_labels['label_names']), len(val_labels['image_names']), len(val_labels['image_labels']))
print(test_labels.keys(), len(test_labels['label_names']), len(test_labels['image_names']), len(test_labels['image_labels']))

dict_keys(['label_names', 'image_names', 'image_labels']) 100 38400 38400
dict_keys(['label_names', 'image_names', 'image_labels']) 100 12000 12000
dict_keys(['label_names', 'image_names', 'image_labels']) 100 9600 9600


In [20]:
images_paths = list(paths.list_images(os.path.join(dataset_path, 'archive')))
len(images_paths)

60000

In [26]:
cnt = 0
for category in os.listdir(os.path.join(dataset_path, 'archive')):
    print(len(os.listdir(os.path.join(dataset_path, 'archive', category))))
    # print(cnt)
    # cnt += 1
    # print(category)
train_labels['image_names'][0]

600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600
600


'filelists/miniImagenet/n01532829/n01532829_721.JPEG'

In [32]:
set(train_labels['image_labels']), set(val_labels['image_labels']), set(test_labels['image_labels'])

({0,
  1,
  2,
  3,
  4,
  5,
  6,
  7,
  8,
  9,
  10,
  11,
  12,
  13,
  14,
  15,
  16,
  17,
  18,
  19,
  20,
  21,
  22,
  23,
  24,
  25,
  26,
  27,
  28,
  29,
  30,
  31,
  32,
  33,
  34,
  35,
  36,
  37,
  38,
  39,
  40,
  41,
  42,
  43,
  44,
  45,
  46,
  47,
  48,
  49,
  50,
  51,
  52,
  53,
  54,
  55,
  56,
  57,
  58,
  59,
  60,
  61,
  62,
  63},
 {80,
  81,
  82,
  83,
  84,
  85,
  86,
  87,
  88,
  89,
  90,
  91,
  92,
  93,
  94,
  95,
  96,
  97,
  98,
  99},
 {64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79})

In [5]:
# For Imagenet + Hands data preparation

# label & img preparation
dataset_path = "/Volumes/disk_2t/datasets/mini_imagenet_and_hands"
data, labels = [], []

# Get hands data
for img_pth in list(paths.list_images(os.path.join(dataset_path, '102')))[:600]:
    image = cv2.imread(img_pth)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    data.append(image)
    labels.append('hand')
print("Hand setup!")


Hand setup!


In [6]:
# For imagenet_mini
for i, label in enumerate(list(os.listdir(os.path.join(dataset_path, 'archive')))[:30]):
    print(i, " - ", label)
    img_pths = list(paths.list_images(os.path.join(dataset_path, 'archive', label)))
    for j, img_pth in enumerate(img_pths):
        print("- ", j)
        image = cv2.imread(img_pth)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        data.append(image)
        labels.append(label)
print("Mini Imagenet setup!")


0  -  n04243546
-  0
-  1
-  2
-  3
-  4
-  5
-  6
-  7
-  8
-  9
-  10
-  11
-  12
-  13
-  14
-  15
-  16
-  17
-  18
-  19
-  20
-  21
-  22
-  23
-  24
-  25
-  26
-  27
-  28
-  29
-  30
-  31
-  32
-  33
-  34
-  35
-  36
-  37
-  38
-  39
-  40
-  41
-  42
-  43
-  44
-  45
-  46
-  47
-  48
-  49
-  50
-  51
-  52
-  53
-  54
-  55
-  56
-  57
-  58
-  59
-  60
-  61
-  62
-  63
-  64
-  65
-  66
-  67
-  68
-  69
-  70
-  71
-  72
-  73
-  74
-  75
-  76
-  77
-  78
-  79
-  80
-  81
-  82
-  83
-  84
-  85
-  86
-  87
-  88
-  89
-  90
-  91
-  92
-  93
-  94
-  95
-  96
-  97
-  98
-  99
-  100
-  101
-  102
-  103
-  104
-  105
-  106
-  107
-  108
-  109
-  110
-  111
-  112
-  113
-  114
-  115
-  116
-  117
-  118
-  119
-  120
-  121
-  122
-  123
-  124
-  125
-  126
-  127
-  128
-  129
-  130
-  131
-  132
-  133
-  134
-  135
-  136
-  137
-  138
-  139
-  140
-  141
-  142
-  143
-  144
-  145
-  146
-  147
-  148
-  149
-  150
-  151
-  152
-  153
-  154
-  155
- 

In [7]:
# For imagenet_mini
for i, label in enumerate(list(os.listdir(os.path.join(dataset_path, 'archive')))[30:60]):
    print(i, " - ", label)
    img_pths = list(paths.list_images(os.path.join(dataset_path, 'archive', label)))
    for j, img_pth in enumerate(img_pths):
        print("- ", j)
        image = cv2.imread(img_pth)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        data.append(image)
        labels.append(label)
print("Mini Imagenet setup!")


0  -  n01855672
-  0
-  1
-  2
-  3
-  4
-  5
-  6
-  7
-  8
-  9
-  10
-  11
-  12
-  13
-  14
-  15
-  16
-  17
-  18
-  19
-  20
-  21
-  22
-  23
-  24
-  25
-  26
-  27
-  28
-  29
-  30
-  31
-  32
-  33
-  34
-  35
-  36
-  37
-  38
-  39
-  40
-  41
-  42
-  43
-  44
-  45
-  46
-  47
-  48
-  49
-  50
-  51
-  52
-  53
-  54
-  55
-  56
-  57
-  58
-  59
-  60
-  61
-  62
-  63
-  64
-  65
-  66
-  67
-  68
-  69
-  70
-  71
-  72
-  73
-  74
-  75
-  76
-  77
-  78
-  79
-  80
-  81
-  82
-  83
-  84
-  85
-  86
-  87
-  88
-  89
-  90
-  91
-  92
-  93
-  94
-  95
-  96
-  97
-  98
-  99
-  100
-  101
-  102
-  103
-  104
-  105
-  106
-  107
-  108
-  109
-  110
-  111
-  112
-  113
-  114
-  115
-  116
-  117
-  118
-  119
-  120
-  121
-  122
-  123
-  124
-  125
-  126
-  127
-  128
-  129
-  130
-  131
-  132
-  133
-  134
-  135
-  136
-  137
-  138
-  139
-  140
-  141
-  142
-  143
-  144
-  145
-  146
-  147
-  148
-  149
-  150
-  151
-  152
-  153
-  154
-  155
- 

In [8]:
# For imagenet_mini
for i, label in enumerate(list(os.listdir(os.path.join(dataset_path, 'archive')))[60:90]):
    print(i, " - ", label)
    img_pths = list(paths.list_images(os.path.join(dataset_path, 'archive', label)))
    for j, img_pth in enumerate(img_pths):
        print("- ", j)
        image = cv2.imread(img_pth)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        data.append(image)
        labels.append(label)
print("Mini Imagenet setup!")


0  -  n02823428
-  0
-  1
-  2
-  3
-  4
-  5
-  6
-  7
-  8
-  9
-  10
-  11
-  12
-  13
-  14
-  15
-  16
-  17
-  18
-  19
-  20
-  21
-  22
-  23
-  24
-  25
-  26
-  27
-  28
-  29
-  30
-  31
-  32
-  33
-  34
-  35
-  36
-  37
-  38
-  39
-  40
-  41
-  42
-  43
-  44
-  45
-  46
-  47
-  48
-  49
-  50
-  51
-  52
-  53
-  54
-  55
-  56
-  57
-  58
-  59
-  60
-  61
-  62
-  63
-  64
-  65
-  66
-  67
-  68
-  69
-  70
-  71
-  72
-  73
-  74
-  75
-  76
-  77
-  78
-  79
-  80
-  81
-  82
-  83
-  84
-  85
-  86
-  87
-  88
-  89
-  90
-  91
-  92
-  93
-  94
-  95
-  96
-  97
-  98
-  99
-  100
-  101
-  102
-  103
-  104
-  105
-  106
-  107
-  108
-  109
-  110
-  111
-  112
-  113
-  114
-  115
-  116
-  117
-  118
-  119
-  120
-  121
-  122
-  123
-  124
-  125
-  126
-  127
-  128
-  129
-  130
-  131
-  132
-  133
-  134
-  135
-  136
-  137
-  138
-  139
-  140
-  141
-  142
-  143
-  144
-  145
-  146
-  147
-  148
-  149
-  150
-  151
-  152
-  153
-  154
-  155
- 

In [9]:
# For imagenet_mini
for i, label in enumerate(list(os.listdir(os.path.join(dataset_path, 'archive')))[90:]):
    print(i, " - ", label)
    img_pths = list(paths.list_images(os.path.join(dataset_path, 'archive', label)))
    for j, img_pth in enumerate(img_pths):
        print("- ", j)
        image = cv2.imread(img_pth)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        data.append(image)
        labels.append(label)
print("Mini Imagenet setup!")


0  -  n03908618
-  0
-  1
-  2
-  3
-  4
-  5
-  6
-  7
-  8
-  9
-  10
-  11
-  12
-  13
-  14
-  15
-  16
-  17
-  18
-  19
-  20
-  21
-  22
-  23
-  24
-  25
-  26
-  27
-  28
-  29
-  30
-  31
-  32
-  33
-  34
-  35
-  36
-  37
-  38
-  39
-  40
-  41
-  42
-  43
-  44
-  45
-  46
-  47
-  48
-  49
-  50
-  51
-  52
-  53
-  54
-  55
-  56
-  57
-  58
-  59
-  60
-  61
-  62
-  63
-  64
-  65
-  66
-  67
-  68
-  69
-  70
-  71
-  72
-  73
-  74
-  75
-  76
-  77
-  78
-  79
-  80
-  81
-  82
-  83
-  84
-  85
-  86
-  87
-  88
-  89
-  90
-  91
-  92
-  93
-  94
-  95
-  96
-  97
-  98
-  99
-  100
-  101
-  102
-  103
-  104
-  105
-  106
-  107
-  108
-  109
-  110
-  111
-  112
-  113
-  114
-  115
-  116
-  117
-  118
-  119
-  120
-  121
-  122
-  123
-  124
-  125
-  126
-  127
-  128
-  129
-  130
-  131
-  132
-  133
-  134
-  135
-  136
-  137
-  138
-  139
-  140
-  141
-  142
-  143
-  144
-  145
-  146
-  147
-  148
-  149
-  150
-  151
-  152
-  153
-  154
-  155
- 

In [9]:
# # For Caltech-101 data preparation
# # label & img preparation
# image_paths = list(paths.list_images('data/Caltech101'))
# data = []
# labels = []
# for image_path in image_paths:
#     label = image_path.split(os.path.sep)[-2]
#     if label == 'BACKGROUND_Google':
#         continue
#     image = cv2.imread(image_path)
#     image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
#     # print(type(image), (image).shape)
#     data.append(image)
#     labels.append(label)
# # data = np.array(data)
# data = data
# labels = np.array(labels)
#
# # one hot encode
# lb = LabelBinarizer()
# labels = lb.fit_transform(labels)
# print(f"Total number of classes: {len(lb.classes_)}")

Total number of classes: 103


In [3]:

data = data
labels = np.array(labels)
# one hot encode
lb = LabelBinarizer()
labels = lb.fit_transform(labels)
print(f"Total number of classes: {len(lb.classes_)}")

NameError: name 'data' is not defined

In [10]:
# define transforms
train_transform = transforms.Compose(
    [transforms.ToPILImage(),
     transforms.Resize((224, 224)),
     transforms.ToTensor(),
     transforms.Normalize(mean=[0.485, 0.456, 0.406],
                          std=[0.229, 0.224, 0.225])])
val_transform = transforms.Compose(
    [transforms.ToPILImage(),
     transforms.Resize((224, 224)),
     transforms.ToTensor(),
     transforms.Normalize(mean=[0.485, 0.456, 0.406],
                          std=[0.229, 0.224, 0.225])])

In [11]:
# divide the data into train, validation, and test set
(X, x_val , Y, y_val) = train_test_split(data, labels, 
                                                    test_size=0.2,  
                                                    stratify=labels,
                                                    random_state=42)
(x_train, x_test, y_train, y_test) = train_test_split(X, Y, 
                                                    test_size=0.25, 
                                                    random_state=42)
print(f"x_train examples: {len(x_train)}\nx_test examples: {len(x_test)}\nx_val examples: {len(x_val)}")

x_train examples: 6508
x_test examples: 2170
x_val examples: 2170


In [12]:
# custom dataset
class ImageDataset(Dataset):
    def __init__(self, images, labels=None, transforms=None):
        self.X = images
        self.y = labels
        self.transforms = transforms
         
    def __len__(self):
        return (len(self.X))
    
    def __getitem__(self, i):
        data = self.X[i][:]
        
        if self.transforms:
            data = self.transforms(data)
            
        if self.y is not None:
            return (data, self.y[i])
        else:
            return data
        
train_data = ImageDataset(x_train, y_train, train_transform)
val_data = ImageDataset(x_val, y_val, val_transform)
test_data = ImageDataset(x_test, y_test, val_transform)

# dataloaders
trainloader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
valloader = DataLoader(val_data, batch_size=BATCH_SIZE, shuffle=True)
testloader = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False)

In [13]:
# the resnet34 model
class ResNet34(nn.Module):
    def __init__(self, pretrained):
        super(ResNet34, self).__init__()
        if pretrained is True:
            self.model = pretrainedmodels.__dict__['resnet34'](pretrained='imagenet')
        else:
            self.model = pretrainedmodels.__dict__['resnet34'](pretrained=None)
        
        # change the classification layer
        self.l0 = nn.Linear(512, len(lb.classes_))
        self.dropout = nn.Dropout2d(0.4)
    def forward(self, x):
        # get the batch size only, ignore (c, h, w)
        batch, _, _, _ = x.shape
        x = self.model.features(x)
        x = F.adaptive_avg_pool2d(x, 1).reshape(batch, -1)
        x = self.dropout(x)
        l0 = self.l0(x)
        return l0
model = ResNet34(pretrained=True).to(device)



In [14]:
# optimizer
optimizer = optim.Adam(model.parameters(), lr=1e-4)
# loss function
criterion = nn.CrossEntropyLoss()

# training function
def fit(model, dataloader):
    print('Training')
    model.train()
    running_loss = 0.0
    running_correct = 0
    for i, data in tqdm(enumerate(dataloader), total=int(len(train_data)/dataloader.batch_size)):
        data, target = data[0].to(device), data[1].to(device)
        optimizer.zero_grad()
        outputs = model(data)
        loss = criterion(outputs, torch.max(target.to(torch.int32), 1)[1])
        running_loss += loss.item()
        _, preds = torch.max(outputs.data, 1)
        running_correct += (preds == torch.max(target.to(torch.int32), 1)[1]).sum().item()
        loss.backward()
        optimizer.step()
        
    loss = running_loss/len(dataloader.dataset)
    accuracy = 100. * running_correct/len(dataloader.dataset)
    
    print(f"Train Loss: {loss:.4f}, Train Acc: {accuracy:.2f}")
    
    return loss, accuracy

#validation function
def validate(model, dataloader):
    print('Validating')
    model.eval()
    running_loss = 0.0
    running_correct = 0
    with torch.no_grad():
        for i, data in tqdm(enumerate(dataloader), total=int(len(val_data)/dataloader.batch_size)):
            data, target = data[0].to(device), data[1].to(device)
            outputs = model(data)
            loss = criterion(outputs, torch.max(target.to(torch.int32), 1)[1])
            
            running_loss += loss.item()
            _, preds = torch.max(outputs.data, 1)
            running_correct += (preds == torch.max(target.to(torch.int32), 1)[1]).sum().item()
        
        loss = running_loss/len(dataloader.dataset)
        accuracy = 100. * running_correct/len(dataloader.dataset)
        print(f'Val Loss: {loss:.4f}, Val Acc: {accuracy:.2f}')
        
        return loss, accuracy

def test(model, dataloader):
    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            inputs, target = data[0].to(device), data[1].to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += target.size(0)
            correct += (predicted == torch.max(target.to(torch.int32), 1)[1]).sum().item()
    return correct, total

In [16]:
train_loss , train_accuracy = [], []
val_loss , val_accuracy = [], []
print(f"Training on {len(train_data)} examples, validating on {len(val_data)} examples...")
start = time.time()
for epoch in range(epochs):
    print(f"Epoch {epoch+1} of {epochs}")
    train_epoch_loss, train_epoch_accuracy = fit(model, trainloader)
    val_epoch_loss, val_epoch_accuracy = validate(model, valloader)
    train_loss.append(train_epoch_loss)
    train_accuracy.append(train_epoch_accuracy)
    val_loss.append(val_epoch_loss)
    val_accuracy.append(val_epoch_accuracy)
end = time.time()
print((end-start)/60, 'minutes')

In [None]:
torch.save(model.state_dict(), f"outputs/models/resnet34_epochs{epochs}.pth")
# accuracy plots
plt.figure(figsize=(10, 7))
plt.plot(train_accuracy, color='green', label='train accuracy')
plt.plot(val_accuracy, color='blue', label='validataion accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.savefig('outputs/plots/accuracy.png')
# loss plots
plt.figure(figsize=(10, 7))
plt.plot(train_loss, color='orange', label='train loss')
plt.plot(val_loss, color='red', label='validataion loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.savefig('outputs/plots/loss.png')

# save the accuracy and loss lists as pickled files
print('Pickling accuracy and loss lists...')
joblib.dump(train_accuracy, 'outputs/models/train_accuracy.pkl')
joblib.dump(train_loss, 'outputs/models/train_loss.pkl')
joblib.dump(val_accuracy, 'outputs/models/val_accuracy.pkl')
joblib.dump(val_loss, 'outputs/models/val_loss.pkl')

In [None]:
correct, total = test(model, testloader)
print('Accuracy of the network on test images: %0.3f %%' % (100 * correct / total))
print('train.py finished running')