In [1]:
import torch
import numpy as np
import torchvision
import torch.nn as nn
from torch.utils.data.dataset import Dataset
from torch.utils.data.dataloader import DataLoader
from torchvision import datasets, transforms, models
from PIL import Image
import imagenet_classes
%matplotlib inline
from matplotlib.pyplot import imshow
import matplotlib.pyplot as plt
import os
import pandas as pd

In [2]:
# Fetch VGG19 with its pretrained weights via the `models` alias imported above.
model = models.vgg19(pretrained=True)
# Switch to inference mode; eval() returns the module, so keeping it as the
# last expression also prints the architecture.
model.eval()

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace)
    (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (17): ReLU(inplace)

In [3]:
# Keep classifier layers 0-4 only, which drops the final dropout and fc layers.
kept_layers = [model.classifier[idx] for idx in range(5)]
model.classifier = nn.Sequential(*kept_layers)

In [4]:
# Display the classifier head to check which layers remain after truncation.
model.classifier

Sequential(
  (0): Linear(in_features=25088, out_features=4096, bias=True)
  (1): ReLU(inplace)
  (2): Dropout(p=0.5)
  (3): Linear(in_features=4096, out_features=4096, bias=True)
  (4): ReLU(inplace)
)

In [5]:
# normalize features by their L2 norm
def normalize_feat(x, eps=1e-12):
    """Scale every row of ``x`` to unit L2 norm.

    Args:
        x: tensor whose dim 0 indexes samples and dim 1 indexes features,
            e.g. shape (batch, features).
        eps: lower bound applied to each norm before dividing, so an all-zero
            row comes back as zeros instead of NaN.

    Returns:
        Tensor of the same shape as ``x`` with each row divided by its own
        L2 norm. The norm is detached, so gradients do not flow through it.
    """
    # keepdim=True keeps the norms as a (batch, 1) column. Without it the
    # result has shape (batch,), which lines up with the *feature* axis when
    # broadcast: that only works for batch == 1, raises for other batch sizes,
    # and silently divides by the wrong norms when batch == features.
    x_norm = torch.norm(x, p=2, dim=1, keepdim=True).detach()

    # Guard against division by zero for all-zero rows.
    x_norm = x_norm.clamp(min=eps)

    # The (batch, 1) column broadcasts across every feature of its row.
    return x / x_norm

In [6]:
# create a pytorch dataset class
class PetDataset(Dataset):
    """Dataset yielding (normalized image tensor, integer label) pairs.

    The annotation file is read as a space-separated table without a header:
    column 0 holds the image name (without extension) and column 1 the label.

    Args:
        path_to_data: path to the annotation file.
        image_dir: directory containing the ``<name>.jpg`` files.
            Defaults to ``'images'``.
    """

    def __init__(self, path_to_data, image_dir='images'):
        train_data = pd.read_csv(path_to_data, sep=" ", header=None)
        self.train_img_names = train_data.iloc[:, 0]
        self.train_img_labels = train_data.iloc[:, 1]  # class label (1-37)
        self.data_len = len(self.train_img_names)
        self.size = (224, 224)
        self.image_dir = image_dir

    def __getitem__(self, index):
        """Load, resize and normalize the image at position ``index``.

        Returns:
            Tuple ``(img_tensor_norm, label)``.

        Raises:
            IndexError: if ``index`` is out of range.
            Any error raised while opening or decoding the image file is
            propagated, so a bad sample fails loudly at its source instead of
            surfacing later as a collate/unpack error in the DataLoader loop.
        """
        # Positional lookups: an out-of-range index raises IndexError, which
        # is what plain iteration over a dataset relies on to terminate.
        single_image_name = self.train_img_names.iloc[index] + '.jpg'
        label = int(self.train_img_labels.iloc[index])
        img_path = os.path.join(self.image_dir, single_image_name)

        # convert() returns an independent RGB copy (ensures 3 channels), so
        # the underlying file handle can be closed right away.
        with Image.open(img_path) as img_file:
            img_as_img = img_file.convert('RGB')

        preprocess = transforms.Compose([
            transforms.Resize(size=self.size),
            transforms.ToTensor(),
            # normalize by ImageNet standards
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ])
        img_tensor_norm = preprocess(img_as_img)

        return (img_tensor_norm, label)

    def __len__(self):
        """Return the number of rows in the annotation file."""
        return self.data_len

In [7]:
# Annotation files for the train/val split and the test split.
train_data_path = 'annotations/trainval.txt'
test_data_path = 'annotations/test.txt'

# Build one PetDataset per split.
train_dataset, test_dataset = (
    PetDataset(split_path) for split_path in (train_data_path, test_data_path)
)

In [14]:
# Number of samples in the test split.
len(test_dataset)

3669

In [8]:
# create dataloaders
# Feature extraction is a single pass whose result does not depend on sample
# order, so shuffling is turned off to keep runs reproducible.
train_dataloader = DataLoader(dataset=train_dataset, batch_size=1, shuffle=False)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=1, shuffle=False)

In [9]:
# obtain training data features and labels

x_train = []
y_train = []

# Inference only: no_grad() stops autograd from recording a graph for every
# forward pass, which this loop never uses.
with torch.no_grad():
    for count, (img_tensor, label) in enumerate(train_dataloader, start=1):
        # forward prop to get features
        img_feat = model(img_tensor)

        # normalize features by L2, then store features and labels as plain lists
        x_train.extend(normalize_feat(img_feat).tolist())
        y_train.extend(label.tolist())

        # progress report every 100 batches
        if count % 100 == 0:
            print(count)

100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
2200
2300
2400
2500
2600
2700
2800
2900
3000
3100
3200
3300
3400
3500
3600


In [10]:
# obtain test data features and labels

x_test = []
y_test = []

# Inference only: no_grad() stops autograd from recording a graph for every
# forward pass, which this loop never uses.
with torch.no_grad():
    for count, (img_tensor, label) in enumerate(test_dataloader, start=1):
        # forward prop to get features
        img_feat = model(img_tensor)

        # normalize features by L2, then store features and labels as plain lists
        x_test.extend(normalize_feat(img_feat).tolist())
        y_test.extend(label.tolist())

        # progress report every 100 batches
        if count % 100 == 0:
            print(count)

100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
2200
2300
2400
2500
2600
2700
2800
2900
3000
3100
3200
3300
3400
3500
3600


In [11]:
# SVM implementation and the confusion-matrix metric
from sklearn import svm
from sklearn.metrics import confusion_matrix

# Build a linear-kernel SVM and fit it on the training features in one step
# (fit() returns the fitted estimator itself).
clf = svm.SVC(kernel='linear').fit(x_train, y_train)

# Predicted labels for the test features
y_pred = clf.predict(x_test)

In [12]:
# Confusion matrix of the SVM predictions on the test set.
# With (y_test, y_pred) argument order, scikit-learn puts true labels on the
# rows and predicted labels on the columns.
cm = confusion_matrix(y_test, y_pred)
cm

array([[82,  0,  0, ...,  0,  0,  0],
       [ 0, 84,  7, ...,  0,  0,  0],
       [ 0, 13, 61, ..., 17,  0,  0],
       ...,
       [ 0,  7, 17, ..., 60,  0,  0],
       [ 0,  0,  0, ...,  0, 94,  0],
       [ 0,  0,  0, ...,  0,  1, 94]])

In [13]:
# Per-class accuracy: correct predictions (the diagonal) over each row's total.
np.diag(cm) / cm.sum(axis=1)

array([0.83673469, 0.84      , 0.61      , 0.9       , 0.95      ,
       0.88      , 0.84      , 0.95454545, 0.91919192, 0.69      ,
       0.89      , 0.87628866, 0.94      , 0.96      , 0.99      ,
       0.96      , 0.98      , 1.        , 0.97979798, 0.98      ,
       0.77      , 0.9       , 0.99      , 0.82      , 0.98      ,
       0.97      , 0.75      , 0.81      , 0.99      , 1.        ,
       0.98989899, 0.97      , 0.84      , 0.94      , 0.6741573 ,
       0.94      , 0.94      ])

In [15]:
# Mean per-class accuracy: the unweighted average of the per-class values
# above. NOTE: this is not the overall accuracy, which would be
# cm.trace() / cm.sum(); the two differ when class sizes are unequal.
np.average(cm.diagonal()/cm.sum(axis=1))

0.8986652702831485

In [None]:
'''
I used VGG19, which is not as deep as state-of-the-art models, but I figured
I would start with a simpler, smaller model (in terms of layers) so that
I could follow what was going on more easily.  It performed reasonably well,
with a mean per-class accuracy of 0.90.  Two classes in particular did
poorly: class 3 and class 35, with accuracies of 0.61 and 0.67, respectively.
These are the American Pit Bull Terrier and the Staffordshire Bull Terrier,
which are both bull-type terriers.  The confusion matrix shows 17 test
images of each breed predicted as the other, so the two breeds probably look
similar enough that the classifier confuses them with each other.


'''
