# Stanford Dogs CNN 

In [13]:
# basic modeling
import os
import torch
#import torchvision
import torch.nn as nn
import numpy as np

# progress bar
from tqdm import tqdm

# for reading and displaying images and editing
from skimage.io import imread
from skimage.transform import resize
import matplotlib.pyplot as plt
%matplotlib inline

# validation & test set
from sklearn.model_selection import train_test_split

# Loss Function & Optimizer
import torch.nn.functional as F
import torch.optim as optim

# Data Cleaning
#from torchvision.transforms import ToTensor
from torch.autograd import Variable # requires_gradient = true
from torch.nn import Linear, ReLU, Sequential, Conv2d, MaxPool2d, Module
from torch.nn import Softmax, BatchNorm2d, Dropout

In [3]:
# Directory where data is
#laptop
#data_dir = r'C:\Users\dsk02\Desktop\python_projects\torch_dogs\main_images\Images'

#gpu comp
data_dir = r'C:\Users\Dylan\Desktop\GPU_RUN\StanfordDogsCNN\images\Images'
print(os.listdir(data_dir))

['n02085620-Chihuahua', 'n02085782-Japanese_spaniel', 'n02085936-Maltese_dog', 'n02086079-Pekinese', 'n02086240-Shih-Tzu', 'n02086646-Blenheim_spaniel', 'n02086910-papillon', 'n02087046-toy_terrier', 'n02087394-Rhodesian_ridgeback', 'n02088094-Afghan_hound', 'n02088238-basset', 'n02088364-beagle', 'n02088466-bloodhound', 'n02088632-bluetick', 'n02089078-black-and-tan_coonhound', 'n02089867-Walker_hound', 'n02089973-English_foxhound', 'n02090379-redbone', 'n02090622-borzoi', 'n02090721-Irish_wolfhound', 'n02091032-Italian_greyhound', 'n02091134-whippet', 'n02091244-Ibizan_hound', 'n02091467-Norwegian_elkhound', 'n02091635-otterhound', 'n02091831-Saluki', 'n02092002-Scottish_deerhound', 'n02092339-Weimaraner', 'n02093256-Staffordshire_bullterrier', 'n02093428-American_Staffordshire_terrier', 'n02093647-Bedlington_terrier', 'n02093754-Border_terrier', 'n02093859-Kerry_blue_terrier', 'n02093991-Irish_terrier', 'n02094114-Norfolk_terrier', 'n02094258-Norwich_terrier', 'n02094433-Yorkshire_t

In [4]:
# clean up classes name
temp = os.listdir(data_dir)

classes = []

# class cleaning

for each_folder in tqdm(temp):
    
    # split words on -
    split_words = each_folder.split('-')
    
    # if there is more than one dash
    if len(split_words) > 2:
        
        # create temp word
        temp = ''
        # for each value in the split_words array (starting from 1)
        for i in range(1,len(split_words)):
            
            # if it's not the end add word + space
            if i != (len(split_words)-1):
                temp += (split_words[i] + ' ')
            else:
                # if end just add the word
                temp += (split_words[i])
        
        # append temp to classes
        classes.append(temp.lower())
    
    # if it equals 2 it's just nasty title + name, append name
    elif len(split_words) == 2:
        classes.append(each_folder.split('-')[1].lower())
    
    # outliers append 
    else:
        classes.append(each_folder.lower())
    
# print first few classes, all lower case
print(classes[0:15])

100%|███████████████████████████████████████████████████████████████████████████████| 120/120 [00:00<?, ?it/s]


['chihuahua', 'japanese_spaniel', 'maltese_dog', 'pekinese', 'shih tzu', 'blenheim_spaniel', 'papillon', 'toy_terrier', 'rhodesian_ridgeback', 'afghan_hound', 'basset', 'beagle', 'bloodhound', 'bluetick', 'black and tan_coonhound']


In [5]:
from collections import defaultdict

# create lookup dictionary for index -> class
lookup = [x for x in range(0,len(classes))]

# initialize with strings
classLookup = defaultdict(str)

# index and class, push into defaultDict ->
for i, each_class in enumerate(classes):
    classLookup[i] = each_class
    
# will use at end for lookups on guesses
print(classLookup)

defaultdict(<class 'str'>, {0: 'chihuahua', 1: 'japanese_spaniel', 2: 'maltese_dog', 3: 'pekinese', 4: 'shih tzu', 5: 'blenheim_spaniel', 6: 'papillon', 7: 'toy_terrier', 8: 'rhodesian_ridgeback', 9: 'afghan_hound', 10: 'basset', 11: 'beagle', 12: 'bloodhound', 13: 'bluetick', 14: 'black and tan_coonhound', 15: 'walker_hound', 16: 'english_foxhound', 17: 'redbone', 18: 'borzoi', 19: 'irish_wolfhound', 20: 'italian_greyhound', 21: 'whippet', 22: 'ibizan_hound', 23: 'norwegian_elkhound', 24: 'otterhound', 25: 'saluki', 26: 'scottish_deerhound', 27: 'weimaraner', 28: 'staffordshire_bullterrier', 29: 'american_staffordshire_terrier', 30: 'bedlington_terrier', 31: 'border_terrier', 32: 'kerry_blue_terrier', 33: 'irish_terrier', 34: 'norfolk_terrier', 35: 'norwich_terrier', 36: 'yorkshire_terrier', 37: 'wire haired_fox_terrier', 38: 'lakeland_terrier', 39: 'sealyham_terrier', 40: 'airedale', 41: 'cairn', 42: 'australian_terrier', 43: 'dandie_dinmont', 44: 'boston_bull', 45: 'miniature_schnau

In [6]:
# load train images
train_img = []
train_targets = []

def load_train(): 

    temp = os.listdir(data_dir)

    # for each folder in the train set
    for i_, each_folder in tqdm(enumerate(temp)):

        print('{} out of {}'.format(i_, len(temp)))

        #current image names
        image_names = os.listdir(data_dir + '/' + str(temp[i_]))

        # for each file in the folder 
        for i, img_name in enumerate(image_names):

            # image path
            image_path = str(str(data_dir) + '/' + str(temp[i_]) + '/' + str(img_name))

            #read image
            img = imread(image_path)

            #change image shape to -> 3,28,28 (originally were massive)
            img = resize(img, (3,28,28))

            # convert dt -> may have to change if 2 big
            img = img.astype('float32')

            #append to trainning list
            train_img.append(img)

            #append to targets list
            train_targets.append(i)
            
            # delete this
            if i == 70:
                print(i)
                return 0

load_train()

0it [00:00, ?it/s]

0 out of 120
70


0it [00:09, ?it/s]


0

In [7]:
# convert to numpy arrays, make sure worked also
print(len(train_img))
print(len(train_targets))

train_x = np.array(train_img)
train_y = np.array(train_targets)

71
71


# Creating validation set and preprocessing the images

In [8]:
train_x, val_x, train_y, val_y = train_test_split(train_x, 
                                                train_y, 
                                                test_size = 0.1,
                                                shuffle=True,
                                                random_state=32)

In [12]:
torch.cuda.is_available()

False

In [10]:
print(train_x.shape)
print(train_y.shape)
print(val_x.shape)
print(val_y.shape)

(63, 3, 28, 28)
(63,)
(8, 3, 28, 28)
(8,)


In [11]:
train_xT = torch.from_numpy(train_x)
train_yT = torch.from_numpy(train_y)
train_yT = train_yT.long()

print(train_xT.shape, train_yT.shape)

val_xT = torch.from_numpy(val_x)
val_yT = torch.from_numpy(val_y)
val_yT = val_yT.long()

print(val_xT.shape, val_yT.shape)


torch.Size([63, 3, 28, 28]) torch.Size([63])
torch.Size([8, 3, 28, 28]) torch.Size([8])


# Build Model

In [12]:
import torch.nn as nn
import torch.nn.functional as F

class StanfordDogs(nn.Module):
    def __init__(self):
        super().__init__()
        
        self.cnn_layers = Sequential(
            Conv2d(3,32, kernel_size=3, stride=1, padding=1),
            BatchNorm2d(32),
            ReLU(inplace=True),
            
            Conv2d(32,64, kernel_size=3, stride=1, padding=1),
            BatchNorm2d(64),
            ReLU(inplace=True),
            MaxPool2d(2, 2) # 64 x 14 x 14   
        )
        
        self.linear_layers = Sequential(
            Linear(64 * 14 * 14, len(classes)))
        
    def forward(self,x):
        x = self.cnn_layers(x)
        x = x.view(x.size(0),-1)
        x = self.linear_layers(x)
        return x


# Basic Modeling / GPU Check

In [16]:
torch.cuda.is_available()

False

In [14]:
# Initialize Model
model = StanfordDogs()

# optimizer, Adam GOAT
optimizer = optim.Adam(model.parameters(), lr=0.05)

# loss function
criterion = nn.CrossEntropyLoss()

# if gpu -> Cuda
if torch.cuda.is_available():
    # push to gpu
    model = model.cuda()
    criterion = criterion.cuda()
    print('GPU is available \n')
else:
    print('GPU is not available \n')
    
print(model)

GPU is not available 

StanfordDogs(
  (cnn_layers): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (linear_layers): Sequential(
    (0): Linear(in_features=12544, out_features=120, bias=True)
  )
)


# Trainning Day

In [129]:
loss_Tracker = []

def train(epoch):
    
    # tell model that trainning is coming
    model.train()
    
    # loss
    loss = 0
    
    # get trainning set
    x_train, y_train = Variable(train_xT), Variable(train_yT) # req_grad = Tru
    
    # get validation set
    x_val, y_val = Variable(val_xT), Variable(val_yT)
    
    # check gpu, if so -> send
    if torch.cuda.is_available():
        x_train = x_train.cuda()
        y_train = y_train.cuda()
        x_val = x_val.cuda()
        y_val = y_val.cuda()
        
    
    #clear gradients of model params
    optimizer.zero_grad()
    
    #preds
    train_preds = model(x_train)
    val_preds = model(x_val)
    
    # loss
    train_loss = criterion(train_preds, y_train)
    loss_val = criterion(val_preds, y_val)
    
    # keep track of val set loss, want to graph
    loss_Tracker.append(loss_val)
    
    # update weights
    train_loss.backward()
    optimizer.step()
    
    # extract float
    loss = loss_val.item()
    print('Epoch : {}, Loss {}.'.format(epoch+1,loss_val))

In [130]:
# Running
epochs = 25
for epoch in tqdm(range(0,epochs)):
    train(epoch)

  4%|█▌                                    | 1/25 [00:00<00:14,  1.65it/s]

Epoch : 1, Loss 224.33897399902344.


  8%|███                                   | 2/25 [00:01<00:11,  2.00it/s]

Epoch : 2, Loss 216.2875213623047.


 12%|████▌                                 | 3/25 [00:01<00:10,  2.14it/s]

Epoch : 3, Loss 209.3113555908203.


 16%|██████                                | 4/25 [00:01<00:09,  2.20it/s]

Epoch : 4, Loss 203.17184448242188.


 20%|███████▌                              | 5/25 [00:02<00:07,  2.68it/s]

Epoch : 5, Loss 197.75917053222656.


 24%|█████████                             | 6/25 [00:02<00:06,  3.13it/s]

Epoch : 6, Loss 193.00613403320312.


 28%|██████████▋                           | 7/25 [00:02<00:05,  3.46it/s]

Epoch : 7, Loss 188.8110809326172.


 32%|████████████▏                         | 8/25 [00:02<00:04,  3.73it/s]

Epoch : 8, Loss 185.02732849121094.


 36%|█████████████▋                        | 9/25 [00:03<00:04,  3.80it/s]

Epoch : 9, Loss 181.6373291015625.


 40%|██████████████▊                      | 10/25 [00:03<00:03,  3.84it/s]

Epoch : 10, Loss 178.63388061523438.


 44%|████████████████▎                    | 11/25 [00:03<00:03,  3.89it/s]

Epoch : 11, Loss 175.9929962158203.


 48%|█████████████████▊                   | 12/25 [00:03<00:03,  4.07it/s]

Epoch : 12, Loss 173.68231201171875.


 52%|███████████████████▏                 | 13/25 [00:04<00:02,  4.08it/s]

Epoch : 13, Loss 171.66152954101562.


 56%|████████████████████▋                | 14/25 [00:04<00:02,  3.98it/s]

Epoch : 14, Loss 169.9087371826172.


 60%|██████████████████████▏              | 15/25 [00:04<00:02,  4.13it/s]

Epoch : 15, Loss 168.3998260498047.


 64%|███████████████████████▋             | 16/25 [00:04<00:02,  4.07it/s]

Epoch : 16, Loss 167.11131286621094.


 68%|█████████████████████████▏           | 17/25 [00:05<00:02,  3.07it/s]

Epoch : 17, Loss 166.02102661132812.


 72%|██████████████████████████▋          | 18/25 [00:05<00:02,  2.57it/s]

Epoch : 18, Loss 165.11412048339844.


 76%|████████████████████████████         | 19/25 [00:06<00:02,  2.31it/s]

Epoch : 19, Loss 164.36875915527344.


 80%|█████████████████████████████▌       | 20/25 [00:06<00:02,  2.12it/s]

Epoch : 20, Loss 163.76512145996094.


 84%|███████████████████████████████      | 21/25 [00:07<00:01,  2.02it/s]

Epoch : 21, Loss 163.28692626953125.


 88%|████████████████████████████████▌    | 22/25 [00:07<00:01,  1.97it/s]

Epoch : 22, Loss 162.91744995117188.


 92%|██████████████████████████████████   | 23/25 [00:08<00:01,  1.86it/s]

Epoch : 23, Loss 162.64024353027344.


 96%|███████████████████████████████████▌ | 24/25 [00:09<00:00,  1.83it/s]

Epoch : 24, Loss 162.4394989013672.


100%|█████████████████████████████████████| 25/25 [00:09<00:00,  2.56it/s]

Epoch : 25, Loss 162.30213928222656.



