In [1]:
import numpy as np
from glob import glob
import os
import cv2
import matplotlib.pyplot as plt
from PIL import Image
from PIL import ImageFile
import torch
import torchvision
from torch.autograd import Variable
from torchvision import datasets
from torchvision import transforms
import torchvision.models as models
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [2]:
#load images
dog_files = np.array(glob("dogImages/*/*/*"))
human_files = np.array(glob("lfw/*/*"))

# print number of images in each dataset
print('Total dog images are %d ' % len(dog_files))
print('Total human images are %d ' % len(human_files))

Total dog images are 8351 
Total human images are 13233 


In [3]:
#face detector function
face_cascade = cv2.CascadeClassifier('detector_architectures/haarcascade_frontalface_default.xml')

def face_detector(img_path):
    
    image = cv2.imread(img_path)
    image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(image_gray)
    
    return len(faces) > 0


In [17]:
#calculate percentage of human in both files

human_files_per = human_files[:100]
dog_files_per = dog_files[:100]

sum_human_hd = 0
sum_dog_hd = 0

for i in range(0,len(human_files_per)):
    human_hd = human_files_per[i]
    dog_hd = dog_files_per[i]
    
    if face_detector(human_hd) == True:
        sum_human_hd += 1
    if face_detector(dog_hd) == True:
        sum_dog_hd += 1

per_human_hd = (sum_human_hd/len(human_files_per))*100
per_dog_hd = (sum_dog_hd/len(dog_files_per))*100
print("The percentage of human in human files is " ,per_human_hd)
print("The percentage of dog in dog files is " ,per_dog_hd)

The percentage of human in human files is  100.0
The percentage of dog in dog files is  52.0


In [4]:
#percentage of dog in both files
#dog detetctor using vgg16 model

VGG = models.vgg16(pretrained = True)

#VGG function to return index of images from ImageNet
def VGG_16(img):
#def VGG16_predict(img):
    image = Image.open(img)
    data_transform = transforms.Compose([transforms.RandomResizedCrop(224),
                                        transforms.ToTensor(),
                                        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                            std=[0.229, 0.224, 0.225])])
    image_trans = data_transform(image)
    imagee = image_trans.unsqueeze(0)
    image_var = Variable(imagee)
    image_vgg = VGG(image_var)
    image_index = image_vgg.data.numpy().argmax()
    
    return image_index

In [5]:
#dog detector index b/w 151 and 268
def dog_detector(img):
    index_re = VGG_16(img)
    
    if index_re >= 151 and index_re <= 268:
        return True
    else:
        return False

In [5]:
#calculate percentage of dog in both files
human_files_per = human_files[:100]
dog_files_per = dog_files[:100]

sum_human_dd = 0
sum_dog_dd = 0

for i in range(0,len(human_files_per)):
    human_dd = human_files_per[i]
    dog_dd = dog_files_per[i]
    
   # d = dog_detector(dog_dd)
    
    if dog_detector(human_dd) == True:
        sum_human_dd += 1
    if dog_detector(dog_dd) == True:
        sum_dog_dd += 1

per_human_dd = (sum_human_dd/len(human_files_per))*100
per_dog_dd = (sum_dog_dd/len(dog_files_per))*100
print("The percentage of human in human files is " ,per_human_dd)
print("The percentage of dog in dog files is " ,per_dog_dd)

The percentage of human in human files is  1.0
The percentage of dog in dog files is  92.0


In [2]:
#CNN
#data preperation
data_train = os.path.join("dogImages/","train/")
data_valid = os.path.join("dogImages/","valid/")
data_test = os.path.join("dogImages/","test/")

#transforms

data_transform = { 
                    'train' : transforms.Compose([transforms.RandomResizedCrop(224),
                                        transforms.RandomHorizontalFlip(),
                                        transforms.ToTensor(),
                                        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                            std=[0.229, 0.224, 0.225])]),
    
                    'valid' : transforms.Compose([transforms.RandomResizedCrop(224),
                                        transforms.RandomHorizontalFlip(),
                                        transforms.ToTensor(),
                                        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                            std=[0.229, 0.224, 0.225])]),
    
                    'test' : transforms.Compose([transforms.RandomResizedCrop(224),
                                        transforms.ToTensor(),
                                        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                            std=[0.229, 0.224, 0.225])])
                 }

#load image data set
train_set = datasets.ImageFolder(data_train, transform = data_transform['train'])
valid_set = datasets.ImageFolder(data_valid, transform = data_transform['valid'])
test_set = datasets.ImageFolder(data_test, transform = data_transform['test'])

#Dataloader
batch_size = 15
num_workers = 0
train_loader = torch.utils.data.DataLoader(train_set,batch_size = batch_size,num_workers = num_workers,shuffle = True)
valid_loader = torch.utils.data.DataLoader(valid_set,batch_size = batch_size,num_workers = num_workers,shuffle = True)
test_loader = torch.utils.data.DataLoader(test_set,batch_size = batch_size,num_workers = num_workers,shuffle = True)

loaders = {
    'train': train_loader,
    'valid': valid_loader,
    'test': test_loader
}

#check tensor size of the image
sample = next(iter(train_set))
image, label = sample
print(image.shape)

num_classes = train_set.classes
print(len(num_classes))
print(len(loaders['train']))
print(len(loaders['valid']))
print(len(loaders['test']))

torch.Size([3, 224, 224])
133
446
56
56


In [3]:
ImageFile.LOAD_TRUNCATED_IMAGES = True

In [4]:
#CNN model architecture
class Net(nn.Module):
    
    def __init__(self):
        super(Net, self).__init__()
        
        #convo layers
        self.conv1 = nn.Conv2d(3,32,3,2)
        self.conv2 = nn.Conv2d(32,64,3,2)
        self.conv3 = nn.Conv2d(64,128,3,2)
        #self.conv4 = nn.Conv2d(128,256,3)
        #self.conv5 = nn.Conv2d(256,512,3)
        
        #pooling layer
        self.pool = nn.MaxPool2d(2,2)
        
        #linear layers
        self.fc1 = nn.Linear(128*3*3,2048)
        self.fc2 = nn.Linear(2048,133)
         
        #dropout layer
        self.dropout = nn.Dropout(0.3)
    
    def forward(self, x):
        #first layer
        x = self.pool(F.relu(self.conv1(x)))
        x = self.dropout(x)
        #second layer
        x = self.pool(F.relu(self.conv2(x)))
        x = self.dropout(x)
        #third layer
        x = self.pool(F.relu(self.conv3(x)))
        x = self.dropout(x)
        '''
        #fourth layer
        x = self.conv4(x)
        x = F.relu(x)
        x = self.pool(x)
        #fifth layer
        x = self.conv5(x)
        x = F.relu(x)
        x = self.pool(x)
        #x = self.dropout(x)
        '''
        
        #reshape tensor
        x = x.view(-1,128*3*3)
        #last layer
        x = self.dropout(x)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)

        return x
         
# instantiate the CNN
net = Net()
print(net)
#print(net.conv1.weight.shape)
#print(net.conv1.weight[0][0].shape)

Net(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2))
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=1152, out_features=2048, bias=True)
  (fc2): Linear(in_features=2048, out_features=133, bias=True)
  (dropout): Dropout(p=0.3)
)


In [11]:
#loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(net.parameters(), lr = 0.0001)
#criterion = nn.CrossEntropyLoss()
#optimizer = optim.SGD(net.parameters(), lr=0.05)

In [14]:
#train and validate model
def train(n_epochs,model,loader,optimizer,criterion,save_path):
    
    for epoch in range(n_epochs):
        train_loss = 0
        valid_loss = 0
        
        #training 
        net.train()
        for batch, (data,target) in enumerate(loaders['train']):
            
            target = target.view(1,-1)
            print(data.shape)
            print(target.shape)
 
            #zero the gradients
            optimizer.zero_grad()
            
            #get output
            outputs = net(data)
            print(outputs.shape)
            
            #calculate loss
            loss = criterion(outputs,target)
            
            #backward prop
            loss.backward()
            
            #update parameters
            optimizer.step()
            
            #calculate training loss
            #train_loss += loss.item()
            train_loss = train_loss + ((1 / (batch + 1)) * (loss.data - train_loss))

            
            #print results
            if batch % 100 == 0:
                print("Epoch: {}, Batch: {}, Training Loss: {}".format(epoch+1, batch, train_loss))
        
        #validating
        net.eval()
        for batch, (data,target) in enumerate(loaders['valid']):
            
            #get output
            outputs = net(data)
            
            #calculate loss
            loss = criterion(outputs,target)

            #calculate training loss
            valid_loss += loss.item()
            
            #print results
            if batch % 100 == 0:
                print("Epoch: {}, Batch: {}, Validation Loss: {}".format(epoch+1, batch, valid_loss))
            
            '''       
            if valid_loss < valid_loss_min:
                torch.save(model.state_dict(), save_path)
                print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
                valid_loss_min,
                valid_loss))
                valid_loss_min = valid_loss
            ''' 
            
            #save the model
            torch.save(model.state_dict, save_path)
            
    #return net       
    print("Finished Training and Validating")

In [15]:
#train the model
n_epochs = 1
train(n_epochs,net,loaders,optimizer,criterion,'saved_model/dog_model.pt')

torch.Size([15, 3, 224, 224])
torch.Size([1, 15])
torch.Size([15, 133])


  return F.mse_loss(input, target, reduction=self.reduction)


RuntimeError: The size of tensor a (133) must match the size of tensor b (15) at non-singleton dimension 1

In [None]:
# load the model that got the best validation accuracy
#net.load_state_dict(torch.load('saved_model/dog_model.pt'))

In [46]:
#test model
def test(loader, model, criterion):

    test_loss = 0
    correct = 0
    total = 0

    for batch, (data, target) in enumerate(loaders['test']):
        # move to GPU
        #if use_cuda:
         #   data, target = data.cuda(), target.cuda()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = net(data)
        # calculate the loss
        loss = criterion(output, target)
        #test loss
        test_loss = test_loss + ((1 / (batch + 1)) * (loss.data - test_loss))
        _, predicted = torch.max(output.data, 1)
        total += target.size(0)
        correct += (predicted == target).sum().item()
            
    print('Test Loss: {:.6f}\n'.format(test_loss))

    print('\nTest Accuracy: %2d%% (%2d/%2d)' % (100. * correct / total, correct, total))

# call test function    
test(loaders, net, criterion)

Test Loss: 4.745484


Test Accuracy:  2% (18/836)
