## Combined CXE + MSE

In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix
from torch.optim import Adam
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Setting seeds to try and ensure we have the same results - this is not guaranteed across PyTorch releases.
import torch
torch.manual_seed(0)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [3]:
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, SubsetRandomSampler
import random
from os import listdir
from shutil import copyfile

In [4]:
import tensorflow as tf 
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from os import makedirs

In [5]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cpu')

In [6]:
from tensorflow.keras import models
from tensorflow.keras.applications import *
from tensorflow.keras import layers



## Creating directories

In [29]:
#creating directories for test and train data set
dataset_home = 'cat_vs_dog/'
subdirs = ['train/', 'test/']
for subdir in subdirs:
	# create label subdirectories
	labeldirs = ['dogs/', 'cats/']
	for labldir in labeldirs:
		newdir = dataset_home + subdir + labldir
		makedirs(newdir, exist_ok=True)

In [30]:
#load the csv file
df = pd.read_csv("cadod.csv")
df.head()


Unnamed: 0,ImageID,Source,LabelName,Confidence,XMin,XMax,YMin,YMax,IsOccluded,IsTruncated,...,IsDepiction,IsInside,XClick1X,XClick2X,XClick3X,XClick4X,XClick1Y,XClick2Y,XClick3Y,XClick4Y
0,0000b9fcba019d36,xclick,/m/0bt9lr,1,0.165,0.90375,0.268333,0.998333,1,1,...,0,0,0.63625,0.90375,0.74875,0.165,0.268333,0.506667,0.998333,0.661667
1,0000cb13febe0138,xclick,/m/0bt9lr,1,0.0,0.651875,0.0,0.999062,1,1,...,0,0,0.3125,0.0,0.3175,0.651875,0.0,0.410882,0.999062,0.999062
2,0005a9520eb22c19,xclick,/m/0bt9lr,1,0.094167,0.611667,0.055626,0.998736,1,1,...,0,0,0.4875,0.611667,0.243333,0.094167,0.055626,0.226296,0.998736,0.305942
3,0006303f02219b07,xclick,/m/0bt9lr,1,0.0,0.999219,0.0,0.998824,1,1,...,0,0,0.508594,0.999219,0.0,0.478906,0.0,0.375294,0.72,0.998824
4,00064d23bf997652,xclick,/m/0bt9lr,1,0.240938,0.906183,0.0,0.694286,0,0,...,0,0,0.678038,0.906183,0.240938,0.522388,0.0,0.37,0.424286,0.694286


In [31]:
df.LabelName.replace({'/m/01yrx':'cat', '/m/0bt9lr':'dog'}, inplace=True)

In [32]:
dog_list = df[df.LabelName == 'dog']['ImageID']
cat_list = df[df.LabelName == 'cat']['ImageID']
#list(dog_list)

In [33]:
file_name_test = []
file_name_train = []

In [34]:

# moving images to test and train folder

random.seed(10)
# define ratio of pictures to use for test
test_ratio = 0.20
count_c = 0
count_d = 0

# copy training dataset images into subdirectories
src_directory = 'cadod/'
for file in listdir(src_directory):
    #print(file.replace('.jpg','').replace('._','') in list(cat_list))
    #print(file.replace('.jpg','').replace('._','') in list(dog_list))
    src = src_directory + '/' + file
    dst_dir = 'train/'
    if random.random() < test_ratio:
        dst_dir = 'test/'
        file_name_test.append(file.replace('.jpg','').replace('._',''))
    if file.replace('.jpg','').replace('._','') in list(cat_list) and count_c < 500:
        dst = dataset_home + dst_dir + 'cats/'  + file
        count_c +=1
        copyfile(src, dst)
        file_name_train.append(file.replace('.jpg','').replace('._',''))
    elif file.replace('.jpg','').replace('._','') in list(dog_list) and count_d < 500:
        dst = dataset_home + dst_dir + 'dogs/'  + file
        count_d +=1
        copyfile(src, dst)
        file_name_train.append(file.replace('.jpg','').replace('._',''))

In [35]:
train_id = pd.DataFrame (file_name_train, columns = ['ImageID'])
train_id.head()

Unnamed: 0,ImageID
0,2b55a824f4a375d3
1,bc26925fd646efe5
2,f8fccbefa2e8e33f
3,9c730899f38007cc
4,01901b6370020f3c


In [36]:
test_id = pd.DataFrame (file_name_test, columns = ['ImageID'])
test_id.head()

Unnamed: 0,ImageID
0,d00eb685487904b0
1,ddfc5237d20952a7
2,e60d548f2f124a01
3,f4add7bb2ee11f8d
4,8054527db8754ab1


In [37]:
df.ImageID.astype('O')
train_id.ImageID.astype('O')
df_n = df[['ImageID','XMin', 'YMin', 'XMax', 'YMax']]
df_n.set_index('ImageID')
train_id.set_index('ImageID')
train_id_n = df_n.join(train_id, how = 'left', lsuffix = '_left', rsuffix = '_right')
train_id_n.drop(columns = ['ImageID_right'], inplace = True)
train_id_n.head(5)

Unnamed: 0,ImageID_left,XMin,YMin,XMax,YMax
0,0000b9fcba019d36,0.165,0.268333,0.90375,0.998333
1,0000cb13febe0138,0.0,0.0,0.651875,0.999062
2,0005a9520eb22c19,0.094167,0.055626,0.611667,0.998736
3,0006303f02219b07,0.0,0.0,0.999219,0.998824
4,00064d23bf997652,0.240938,0.0,0.906183,0.694286


In [38]:
df.ImageID.astype('O')
test_id.ImageID.astype('O')
#df.set_index('ImageID')
test_id.set_index('ImageID')
test_id_n = df_n.join(test_id, how = 'left', lsuffix = '_left', rsuffix = '_right')
test_id_n.drop(columns = ['ImageID_right'], inplace = True)
test_id_n.head(5)

Unnamed: 0,ImageID_left,XMin,YMin,XMax,YMax
0,0000b9fcba019d36,0.165,0.268333,0.90375,0.998333
1,0000cb13febe0138,0.0,0.0,0.651875,0.999062
2,0005a9520eb22c19,0.094167,0.055626,0.611667,0.998736
3,0006303f02219b07,0.0,0.0,0.999219,0.998824
4,00064d23bf997652,0.240938,0.0,0.906183,0.694286


In [39]:
expLog = pd.DataFrame(columns=["exp_name", 
                               "Train Loss", 
                               "Valid Loss",
                               "Test Loss",
                              ])

### Image Classification

In [40]:
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

transform_test = transforms.Compose([
    #transforms.ToPILImage(),             
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
    ])
transform_train = transforms.Compose([
    #transforms.ToPILImage(),             
    transforms.Resize((128, 128)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(40),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
    #transforms.RandomAutocontrast()
    
    ])

In [41]:
train_it = datasets.ImageFolder('cat_vs_dog/train/', transform=transform_train)
test_it = datasets.ImageFolder('cat_vs_dog/test/', transform=transform_test)

dataset_size = len(train_it)
dataset_indices = list(range(dataset_size))
np.random.shuffle(dataset_indices)

In [42]:
dataset_size

800

In [43]:
idx2class = {v: k for k, v in train_it.class_to_idx.items()}
idx2class

{0: 'cats', 1: 'dogs'}

In [44]:
dataset_size = len(train_it)
dataset_indices = list(range(dataset_size))
#dataset_indices[val_split_index:]

In [45]:
np.random.shuffle(dataset_indices)

In [46]:
val_split_index = int(np.floor(0.2 * dataset_size))

In [47]:
train_idx, val_idx = dataset_indices[val_split_index:], dataset_indices[:val_split_index]

In [48]:
train_sampler = SubsetRandomSampler(train_idx)
val_sampler = SubsetRandomSampler(val_idx)

In [50]:
bs_train = 16
bs_test = 4
bs_valid = 8
trainloader = DataLoader(dataset=train_it, shuffle=False, batch_size=bs_train, sampler=train_sampler)
valloader = DataLoader(dataset=train_it, shuffle=False, batch_size=bs_valid, sampler=val_sampler)
testloader = DataLoader(test_it, batch_size=bs_test, shuffle=False)

In [51]:
y_box_train = train_id_n[val_split_index:]
y_box_val =  train_id_n[:val_split_index]
y_box_val.shape

(160, 5)

In [52]:
for images, labels in trainloader:
    print(images.size(), labels.size())
    print(labels)
    break

torch.Size([16, 3, 128, 128]) torch.Size([16])
tensor([1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1])


In [53]:
for images, labels in valloader:
    print(images.size(), labels.size())
    print(labels)
    break

torch.Size([8, 3, 128, 128]) torch.Size([8])
tensor([0, 0, 0, 0, 0, 1, 0, 0])


In [54]:
import numpy as np
X = np.load('data/img.npy', allow_pickle=True)
y_label = np.load('data/y_label.npy', allow_pickle=True)
y_bbox = np.load('data/y_bbox.npy', allow_pickle=True)

In [55]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [56]:
y_train_tensor = torch.from_numpy(y_box_train[['XMin', 'YMin', 'XMax', 'YMax']].to_numpy())
y_val_tensor = torch.from_numpy(y_box_val[['XMin', 'YMin', 'XMax', 'YMax']].to_numpy())
y_test_tensor = torch.from_numpy(test_id_n[['XMin', 'YMin', 'XMax', 'YMax']].to_numpy())
#y_val_tensor

In [58]:
from torch.optim import Adam
#defining neural network layers
class cadod_c(nn.Module): 
    def __init__(self):
        super(cadod_c, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=10, kernel_size=3)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=3)
        self.conv2_drop = nn.Dropout2d(0.1)
        self.fc1 = nn.Linear(18000, 400)
        self.fc2 = nn.Linear(400, 2)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(x.shape[0],-1)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return x



model_c = cadod_c()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model_c.parameters(), lr=0.0002, weight_decay = 3e-3)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones = [500,1000,1500], gamma = 0.5)

In [60]:
#regression neural network
class cadod_r(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3)
        self.pool1 = nn.MaxPool2d(2, 2)

        self.conv2 = nn.Conv2d(in_channels=64, out_channels=32, kernel_size=3)
        self.pool2 = nn.MaxPool2d(2, 2)

        self.conv3 = nn.Conv2d(in_channels=32, out_channels=16, kernel_size=3)
        self.pool3 = nn.MaxPool2d(2, 2)

        self.fc1 = nn.Linear(in_features=14*14*16, out_features=64)
        self.fc3 = nn.Linear(in_features=64, out_features=32)
        self.fc5 = nn.Linear(in_features=32, out_features=4) 

  
    def forward(self, x):
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = self.pool3(F.relu(self.conv3(x)))

        x = nn.Flatten()(x)

        x = F.relu(self.fc1(x))
       
    
        x = F.relu(self.fc3(x))
       
        r = self.fc5(x)
       
        return r
    
model_r = cadod_r()
# MSE loss scaffolding layer
loss_fn = torch.nn.MSELoss() 
optimizer = torch.optim.Adam(model_r.parameters(), lr=0.0005, weight_decay = 3e-4)

In [61]:
accuracy_stats = {
    'train': [],
    "val": []
}
loss_stats = {
    'train': [],
    "val": []
}

In [62]:
def binary_acc(y_pred, y_test):
    y_pred_tag = torch.log_softmax(y_pred, dim = 1)
    _, y_pred_tags = torch.max(y_pred_tag, dim = 1)    
    correct_results_sum = (y_pred_tags == y_test).sum().float()    
    acc = correct_results_sum/y_test.shape[0]
    acc = torch.round(acc * 100)    
    return acc

In [64]:
from torch.utils.tensorboard import SummaryWriter
import numpy as np

writer = SummaryWriter()

In [66]:
model_c = model_c.to(device)
model_r = model_r.to(device)


num_epochs = 50


for e in range(num_epochs):
    cum_epoch_loss = 0
    cum_acc = 0
    batch_loss = 0
    mse_loss = 0
    train = 0
    test = 0
    val = 0
    model_c.train()
    model_r.train()
    #Training the model
    for batch, (images, labels) in enumerate(trainloader,1):
        
        images = images.to(device)
        labels = labels.to(device)
        #images_2 = images_2.to(device)
        
        bbox = y_train_tensor[train:train+bs_train].to(device)
        train +=bs_train
        # Clear gradient buffers because we don't want any gradient from previous epoch to carry forward, dont want to cummulate gradients
        optimizer.zero_grad()
        label_pred = model_c(images).squeeze()  #training the classifier model
        box_pred = model_r(images)  #training the regressor model
        loss_1 = criterion(label_pred, labels) #CXE loss
        acc = binary_acc(label_pred, labels)
        loss_2 = loss_fn(box_pred, torch.unsqueeze(bbox.float(), dim=1)) #MSE
        loss = loss_1 + loss_2 #combined loss
        loss.backward() #backpropagating loss
        optimizer.step()  #gradient update
        batch_loss += loss.item()
        cum_acc += acc.item()
        
        scheduler.step()
        #print(f'Epoch({e}/{num_epochs} : Batch number({batch}/{len(trainloader)})')
    
    #Evaluating the model on validation set
    with torch.no_grad():
        model_c.eval()
        model_r.eval()
        val_epoch_loss = 0
        val_epoch_acc = 0
        
        val = 0
        for batch, (X_val_batch, y_val_batch) in enumerate(valloader,1):
            X_val_batch, y_val_batch = X_val_batch.to(device), y_val_batch.to(device)
            y_box_val = y_val_tensor[val:val+bs_valid].to(device)
            
            y_val_pred = model_c(X_val_batch).squeeze()
            y_box_pred = model_r(X_val_batch).squeeze()
                        
            val_loss = criterion(y_val_pred, y_val_batch)
            val_acc = binary_acc(y_val_pred, y_val_batch) 
            
            mse_loss = loss_fn(y_box_pred, torch.unsqueeze(y_box_val.float(), dim=1))
            
            val_epoch_loss += val_loss.item() + mse_loss.item()
            val_epoch_acc += val_acc.item()
            val += bs_valid
            
     #saving the results for plotting   
    loss_stats['train'].append(batch_loss/len(trainloader))
    loss_stats['val'].append(val_epoch_loss/len(valloader))
    accuracy_stats['train'].append(cum_acc/len(trainloader))
    accuracy_stats['val'].append(val_epoch_acc/len(valloader))
    
    print(f'Epoch({e}/{num_epochs})')
    print(f'Training loss : {batch_loss/len(trainloader)}')  
    print(f'Training accuracy : {cum_acc/len(trainloader)}')  
    print(f'Validation loss : {val_epoch_loss/len(valloader)}')  
    print(f'Validation accuracy : {val_epoch_acc/len(valloader)}') 
    writer.add_scalars('CXE + MSE Loss', {'Training': np.round(batch_loss/len(trainloader), 3),
                      'Validation': np.round(val_epoch_loss/len(valloader), 3),}, e)
    
writer.close()   

Epoch(0/50)
Training loss : 0.7118749812245369
Training accuracy : 49.35
Validation loss : 0.7100486849900335
Validation accuracy : 51.95
Epoch(1/50)
Training loss : 0.7090466111898422
Training accuracy : 48.6
Validation loss : 0.7075532027520239
Validation accuracy : 51.75
Epoch(2/50)
Training loss : 0.7081201106309891
Training accuracy : 52.5
Validation loss : 0.704474622849375
Validation accuracy : 51.9
Epoch(3/50)
Training loss : 0.7084089532494545
Training accuracy : 49.725
Validation loss : 0.7061632547527552
Validation accuracy : 51.9
Epoch(4/50)
Training loss : 0.7080016046762466
Training accuracy : 49.575
Validation loss : 0.7052379011176526
Validation accuracy : 51.9
Epoch(5/50)
Training loss : 0.7057248204946518
Training accuracy : 50.7
Validation loss : 0.705911069130525
Validation accuracy : 51.9
Epoch(6/50)
Training loss : 0.706674014031887
Training accuracy : 50.5
Validation loss : 0.7070628259563818
Validation accuracy : 51.7
Epoch(7/50)
Training loss : 0.70575727671384

In [67]:
%load_ext tensorboard

In [68]:
%tensorboard --logdir=runs

In [69]:
#Getting testing loss
y_pred_list = []
y_true_list = []
#model.eval()
#with torch.no_grad():
with torch.no_grad():
        model_c.eval()
        model_r.eval()
        test_epoch_loss = 0
        test_epoch_acc = 0
        #val_mse_loss = 0
        test = 0
        for batch, (X_test_batch, y_test_batch) in enumerate(testloader,1):
            X_test_batch, y_test_batch = X_test_batch.to(device), y_test_batch.to(device)
            y_box_test = y_test_tensor[test:test+bs_valid].to(device)
            #print(y_box_val)
            y_test_pred = model_c(X_test_batch).squeeze()
            y_box_pred = model_r(X_test_batch).squeeze()
            #y_val_pred = torch.unsqueeze(y_val_pred, 0)            
            test_loss = criterion(y_test_pred, y_test_batch)
            test_acc = binary_acc(y_test_pred, y_test_batch) 
            #print(y_box_val)
            #print(y_box_pred)
            mse_loss = loss_fn(y_box_pred, torch.unsqueeze(y_box_test.float(), dim=1))
            #print(val_loss.item(),mse_loss.item())
            test_epoch_loss += test_loss.item() + mse_loss.item()
            test_epoch_acc += test_acc.item()
            test += bs_valid
            
        

        #print(f'Epoch({e}/{num_epochs})')
        print(f'Test loss : {test_epoch_loss/len(testloader)}')  
          
        

  return F.mse_loss(input, target, reduction=self.reduction)


Test loss : 0.7089972050581127
