The model found during the base model finding phase. The future models will be compared to this.

In [2]:
# Organize data for pytorch DataLoader
import os
import shutil
import random
import torch

DATA_DIR = '../../dl2021-image-corpus-proj/'
# DATA_DIR = '/Users/hartih/Documents/School/Deep learning/Final_project/dl2021-image-corpus-proj/'
ANNOTATIONS_DIR = DATA_DIR + 'annotations/'
IMAGES_DIR = DATA_DIR + 'images/'

# New fodlers for train, test, and dev sets
TRAIN_DIR = DATA_DIR + 'train/'
DEV_DIR = DATA_DIR + 'dev/'
TEST_DIR = DATA_DIR + 'test/'

annotations = ["baby",
               "bird",
               "car",
               "clouds",
               "dog",
               "female",
               "flower",
               "male",
               "night",
               "people",
               "portrait",
               "river",
               "sea",
               "tree"]

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Create labels for images
image_file_names = os.listdir(IMAGES_DIR)
dict_labels = {}
for image_file_name in image_file_names:  # Initiate label tensors
    if os.path.isfile(IMAGES_DIR + image_file_name):
        dict_labels[image_file_name] = torch.zeros(14)
for i in range(len(annotations)):  # Fill label tensors with 1's if found in one of the annotations text files
    with open(ANNOTATIONS_DIR + annotations[i] + ".txt") as f:
        for row in f:
            row = "im" + row.strip() + ".jpg"
            dict_labels[row][i] = 1

In [4]:
dict_labels

{'im1976.jpg': tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]),
 'im12710.jpg': tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]),
 'im14361.jpg': tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]),
 'im17668.jpg': tensor([0., 0., 0., 0., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0.]),
 'im10107.jpg': tensor([0., 0., 0., 0., 0., 0., 0., 1., 0., 1., 1., 0., 0., 0.]),
 'im11219.jpg': tensor([0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]),
 'im19645.jpg': tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]),
 'im16576.jpg': tensor([0., 0., 0., 0., 0., 0., 0., 1., 0., 1., 1., 0., 0., 0.]),
 'im19123.jpg': tensor([0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 1., 0., 0., 0.]),
 'im16210.jpg': tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.]),
 'im10661.jpg': tensor([0., 0., 0., 0., 0., 1., 0., 1., 0., 1., 0., 0., 0., 0.]),
 'im3807.jpg': tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]),
 'im8952.jpg': ten

In [5]:
# Split the data to train, test, and dev
os.makedirs(TRAIN_DIR)
os.makedirs(TEST_DIR)
os.makedirs(DEV_DIR)
for image_file_name in image_file_names:
    if os.path.isfile(IMAGES_DIR + image_file_name):
        division = random.randint(1, 4)
        if division == 1 or division == 2:
            shutil.copyfile(IMAGES_DIR + image_file_name, TRAIN_DIR + image_file_name)
        if division == 3:
            shutil.copyfile(IMAGES_DIR + image_file_name, DEV_DIR + image_file_name)
        if division == 4:
            shutil.copyfile(IMAGES_DIR + image_file_name, TEST_DIR + image_file_name)

In [6]:
print( len(os.listdir(TEST_DIR)) + len(os.listdir(TRAIN_DIR)) + len( os.listdir(DEV_DIR)) == len(os.listdir(IMAGES_DIR)) )
print(len(os.listdir(TEST_DIR)))
print(len(os.listdir(TRAIN_DIR)))
print(len(os.listdir(DEV_DIR)))
print(len(os.listdir(IMAGES_DIR)))


True
6716
6705
6579
20000


In [7]:
import torch.utils.data
from PIL import Image

# Enable creating train, test, and dev test datasets for PyTorch
class myDataset(torch.utils.data.Dataset):
    def __init__(self, root_dir=IMAGES_DIR, transform=None):
        self.transform = transform
        self.root_dir = root_dir
        self.images = [root_dir + img for img in os.listdir(root_dir)]                
      
    def __len__(self):
        return len(self.images)       

    def __getitem__(self, idx):
        img_path = self.images[idx]
        img = img = Image.open(img_path)     
       
        if self.transform:
            img = self.transform(img)     

        return img, dict_labels[img_path.split("/")[-1]]

In [8]:
from torchvision import transforms


train_transform = transforms.Compose([
                                        transforms.Grayscale(num_output_channels=3),
                                        transforms.RandomCrop(128),
                                        transforms.RandomHorizontalFlip(),
                                        transforms.RandomRotation(2),
                                        transforms.ToTensor()])
test_transform = transforms.Compose([
                                        transforms.Grayscale(num_output_channels=3),
                                        transforms.ToTensor()])

# Create datasets for CNN
train_set = myDataset(TRAIN_DIR, transform=train_transform)
test_set = myDataset(TEST_DIR, transform=train_transform)
dev_set = myDataset(DEV_DIR, transform=test_transform)

In [9]:
import torch 

# Calculate the amount of correctly predicted as well as total predictions done
def calc_correct(pred: torch.Tensor, target: torch.Tensor):
    pred = torch.sigmoid(pred)  # Since our neural network does not apply sigmoid
    correct_dict = {'tot': [0,0]}  # First number in value is correct ones, the second one is total amount
    correct_dict['tot_strict'] = [0,0] # All correct
    for i in range(len(pred)): # [100,14]
        all_correct = 0
        total = 0
        # estim_pred_array = []
        for j in range(len(pred[i])):
            estim_pred = 0 if float(pred[i][j]) < 0.5 else 1
            # estim_pred_array.append(estim_pred)
            if annotations[j] not in correct_dict.keys():
                correct_dict[annotations[j]] = [0,0]
            correct_dict['tot'][1] += 1
            correct_dict[annotations[j]][1] += 1
            correct_dict['tot'][0] += int(estim_pred == target[i][j])
            correct_dict[annotations[j]][0] += int(estim_pred == target[i][j])
            all_correct += int(estim_pred == target[i][j])
            total += 1
        correct_dict['tot_strict'][1] += 1
        correct_dict['tot_strict'][0] += int(total==all_correct)
    # To-do how many pictures were entirely correct (accuracy)
    return correct_dict


In [10]:
def class_evaluation(pred: torch.Tensor, target: torch.Tensor):
    pred = torch.sigmoid(pred)
    
    true_positive = 0
    false_positive = 0
    true_negative = 0
    false_negative = 0
    negative = 0
    positive = 0
    
    for i in range(len(pred)): # [100,14]
        for j in range(len(pred[i])):
            estim_pred = 0 if float(pred[i][j]) < 0.5 else 1
            
            # Negative target values
            if target[i][j] == 0:
                negative += 1
                if estim_pred == 1:
                    false_positive += 1
                if estim_pred == 0:
                    true_negative += 1
            
            # Positive target values
            if target[i][j] == 1:
                positive += 1
                if estim_pred == 1:
                    true_positive += 1
                if estim_pred == 0:
                    false_negative += 1
    
    result = {"true_positive": true_positive,
            "false_positive": false_positive,
            "true_negative": true_negative,
            "false_negative": false_negative,
            "negative": negative,
            "positive": positive}
                    
    return result

In [11]:
def class_evaluation_by_annotation(pred: torch.Tensor, target: torch.Tensor):
    
    pred = torch.sigmoid(pred)
    
    # Initiate vslues for every annotation
    eval_dict = {}
    for a in range(len(annotations)):
        eval_dict[annotations[a]] = {"true_positive": 0,
                                        "false_positive": 0,
                                        "true_negative": 0,
                                        "false_negative": 0,
                                        "negative": 0,
                                        "positive": 0}
    
    for i in range(len(pred)):
        for j in range(len(pred[i])):
            estim_pred = 0 if float(pred[i][j]) < 0.5 else 1
            
            # Negative target values
            if target[i][j] == 0:
                eval_dict[annotations[j]]["negative"] += 1
                if estim_pred == 1:
                    eval_dict[annotations[j]]["false_positive"] += 1
                if estim_pred == 0:
                    eval_dict[annotations[j]]["true_negative"] += 1
            
            # Positive target values
            if target[i][j] == 1:
                eval_dict[annotations[j]]["positive"] += 1
                if estim_pred == 1:
                    eval_dict[annotations[j]]["true_positive"] += 1
                if estim_pred == 0:
                    eval_dict[annotations[j]]["false_negative"] += 1
                    
    return eval_dict

In [12]:
import torch
import torch.optim as optim
import torch.utils.data
import torch.backends.cudnn as cudnn
import torchvision
from torchvision import transforms, datasets, models
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import math


#--- hyperparameters ---
N_EPOCHS = 15
BATCH_SIZE_TRAIN = 100
BATCH_SIZE_TEST = 100
LR = 0.001
WEIGHT_DECAY = 0.00
MOMENTUM = 0.0


#--- fixed constants ---
NUM_CLASSES = 14
NUM_CHANNELS = 3



# --- Dataset initialization ---

# We transform image files' contents to tensors
# Plus, we can add random transformations to the training data if we like
# Think on what kind of transformations may be meaningful for this data.
# Eg., horizontal-flip is definitely a bad idea for sign language data.
# You can use another transformation here if you find a better one.


# Create Pytorch data loaders
train_loader = torch.utils.data.DataLoader(dataset=train_set, batch_size=BATCH_SIZE_TRAIN, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_set, batch_size=BATCH_SIZE_TEST, shuffle=False)
dev_loader = torch.utils.data.DataLoader(dataset=dev_set, shuffle=False)

alexnet = models.alexnet(pretrained=True)

#--- model ---
class CNN(nn.Module):
    def __init__(self, num_classes=NUM_CLASSES):
        super(CNN, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(NUM_CHANNELS, 32, kernel_size = 3, padding = 1),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size = 3, padding = 1),
            nn.ReLU(),
            nn.MaxPool2d(2,2),

            nn.Conv2d(64, 128, kernel_size = 3, padding = 1),
            nn.ReLU(),
            nn.Conv2d(128, 128, kernel_size = 3, padding = 1),
            nn.ReLU(),
            nn.MaxPool2d(2,2),

            nn.Conv2d(128, 256, kernel_size = 3, padding = 1),
            nn.ReLU(),
            nn.Conv2d(256,256, kernel_size = 3, padding = 1),
            nn.ReLU(),
            nn.MaxPool2d(2,2)
        )
        self.flatten = nn.Flatten()
        self.lin = nn.Sequential(
            nn.Linear(65536, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        x = self.conv(x)
        x = self.flatten(x)
        x = self.lin(x)
        return x



#--- set up ---
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

model = CNN().to(device)


optimizer = optim.Adam(model.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)
# optimizer = optim.SGD(model.parameters(), lr=LR, weight_decay=WEIGHT_DECAY, momentum=MOMENTUM)
loss_function = nn.BCEWithLogitsLoss()

dev_loss = math.inf
dev_losses = []
dev_accuracies = []
stop_early = False

#--- set up ---
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

model = CNN().to(device)


optimizer = optim.Adam(model.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)
loss_function = nn.BCEWithLogitsLoss()

dev_loss = math.inf
dev_losses = []
dev_accuracies = []
num_to_early_stop = 5  # Compares the current loss to loss we had before, this defines how far back we look  
stop_early = False
num_same_dev = 1

#--- training ---
for epoch in range(N_EPOCHS):
    if stop_early:
        break
    train_loss = 0
    train_correct = {annotation: [0,0] for annotation in annotations}
    train_correct['tot'] = [0,0]
    train_correct['tot_strict'] = [0,0]
    evaluation = {"true_positive": 0,
                    "false_positive": 0,
                    "true_negative": 0,
                    "false_negative": 0,
                    "negative": 0,
                    "positive": 0}
    total = 0

    for batch_num, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        pred = model(data)
        loss = loss_function(pred, target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total += len(data)
        train_loss += loss.item()
        new_correct = calc_correct(pred, target)
        for annotation in annotations:
            new = new_correct[annotation]
            train_correct[annotation][0] += new[0]
            train_correct[annotation][1] += new[1]
        train_correct['tot'][0] += new_correct['tot'][0]
        train_correct['tot'][1] += new_correct['tot'][1]
        train_correct['tot_strict'][0] += new_correct['tot_strict'][0]
        train_correct['tot_strict'][1] += new_correct['tot_strict'][1]
        
        evaluations = class_evaluation(pred, target)
        evaluation["true_positive"] += evaluations["true_positive"]
        evaluation["false_positive"] += evaluations["false_positive"]
        evaluation["true_negative"] += evaluations["true_negative"]
        evaluation["false_negative"] += evaluations["false_negative"]
        evaluation["positive"] += evaluations["positive"]
        evaluation["negative"] += evaluations["negative"]

        print("------------------------")
        print('Training: Epoch %d - Batch %d/%d: Loss: %.4f | Train Acc: %.3f%% (%d/%d) | Strict Acc: %.3f%% (%d/%d)' % 
              (epoch+1, batch_num+1, len(train_loader), train_loss / (batch_num + 1), 
               100. * train_correct['tot'][0] / train_correct['tot'][1], train_correct['tot'][0], train_correct['tot'][1],
               100. * train_correct['tot_strict'][0] / train_correct['tot_strict'][1], train_correct['tot_strict'][0], train_correct['tot_strict'][1]))
        print('True positive rate: %.3f%% (%d/%d)' % 
              (100. * evaluation["true_positive"] / evaluation["positive"], evaluation["true_positive"], evaluation["positive"]) )
        print('False negative rate: %.3f%% (%d/%d)' % 
              (100. * evaluation["false_negative"] / evaluation["positive"], evaluation["false_negative"], evaluation["positive"]) )
        print('True negative rate: %.3f%% (%d/%d)' % 
              (100. * evaluation["true_negative"] / evaluation["negative"], evaluation["true_negative"], evaluation["negative"]) )
        print('False positive rate: %.3f%% (%d/%d)' % 
              (100. * evaluation["false_positive"] / evaluation["negative"], evaluation["false_positive"], evaluation["negative"]) )
        print("------------------------")

    cur_dev_loss = 0
    dev_correct = {annotation: [0,0] for annotation in annotations}
    dev_correct['tot'] = [0,0]
    dev_correct['tot_strict'] = [0,0]
    evaluation = {"true_positive": 0,
                    "false_positive": 0,
                    "true_negative": 0,
                    "false_negative": 0,
                    "negative": 0,
                    "positive": 0}

    with torch.no_grad():
        for batch_num, (data, target) in enumerate(dev_loader):
            data, target = data.to(device), target.to(device)
            pred = model(data)
            loss = loss_function(pred, target)

            cur_dev_loss += loss.item()
            new_dev_correct = calc_correct(pred, target)
            for annotation in annotations:
                new = new_dev_correct[annotation]
                dev_correct[annotation][0] += new[0]
                dev_correct[annotation][1] += new[1]
            dev_correct['tot'][0] += new_dev_correct['tot'][0]
            dev_correct['tot'][1] += new_dev_correct['tot'][1]
            dev_correct['tot_strict'][0] += new_dev_correct['tot_strict'][0]
            dev_correct['tot_strict'][1] += new_dev_correct['tot_strict'][1]
            
            evaluations = class_evaluation(pred, target)
            evaluation["true_positive"] += evaluations["true_positive"]
            evaluation["false_positive"] += evaluations["false_positive"]
            evaluation["true_negative"] += evaluations["true_negative"]
            evaluation["false_negative"] += evaluations["false_negative"]
            evaluation["positive"] += evaluations["positive"]
            evaluation["negative"] += evaluations["negative"]

        current_loss = cur_dev_loss / (len(dev_loader) + 1)
        dev_losses.append(current_loss)
        current_accuracy = {annotation: 100. * dev_correct[annotation][0] / dev_correct[annotation][1]  for annotation in annotations}  # Accuracies for all classes 
        current_accuracy['tot'] = 100. * dev_correct['tot'][0] / dev_correct['tot'][1]
        current_accuracy['tot_strict'] = 100. * dev_correct['tot_strict'][0] / dev_correct['tot_strict'][1]
        dev_accuracies.append(current_accuracy)

        if current_loss <= dev_loss:
            if current_loss == dev_loss:
                num_same_dev += 1
            else:
                num_same_dev = 1
            dev_loss = current_loss
            if num_same_dev >= 5:  # If dev does not improve, stop doing epochs
                stop_early = True
        # else:
        #     stop_early = True
        if len(dev_losses) >= 5 and dev_losses[-5]<dev_losses[-1]:
            stop_early = True

        print("------------------------")
        print('Evaluating: Batch %d/%d: Loss: %.4f | Dev Acc: %.3f%% (%d/%d) | Strict Acc: %.3f%% (%d/%d)' % 
            (batch_num+1, len(dev_loader), cur_dev_loss / (len(dev_loader) + 1), 
            100. * dev_correct['tot'][0] / dev_correct['tot'][1], dev_correct['tot'][0], dev_correct['tot'][1],
            100. * dev_correct['tot_strict'][0] / dev_correct['tot_strict'][1], dev_correct['tot_strict'][0], dev_correct['tot_strict'][1]))
        print('True positive rate: %.3f%% (%d/%d)' % 
              (100. * evaluation["true_positive"] / evaluation["positive"], evaluation["true_positive"], evaluation["positive"]) )
        print('False negative rate: %.3f%% (%d/%d)' % 
              (100. * evaluation["false_negative"] / evaluation["positive"], evaluation["false_negative"], evaluation["positive"]) )
        print('True negative rate: %.3f%% (%d/%d)' % 
              (100. * evaluation["true_negative"] / evaluation["negative"], evaluation["true_negative"], evaluation["negative"]) )
        print('False positive rate: %.3f%% (%d/%d)' % 
              (100. * evaluation["false_positive"] / evaluation["negative"], evaluation["false_positive"], evaluation["negative"]) )
        print("------------------------")


------------------------
Training: Epoch 1 - Batch 1/68: Loss: 0.6954 | Train Acc: 51.071% (715/1400)
True positive rate: 57.009% (61/107)
False negative rate: 42.991% (46/107)
True negative rate: 50.580% (654/1293)
False positive rate: 49.420% (639/1293)
------------------------
------------------------
Training: Epoch 1 - Batch 2/68: Loss: 0.5907 | Train Acc: 71.821% (2011/2800)
True positive rate: 28.910% (61/211)
False negative rate: 71.090% (150/211)
True negative rate: 75.319% (1950/2589)
False positive rate: 24.681% (639/2589)
------------------------
------------------------
Training: Epoch 1 - Batch 3/68: Loss: 0.5769 | Train Acc: 78.667% (3304/4200)
True positive rate: 19.182% (61/318)
False negative rate: 80.818% (257/318)
True negative rate: 83.539% (3243/3882)
False positive rate: 16.461% (639/3882)
------------------------
------------------------
Training: Epoch 1 - Batch 4/68: Loss: 0.5284 | Train Acc: 80.125% (4487/5600)
True positive rate: 26.386% (119/451)
False nega

------------------------
Training: Epoch 1 - Batch 30/68: Loss: 0.2849 | Train Acc: 89.624% (37642/42000)
True positive rate: 11.192% (340/3038)
False negative rate: 88.808% (2698/3038)
True negative rate: 95.739% (37302/38962)
False positive rate: 4.261% (1660/38962)
------------------------
------------------------
Training: Epoch 1 - Batch 31/68: Loss: 0.2829 | Train Acc: 89.712% (38935/43400)
True positive rate: 10.811% (340/3145)
False negative rate: 89.189% (2805/3145)
True negative rate: 95.876% (38595/40255)
False positive rate: 4.124% (1660/40255)
------------------------
------------------------
Training: Epoch 1 - Batch 32/68: Loss: 0.2803 | Train Acc: 89.839% (40248/44800)
True positive rate: 10.520% (340/3232)
False negative rate: 89.480% (2892/3232)
True negative rate: 96.007% (39908/41568)
False positive rate: 3.993% (1660/41568)
------------------------
------------------------
Training: Epoch 1 - Batch 33/68: Loss: 0.2782 | Train Acc: 89.920% (41543/46200)
True positiv

------------------------
Training: Epoch 1 - Batch 58/68: Loss: 0.2515 | Train Acc: 91.086% (73962/81200)
True positive rate: 5.745% (340/5918)
False negative rate: 94.255% (5578/5918)
True negative rate: 97.795% (73622/75282)
False positive rate: 2.205% (1660/75282)
------------------------
------------------------
Training: Epoch 1 - Batch 59/68: Loss: 0.2506 | Train Acc: 91.130% (75273/82600)
True positive rate: 5.660% (340/6007)
False negative rate: 94.340% (5667/6007)
True negative rate: 97.833% (74933/76593)
False positive rate: 2.167% (1660/76593)
------------------------
------------------------
Training: Epoch 1 - Batch 60/68: Loss: 0.2499 | Train Acc: 91.160% (76574/84000)
True positive rate: 5.568% (340/6106)
False negative rate: 94.432% (5766/6106)
True negative rate: 97.869% (76234/77894)
False positive rate: 2.131% (1660/77894)
------------------------
------------------------
Training: Epoch 1 - Batch 61/68: Loss: 0.2493 | Train Acc: 91.184% (77871/85400)
True positive r

In [13]:
#--- test ---
test_loss = 0
test_correct = {annotation: [0,0] for annotation in annotations}
test_correct['tot'] = [0,0]
test_correct['tot_strict'] = [0,0]
evaluation = {"true_positive": 0,
                    "false_positive": 0,
                    "true_negative": 0,
                    "false_negative": 0,
                    "negative": 0,
                    "positive": 0}

evaluation_by_annotation = {}
for a in range(len(annotations)):
    evaluation_by_annotation[annotations[a]] = {"true_positive": 0,
                                                "false_positive": 0,
                                                "true_negative": 0,
                                                "false_negative": 0,
                                                "negative": 0,
                                                "positive": 0}

with torch.no_grad():
    for batch_num, (data, target) in enumerate(test_loader):
        data, target = data.to(device), target.to(device)
        pred = model(data)
        loss = loss_function(pred, target)

        test_loss += loss.item()
        new_test_correct = calc_correct(pred, target)
        for annotation in annotations:
            new = new_test_correct[annotation]
            test_correct[annotation][0] += new[0]
            test_correct[annotation][1] += new[1]
        test_correct['tot'][0] += new_test_correct['tot'][0]
        test_correct['tot'][1] += new_test_correct['tot'][1]
        test_correct['tot_strict'][0] += new_test_correct['tot_strict'][0]
        test_correct['tot_strict'][1] += new_test_correct['tot_strict'][1]
        
        evaluations = class_evaluation(pred, target)
        evaluation["true_positive"] += evaluations["true_positive"]
        evaluation["false_positive"] += evaluations["false_positive"]
        evaluation["true_negative"] += evaluations["true_negative"]
        evaluation["false_negative"] += evaluations["false_negative"]
        evaluation["positive"] += evaluations["positive"]
        evaluation["negative"] += evaluations["negative"]
        
        evaluations_by_annotation = class_evaluation_by_annotation(pred, target)
        print(evaluation_by_annotation)
        
        for a in range(len(annotations)):
            evaluation_by_annotation[annotations[a]]["true_positive"] += evaluations_by_annotation[annotations[a]]["true_positive"]
            evaluation_by_annotation[annotations[a]]["false_positive"] += evaluations_by_annotation[annotations[a]]["false_positive"]
            evaluation_by_annotation[annotations[a]]["true_negative"] += evaluations_by_annotation[annotations[a]]["true_negative"]
            evaluation_by_annotation[annotations[a]]["false_negative"] += evaluations_by_annotation[annotations[a]]["false_negative"]
            evaluation_by_annotation[annotations[a]]["positive"] += evaluations_by_annotation[annotations[a]]["positive"]
            evaluation_by_annotation[annotations[a]]["negative"] += evaluations_by_annotation[annotations[a]]["negative"]
        
        print("------------------------")
        print('Evaluating: Batch %d/%d: Loss: %.4f | Test Acc: %.3f%% (%d/%d) | Strict Acc: %.3f%% (%d/%d)' % 
              (batch_num+1, len(test_loader), test_loss / (batch_num + 1), 
               100. * test_correct['tot'][0] / test_correct['tot'][1], test_correct['tot'][0], test_correct['tot'][1],
               100. * test_correct['tot_strict'][0] / test_correct['tot_strict'][1], test_correct['tot_strict'][0], test_correct['tot_strict'][1]))
        print('True positive rate: %.3f%% (%d/%d)' % 
              (100. * evaluation["true_positive"] / evaluation["positive"], evaluation["true_positive"], evaluation["positive"]) )
        print('False negative rate: %.3f%% (%d/%d)' % 
              (100. * evaluation["false_negative"] / evaluation["positive"], evaluation["false_negative"], evaluation["positive"]) )
        print('True negative rate: %.3f%% (%d/%d)' % 
              (100. * evaluation["true_negative"] / evaluation["negative"], evaluation["true_negative"], evaluation["negative"]) )
        print('False positive rate: %.3f%% (%d/%d)' % 
              (100. * evaluation["false_positive"] / evaluation["negative"], evaluation["false_positive"], evaluation["negative"]) )
        print("------------------------")
        
print(dev_losses)
print(dev_accuracies)

{'baby': {'true_positive': 0, 'false_positive': 0, 'true_negative': 0, 'false_negative': 0, 'negative': 0, 'positive': 0}, 'bird': {'true_positive': 0, 'false_positive': 0, 'true_negative': 0, 'false_negative': 0, 'negative': 0, 'positive': 0}, 'car': {'true_positive': 0, 'false_positive': 0, 'true_negative': 0, 'false_negative': 0, 'negative': 0, 'positive': 0}, 'clouds': {'true_positive': 0, 'false_positive': 0, 'true_negative': 0, 'false_negative': 0, 'negative': 0, 'positive': 0}, 'dog': {'true_positive': 0, 'false_positive': 0, 'true_negative': 0, 'false_negative': 0, 'negative': 0, 'positive': 0}, 'female': {'true_positive': 0, 'false_positive': 0, 'true_negative': 0, 'false_negative': 0, 'negative': 0, 'positive': 0}, 'flower': {'true_positive': 0, 'false_positive': 0, 'true_negative': 0, 'false_negative': 0, 'negative': 0, 'positive': 0}, 'male': {'true_positive': 0, 'false_positive': 0, 'true_negative': 0, 'false_negative': 0, 'negative': 0, 'positive': 0}, 'night': {'true_pos

{'baby': {'true_positive': 0, 'false_positive': 0, 'true_negative': 496, 'false_negative': 4, 'negative': 496, 'positive': 4}, 'bird': {'true_positive': 0, 'false_positive': 0, 'true_negative': 487, 'false_negative': 13, 'negative': 487, 'positive': 13}, 'car': {'true_positive': 0, 'false_positive': 0, 'true_negative': 494, 'false_negative': 6, 'negative': 494, 'positive': 6}, 'clouds': {'true_positive': 0, 'false_positive': 0, 'true_negative': 471, 'false_negative': 29, 'negative': 471, 'positive': 29}, 'dog': {'true_positive': 0, 'false_positive': 0, 'true_negative': 494, 'false_negative': 6, 'negative': 494, 'positive': 6}, 'female': {'true_positive': 0, 'false_positive': 0, 'true_negative': 424, 'false_negative': 76, 'negative': 424, 'positive': 76}, 'flower': {'true_positive': 0, 'false_positive': 0, 'true_negative': 481, 'false_negative': 19, 'negative': 481, 'positive': 19}, 'male': {'true_positive': 0, 'false_positive': 0, 'true_negative': 431, 'false_negative': 69, 'negative':

{'baby': {'true_positive': 0, 'false_positive': 0, 'true_negative': 895, 'false_negative': 5, 'negative': 895, 'positive': 5}, 'bird': {'true_positive': 0, 'false_positive': 0, 'true_negative': 883, 'false_negative': 17, 'negative': 883, 'positive': 17}, 'car': {'true_positive': 0, 'false_positive': 0, 'true_negative': 883, 'false_negative': 17, 'negative': 883, 'positive': 17}, 'clouds': {'true_positive': 0, 'false_positive': 0, 'true_negative': 843, 'false_negative': 57, 'negative': 843, 'positive': 57}, 'dog': {'true_positive': 0, 'false_positive': 0, 'true_negative': 883, 'false_negative': 17, 'negative': 883, 'positive': 17}, 'female': {'true_positive': 0, 'false_positive': 0, 'true_negative': 757, 'false_negative': 143, 'negative': 757, 'positive': 143}, 'flower': {'true_positive': 0, 'false_positive': 0, 'true_negative': 863, 'false_negative': 37, 'negative': 863, 'positive': 37}, 'male': {'true_positive': 0, 'false_positive': 0, 'true_negative': 783, 'false_negative': 117, 'neg

{'baby': {'true_positive': 0, 'false_positive': 0, 'true_negative': 1294, 'false_negative': 6, 'negative': 1294, 'positive': 6}, 'bird': {'true_positive': 0, 'false_positive': 0, 'true_negative': 1277, 'false_negative': 23, 'negative': 1277, 'positive': 23}, 'car': {'true_positive': 0, 'false_positive': 0, 'true_negative': 1277, 'false_negative': 23, 'negative': 1277, 'positive': 23}, 'clouds': {'true_positive': 0, 'false_positive': 0, 'true_negative': 1224, 'false_negative': 76, 'negative': 1224, 'positive': 76}, 'dog': {'true_positive': 0, 'false_positive': 0, 'true_negative': 1272, 'false_negative': 28, 'negative': 1272, 'positive': 28}, 'female': {'true_positive': 0, 'false_positive': 0, 'true_negative': 1092, 'false_negative': 208, 'negative': 1092, 'positive': 208}, 'flower': {'true_positive': 0, 'false_positive': 0, 'true_negative': 1242, 'false_negative': 58, 'negative': 1242, 'positive': 58}, 'male': {'true_positive': 0, 'false_positive': 0, 'true_negative': 1125, 'false_negat

{'baby': {'true_positive': 0, 'false_positive': 0, 'true_negative': 1691, 'false_negative': 9, 'negative': 1691, 'positive': 9}, 'bird': {'true_positive': 0, 'false_positive': 0, 'true_negative': 1671, 'false_negative': 29, 'negative': 1671, 'positive': 29}, 'car': {'true_positive': 0, 'false_positive': 0, 'true_negative': 1670, 'false_negative': 30, 'negative': 1670, 'positive': 30}, 'clouds': {'true_positive': 0, 'false_positive': 0, 'true_negative': 1598, 'false_negative': 102, 'negative': 1598, 'positive': 102}, 'dog': {'true_positive': 0, 'false_positive': 0, 'true_negative': 1662, 'false_negative': 38, 'negative': 1662, 'positive': 38}, 'female': {'true_positive': 0, 'false_positive': 0, 'true_negative': 1432, 'false_negative': 268, 'negative': 1432, 'positive': 268}, 'flower': {'true_positive': 0, 'false_positive': 0, 'true_negative': 1627, 'false_negative': 73, 'negative': 1627, 'positive': 73}, 'male': {'true_positive': 0, 'false_positive': 0, 'true_negative': 1470, 'false_neg

{'baby': {'true_positive': 0, 'false_positive': 0, 'true_negative': 2088, 'false_negative': 12, 'negative': 2088, 'positive': 12}, 'bird': {'true_positive': 0, 'false_positive': 0, 'true_negative': 2057, 'false_negative': 43, 'negative': 2057, 'positive': 43}, 'car': {'true_positive': 0, 'false_positive': 0, 'true_negative': 2067, 'false_negative': 33, 'negative': 2067, 'positive': 33}, 'clouds': {'true_positive': 0, 'false_positive': 0, 'true_negative': 1976, 'false_negative': 124, 'negative': 1976, 'positive': 124}, 'dog': {'true_positive': 0, 'false_positive': 0, 'true_negative': 2049, 'false_negative': 51, 'negative': 2049, 'positive': 51}, 'female': {'true_positive': 0, 'false_positive': 0, 'true_negative': 1760, 'false_negative': 340, 'negative': 1760, 'positive': 340}, 'flower': {'true_positive': 0, 'false_positive': 0, 'true_negative': 2010, 'false_negative': 90, 'negative': 2010, 'positive': 90}, 'male': {'true_positive': 0, 'false_positive': 0, 'true_negative': 1802, 'false_n

{'baby': {'true_positive': 0, 'false_positive': 0, 'true_negative': 2485, 'false_negative': 15, 'negative': 2485, 'positive': 15}, 'bird': {'true_positive': 0, 'false_positive': 0, 'true_negative': 2450, 'false_negative': 50, 'negative': 2450, 'positive': 50}, 'car': {'true_positive': 0, 'false_positive': 0, 'true_negative': 2461, 'false_negative': 39, 'negative': 2461, 'positive': 39}, 'clouds': {'true_positive': 0, 'false_positive': 0, 'true_negative': 2355, 'false_negative': 145, 'negative': 2355, 'positive': 145}, 'dog': {'true_positive': 0, 'false_positive': 0, 'true_negative': 2444, 'false_negative': 56, 'negative': 2444, 'positive': 56}, 'female': {'true_positive': 0, 'false_positive': 0, 'true_negative': 2094, 'false_negative': 406, 'negative': 2094, 'positive': 406}, 'flower': {'true_positive': 0, 'false_positive': 0, 'true_negative': 2396, 'false_negative': 104, 'negative': 2396, 'positive': 104}, 'male': {'true_positive': 0, 'false_positive': 0, 'true_negative': 2145, 'false

{'baby': {'true_positive': 0, 'false_positive': 0, 'true_negative': 2883, 'false_negative': 17, 'negative': 2883, 'positive': 17}, 'bird': {'true_positive': 0, 'false_positive': 0, 'true_negative': 2843, 'false_negative': 57, 'negative': 2843, 'positive': 57}, 'car': {'true_positive': 0, 'false_positive': 0, 'true_negative': 2854, 'false_negative': 46, 'negative': 2854, 'positive': 46}, 'clouds': {'true_positive': 0, 'false_positive': 0, 'true_negative': 2736, 'false_negative': 164, 'negative': 2736, 'positive': 164}, 'dog': {'true_positive': 0, 'false_positive': 0, 'true_negative': 2835, 'false_negative': 65, 'negative': 2835, 'positive': 65}, 'female': {'true_positive': 0, 'false_positive': 0, 'true_negative': 2419, 'false_negative': 481, 'negative': 2419, 'positive': 481}, 'flower': {'true_positive': 0, 'false_positive': 0, 'true_negative': 2784, 'false_negative': 116, 'negative': 2784, 'positive': 116}, 'male': {'true_positive': 0, 'false_positive': 0, 'true_negative': 2491, 'false

{'baby': {'true_positive': 0, 'false_positive': 0, 'true_negative': 3283, 'false_negative': 17, 'negative': 3283, 'positive': 17}, 'bird': {'true_positive': 0, 'false_positive': 0, 'true_negative': 3232, 'false_negative': 68, 'negative': 3232, 'positive': 68}, 'car': {'true_positive': 0, 'false_positive': 0, 'true_negative': 3244, 'false_negative': 56, 'negative': 3244, 'positive': 56}, 'clouds': {'true_positive': 0, 'false_positive': 0, 'true_negative': 3116, 'false_negative': 184, 'negative': 3116, 'positive': 184}, 'dog': {'true_positive': 0, 'false_positive': 0, 'true_negative': 3226, 'false_negative': 74, 'negative': 3226, 'positive': 74}, 'female': {'true_positive': 0, 'false_positive': 0, 'true_negative': 2767, 'false_negative': 533, 'negative': 2767, 'positive': 533}, 'flower': {'true_positive': 0, 'false_positive': 0, 'true_negative': 3169, 'false_negative': 131, 'negative': 3169, 'positive': 131}, 'male': {'true_positive': 0, 'false_positive': 0, 'true_negative': 2840, 'false

{'baby': {'true_positive': 0, 'false_positive': 0, 'true_negative': 3682, 'false_negative': 18, 'negative': 3682, 'positive': 18}, 'bird': {'true_positive': 0, 'false_positive': 0, 'true_negative': 3629, 'false_negative': 71, 'negative': 3629, 'positive': 71}, 'car': {'true_positive': 0, 'false_positive': 0, 'true_negative': 3639, 'false_negative': 61, 'negative': 3639, 'positive': 61}, 'clouds': {'true_positive': 0, 'false_positive': 0, 'true_negative': 3491, 'false_negative': 209, 'negative': 3491, 'positive': 209}, 'dog': {'true_positive': 0, 'false_positive': 0, 'true_negative': 3619, 'false_negative': 81, 'negative': 3619, 'positive': 81}, 'female': {'true_positive': 0, 'false_positive': 0, 'true_negative': 3102, 'false_negative': 598, 'negative': 3102, 'positive': 598}, 'flower': {'true_positive': 0, 'false_positive': 0, 'true_negative': 3557, 'false_negative': 143, 'negative': 3557, 'positive': 143}, 'male': {'true_positive': 0, 'false_positive': 0, 'true_negative': 3181, 'false

{'baby': {'true_positive': 0, 'false_positive': 0, 'true_negative': 4082, 'false_negative': 18, 'negative': 4082, 'positive': 18}, 'bird': {'true_positive': 0, 'false_positive': 0, 'true_negative': 4022, 'false_negative': 78, 'negative': 4022, 'positive': 78}, 'car': {'true_positive': 0, 'false_positive': 0, 'true_negative': 4034, 'false_negative': 66, 'negative': 4034, 'positive': 66}, 'clouds': {'true_positive': 0, 'false_positive': 0, 'true_negative': 3866, 'false_negative': 234, 'negative': 3866, 'positive': 234}, 'dog': {'true_positive': 0, 'false_positive': 0, 'true_negative': 4009, 'false_negative': 91, 'negative': 4009, 'positive': 91}, 'female': {'true_positive': 0, 'false_positive': 0, 'true_negative': 3436, 'false_negative': 664, 'negative': 3436, 'positive': 664}, 'flower': {'true_positive': 0, 'false_positive': 0, 'true_negative': 3937, 'false_negative': 163, 'negative': 3937, 'positive': 163}, 'male': {'true_positive': 0, 'false_positive': 0, 'true_negative': 3523, 'false

{'baby': {'true_positive': 0, 'false_positive': 0, 'true_negative': 4480, 'false_negative': 20, 'negative': 4480, 'positive': 20}, 'bird': {'true_positive': 0, 'false_positive': 0, 'true_negative': 4417, 'false_negative': 83, 'negative': 4417, 'positive': 83}, 'car': {'true_positive': 0, 'false_positive': 0, 'true_negative': 4428, 'false_negative': 72, 'negative': 4428, 'positive': 72}, 'clouds': {'true_positive': 0, 'false_positive': 0, 'true_negative': 4242, 'false_negative': 258, 'negative': 4242, 'positive': 258}, 'dog': {'true_positive': 0, 'false_positive': 0, 'true_negative': 4397, 'false_negative': 103, 'negative': 4397, 'positive': 103}, 'female': {'true_positive': 0, 'false_positive': 0, 'true_negative': 3765, 'false_negative': 735, 'negative': 3765, 'positive': 735}, 'flower': {'true_positive': 0, 'false_positive': 0, 'true_negative': 4320, 'false_negative': 180, 'negative': 4320, 'positive': 180}, 'male': {'true_positive': 0, 'false_positive': 0, 'true_negative': 3870, 'fal

{'baby': {'true_positive': 0, 'false_positive': 0, 'true_negative': 4879, 'false_negative': 21, 'negative': 4879, 'positive': 21}, 'bird': {'true_positive': 0, 'false_positive': 0, 'true_negative': 4808, 'false_negative': 92, 'negative': 4808, 'positive': 92}, 'car': {'true_positive': 0, 'false_positive': 0, 'true_negative': 4823, 'false_negative': 77, 'negative': 4823, 'positive': 77}, 'clouds': {'true_positive': 0, 'false_positive': 0, 'true_negative': 4625, 'false_negative': 275, 'negative': 4625, 'positive': 275}, 'dog': {'true_positive': 0, 'false_positive': 0, 'true_negative': 4786, 'false_negative': 114, 'negative': 4786, 'positive': 114}, 'female': {'true_positive': 0, 'false_positive': 0, 'true_negative': 4093, 'false_negative': 807, 'negative': 4093, 'positive': 807}, 'flower': {'true_positive': 0, 'false_positive': 0, 'true_negative': 4698, 'false_negative': 202, 'negative': 4698, 'positive': 202}, 'male': {'true_positive': 0, 'false_positive': 0, 'true_negative': 4210, 'fal

{'baby': {'true_positive': 0, 'false_positive': 0, 'true_negative': 5276, 'false_negative': 24, 'negative': 5276, 'positive': 24}, 'bird': {'true_positive': 0, 'false_positive': 0, 'true_negative': 5205, 'false_negative': 95, 'negative': 5205, 'positive': 95}, 'car': {'true_positive': 0, 'false_positive': 0, 'true_negative': 5219, 'false_negative': 81, 'negative': 5219, 'positive': 81}, 'clouds': {'true_positive': 0, 'false_positive': 0, 'true_negative': 5003, 'false_negative': 297, 'negative': 5003, 'positive': 297}, 'dog': {'true_positive': 0, 'false_positive': 0, 'true_negative': 5173, 'false_negative': 127, 'negative': 5173, 'positive': 127}, 'female': {'true_positive': 0, 'false_positive': 0, 'true_negative': 4427, 'false_negative': 873, 'negative': 4427, 'positive': 873}, 'flower': {'true_positive': 0, 'false_positive': 0, 'true_negative': 5087, 'false_negative': 213, 'negative': 5087, 'positive': 213}, 'male': {'true_positive': 0, 'false_positive': 0, 'true_negative': 4548, 'fal

{'baby': {'true_positive': 0, 'false_positive': 0, 'true_negative': 5675, 'false_negative': 25, 'negative': 5675, 'positive': 25}, 'bird': {'true_positive': 0, 'false_positive': 0, 'true_negative': 5598, 'false_negative': 102, 'negative': 5598, 'positive': 102}, 'car': {'true_positive': 0, 'false_positive': 0, 'true_negative': 5613, 'false_negative': 87, 'negative': 5613, 'positive': 87}, 'clouds': {'true_positive': 0, 'false_positive': 0, 'true_negative': 5372, 'false_negative': 328, 'negative': 5372, 'positive': 328}, 'dog': {'true_positive': 0, 'false_positive': 0, 'true_negative': 5562, 'false_negative': 138, 'negative': 5562, 'positive': 138}, 'female': {'true_positive': 0, 'false_positive': 0, 'true_negative': 4768, 'false_negative': 932, 'negative': 4768, 'positive': 932}, 'flower': {'true_positive': 0, 'false_positive': 0, 'true_negative': 5477, 'false_negative': 223, 'negative': 5477, 'positive': 223}, 'male': {'true_positive': 0, 'false_positive': 0, 'true_negative': 4881, 'f

{'baby': {'true_positive': 0, 'false_positive': 0, 'true_negative': 6074, 'false_negative': 26, 'negative': 6074, 'positive': 26}, 'bird': {'true_positive': 0, 'false_positive': 0, 'true_negative': 5989, 'false_negative': 111, 'negative': 5989, 'positive': 111}, 'car': {'true_positive': 0, 'false_positive': 0, 'true_negative': 6005, 'false_negative': 95, 'negative': 6005, 'positive': 95}, 'clouds': {'true_positive': 0, 'false_positive': 0, 'true_negative': 5754, 'false_negative': 346, 'negative': 5754, 'positive': 346}, 'dog': {'true_positive': 0, 'false_positive': 0, 'true_negative': 5952, 'false_negative': 148, 'negative': 5952, 'positive': 148}, 'female': {'true_positive': 0, 'false_positive': 0, 'true_negative': 5115, 'false_negative': 985, 'negative': 5115, 'positive': 985}, 'flower': {'true_positive': 0, 'false_positive': 0, 'true_negative': 5863, 'false_negative': 237, 'negative': 5863, 'positive': 237}, 'male': {'true_positive': 0, 'false_positive': 0, 'true_negative': 5224, 'f

{'baby': {'true_positive': 0, 'false_positive': 0, 'true_negative': 6474, 'false_negative': 26, 'negative': 6474, 'positive': 26}, 'bird': {'true_positive': 0, 'false_positive': 0, 'true_negative': 6382, 'false_negative': 118, 'negative': 6382, 'positive': 118}, 'car': {'true_positive': 0, 'false_positive': 0, 'true_negative': 6402, 'false_negative': 98, 'negative': 6402, 'positive': 98}, 'clouds': {'true_positive': 0, 'false_positive': 0, 'true_negative': 6130, 'false_negative': 370, 'negative': 6130, 'positive': 370}, 'dog': {'true_positive': 0, 'false_positive': 0, 'true_negative': 6343, 'false_negative': 157, 'negative': 6343, 'positive': 157}, 'female': {'true_positive': 0, 'false_positive': 0, 'true_negative': 5457, 'false_negative': 1043, 'negative': 5457, 'positive': 1043}, 'flower': {'true_positive': 0, 'false_positive': 0, 'true_negative': 6246, 'false_negative': 254, 'negative': 6246, 'positive': 254}, 'male': {'true_positive': 0, 'false_positive': 0, 'true_negative': 5559, 

In [24]:
for i in evaluation_by_annotation:
    print("------------------------")
    print(i + ":")
    print("")
    print("positive: " + str(evaluation_by_annotation[i]["positive"]))
    print("true_positive: " + str(evaluation_by_annotation[i]["true_positive"]))
    print("false_negative: " + str(evaluation_by_annotation[i]["false_negative"]))
    print("")
    print("negative: " + str(evaluation_by_annotation[i]["negative"]))
    print("true_negative: " + str(evaluation_by_annotation[i]["true_negative"]))
    print("false_positive: " + str(evaluation_by_annotation[i]["false_positive"]))
        


------------------------
baby:

positive: 29
true_positive: 0
false_negative: 29

negative: 6687
true_negative: 6687
false_positive: 0
------------------------
bird:

positive: 124
true_positive: 0
false_negative: 124

negative: 6592
true_negative: 6592
false_positive: 0
------------------------
car:

positive: 103
true_positive: 0
false_negative: 103

negative: 6613
true_negative: 6613
false_positive: 0
------------------------
clouds:

positive: 383
true_positive: 0
false_negative: 383

negative: 6333
true_negative: 6333
false_positive: 0
------------------------
dog:

positive: 167
true_positive: 0
false_negative: 167

negative: 6549
true_negative: 6549
false_positive: 0
------------------------
female:

positive: 1072
true_positive: 0
false_negative: 1072

negative: 5644
true_negative: 5644
false_positive: 0
------------------------
flower:

positive: 257
true_positive: 0
false_negative: 257

negative: 6459
true_negative: 6459
false_positive: 0
------------------------
male:

posit

Now saving the model:

In [None]:
PATH = DATA_DIR + "cnn_base.pt"

torch.save(model.state_dict(), PATH)