In [1]:
import torch
from torch.autograd import Variable as V
import torchvision.models as models
from torchvision import transforms as trn
from torch.nn import functional as F
import numpy as np
import pandas as pd

import os
import cv2
import argparse
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

from sklearn.metrics import precision_recall_fscore_support, accuracy_score, classification_report, confusion_matrix
from sklearn.metrics import brier_score_loss
from sklearn.neural_network import MLPClassifier
from sklearn.feature_extraction.text import TfidfVectorizer, TfidfTransformer, CountVectorizer
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

from scipy.special import softmax

In /Users/rikka/opt/anaconda3/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/_classic_test.mplstyle: 
The text.latex.preview rcparam was deprecated in Matplotlib 3.3 and will be removed two minor releases later.
In /Users/rikka/opt/anaconda3/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/_classic_test.mplstyle: 
The mathtext.fallback_to_cm rcparam was deprecated in Matplotlib 3.3 and will be removed two minor releases later.
In /Users/rikka/opt/anaconda3/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/_classic_test.mplstyle: Support for setting the 'mathtext.fallback_to_cm' rcParam is deprecated since 3.3 and will be removed two minor releases later; use 'mathtext.fallback : 'cm' instead.
In /Users/rikka/opt/anaconda3/lib/python3.7/site-packages/matplotlib/mpl-data/stylelib/_classic_test.mplstyle: 
The validate_bool_maybe_none function was deprecated in Matplotlib 3.3 and will be removed two minor releases later.
In /Users/rikka/opt/anaconda3/lib/python3.7/

In [13]:
class CELoss(object):

    def compute_bin_boundaries(self, probabilities = np.array([])):

        #uniform bin spacing
        if probabilities.size == 0:
            bin_boundaries = np.linspace(0, 1, self.n_bins + 1)
            self.bin_lowers = bin_boundaries[:-1]
            self.bin_uppers = bin_boundaries[1:]
        else:
            #size of bins 
            bin_n = int(self.n_data/self.n_bins)

            bin_boundaries = np.array([])

            probabilities_sort = np.sort(probabilities)  

            for i in range(0,self.n_bins):
                bin_boundaries = np.append(bin_boundaries,probabilities_sort[i*bin_n])
            bin_boundaries = np.append(bin_boundaries,1.0)

            self.bin_lowers = bin_boundaries[:-1]
            self.bin_uppers = bin_boundaries[1:]


    def get_probabilities(self, output, labels, logits):
        #If not probabilities apply softmax!
        if logits:
            self.probabilities = softmax(output, axis=1)
        else:
            self.probabilities = output

        self.labels = labels
        self.confidences = np.max(self.probabilities, axis=1)
        self.predictions = np.argmax(self.probabilities, axis=1)
        self.accuracies = np.equal(self.predictions,labels)

    def binary_matrices(self):
        idx = np.arange(self.n_data)
        #make matrices of zeros
        pred_matrix = np.zeros([self.n_data,self.n_class])
        label_matrix = np.zeros([self.n_data,self.n_class])
        #self.acc_matrix = np.zeros([self.n_data,self.n_class])
        pred_matrix[idx,self.predictions] = 1
        label_matrix[idx,self.labels] = 1

        self.acc_matrix = np.equal(pred_matrix, label_matrix)


    def compute_bins(self, index = None):
        self.bin_prop = np.zeros(self.n_bins)
        self.bin_acc = np.zeros(self.n_bins)
        self.bin_conf = np.zeros(self.n_bins)
        self.bin_score = np.zeros(self.n_bins)

        if index == None:
            confidences = self.confidences
            accuracies = self.accuracies
        else:
            confidences = self.probabilities[:,index]
            accuracies = self.acc_matrix[:,index]


        for i, (bin_lower, bin_upper) in enumerate(zip(self.bin_lowers, self.bin_uppers)):
            # Calculated |confidence - accuracy| in each bin
            in_bin = np.greater(confidences,bin_lower.item()) * np.less_equal(confidences,bin_upper.item())
            self.bin_prop[i] = np.mean(in_bin)

            if self.bin_prop[i].item() > 0:
                self.bin_acc[i] = np.mean(accuracies[in_bin])
                self.bin_conf[i] = np.mean(confidences[in_bin])
                self.bin_score[i] = np.abs(self.bin_conf[i] - self.bin_acc[i])

class MaxProbCELoss(CELoss):
    def loss(self, output, labels, n_bins = 15, logits = True):
        self.n_bins = n_bins
        super().compute_bin_boundaries()
        super().get_probabilities(output, labels, logits)
        super().compute_bins()

#http://people.cs.pitt.edu/~milos/research/AAAI_Calibration.pdf
class ECELoss(MaxProbCELoss):

    def loss(self, output, labels, n_bins = 15, logits = False):
        super().loss(output, labels, n_bins, logits)
        return np.dot(self.bin_prop,self.bin_score)

class MCELoss(MaxProbCELoss):
    
    def loss(self, output, labels, n_bins = 15, logits = True):
        super().loss(output, labels, n_bins, logits)
        return np.max(self.bin_score)

#https://arxiv.org/abs/1905.11001
#Overconfidence Loss (Good in high risk applications where confident but wrong predictions can be especially harmful)
class OELoss(MaxProbCELoss):

    def loss(self, output, labels, n_bins = 15, logits = True):
        super().loss(output, labels, n_bins, logits)
        return np.dot(self.bin_prop,self.bin_conf * np.maximum(self.bin_conf-self.bin_acc,np.zeros(self.n_bins)))


#https://arxiv.org/abs/1904.01685
class SCELoss(CELoss):

    def loss(self, output, labels, n_bins = 15, logits = True):
        sce = 0.0
        self.n_bins = n_bins
        self.n_data = len(output)
        self.n_class = len(output[0])

        super().compute_bin_boundaries()
        super().get_probabilities(output, labels, logits)
        super().binary_matrices()

        for i in range(self.n_class):
            super().compute_bins(i)
            sce += np.dot(self.bin_prop,self.bin_score)

        return sce/self.n_class

class TACELoss(CELoss):

    def loss(self, output, labels, threshold = 0.01, n_bins = 15, logits = True):
        tace = 0.0
        self.n_bins = n_bins
        self.n_data = len(output)
        self.n_class = len(output[0])

        super().get_probabilities(output, labels, logits)
        self.probabilities[self.probabilities < threshold] = 0
        super().binary_matrices()

        for i in range(self.n_class):
            super().compute_bin_boundaries(self.probabilities[:,i]) 
            super().compute_bins(i)
            tace += np.dot(self.bin_prop,self.bin_score)

        return tace/self.n_class

#create TACELoss with threshold fixed at 0
class ACELoss(TACELoss):

    def loss(self, output, labels, n_bins = 15, logits = False):
        return super().loss(output, labels, 0.0 , n_bins, logits)

In [14]:
def load_model(model_path, labels):
    model = models.resnet152(num_classes=len(labels))
    checkpoint = torch.load(model_path,map_location='cpu')
    state_dict = {str.replace(k,'module.',''): v for k, v in checkpoint['state_dict'].items()}
    model.load_state_dict(state_dict)

    # hacky way to deal with the upgraded batchnorm2D and avgpool layers...
    for i, (name, module) in enumerate(model._modules.items()):
        module = recursion_change_bn(model)

    model.eval()
    # hook the feature extractor
    features_names = ['layer4'] # this is the last conv layer of the resnet
    for name in features_names:
        model._modules.get(name).register_forward_hook(hook_feature)
    return model

def returnTF():
    # load the image transformer
    normalize = trn.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    tf = trn.Compose([
        trn.Resize(256),
        trn.CenterCrop(224),
        trn.ToTensor(),
        normalize,
    ])
    return tf

#this function generates saliency maps
def returnCAM(feature_conv, weight_softmax, class_idx):
    # generate the class activation maps upsample to 256x256
    size_upsample = (256, 256)
    nc, h, w = feature_conv.shape
    output_cam = []
    for idx in class_idx:
        cam = weight_softmax[class_idx].dot(feature_conv.reshape((nc, h*w)))
        cam = cam.reshape(h, w)
        cam = cam - np.min(cam)
        cam_img = cam / np.max(cam)
        #
        cam_img = np.where(cam_img < 0.5, 0, cam_img)
        cam_img = np.uint8(255 * cam_img)
        output_cam.append(cv2.resize(cam_img, size_upsample))
        
    return output_cam

def hook_feature(module, input, output):
    features_blobs.append(np.squeeze(output.data.cpu().numpy()))
    
def load_labels(label_path):
    classes = list()
    with open(label_path) as class_file:
        for line in class_file:
            classes.append(line.strip().split('\t')[1])
    classes = tuple(classes)
    return classes

def recursion_change_bn(module):
    if isinstance(module, torch.nn.BatchNorm2d):
        module.track_running_stats = 1
    else:
        for i, (name, module1) in enumerate(module._modules.items()):
            module1 = recursion_change_bn(module1)
    return module

In [15]:
# the route to dataset, model, label, ground truth (for one binary class), bclass (string class name)
# return y_true, y_pred, y_logits
def run_model(dataset, model, label, gt):
    def load_model(model_path, labels):
        model = models.resnet152(num_classes=len(labels))
        checkpoint = torch.load(model_path,map_location='cpu')
        state_dict = {str.replace(k,'module.',''): v for k, v in checkpoint['state_dict'].items()}
        model.load_state_dict(state_dict)

        # hacky way to deal with the upgraded batchnorm2D and avgpool layers...
        for i, (name, module) in enumerate(model._modules.items()):
            module = recursion_change_bn(model)

        model.eval()
        # hook the feature extractor
        features_names = ['layer4'] # this is the last conv layer of the resnet
        for name in features_names:
            model._modules.get(name).register_forward_hook(hook_feature)
        return model

    def returnTF():
        # load the image transformer
        normalize = trn.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        tf = trn.Compose([
            trn.Resize(256),
            trn.CenterCrop(224),
            trn.ToTensor(),
            normalize,
        ])
        return tf

    #this function generates saliency maps
    def returnCAM(feature_conv, weight_softmax, class_idx):
        # generate the class activation maps upsample to 256x256
        size_upsample = (256, 256)
        nc, h, w = feature_conv.shape
        output_cam = []
        for idx in class_idx:
            cam = weight_softmax[class_idx].dot(feature_conv.reshape((nc, h*w)))
            cam = cam.reshape(h, w)
            cam = cam - np.min(cam)
            cam_img = cam / np.max(cam)
            #
            cam_img = np.where(cam_img < 0.5, 0, cam_img)
            cam_img = np.uint8(255 * cam_img)
            output_cam.append(cv2.resize(cam_img, size_upsample))

        return output_cam

    def hook_feature(module, input, output):
        features_blobs.append(np.squeeze(output.data.cpu().numpy()))

    def load_labels(label_path):
        classes = list()
        with open(label_path) as class_file:
            for line in class_file:
                classes.append(line.strip().split('\t')[1])
        classes = tuple(classes)
        return classes

    def recursion_change_bn(module):
        if isinstance(module, torch.nn.BatchNorm2d):
            module.track_running_stats = 1
        else:
            for i, (name, module1) in enumerate(module._modules.items()):
                module1 = recursion_change_bn(module1)
        return module

    y_trues = []
    y_preds = []
    y_probs = []
    
    # save the predicted results to calculate value

    labels = {'45': 'bathroom', '52': 'bedroom', '89': 'childs_room', '92': 'classroom', '102': 'conference_room', '121': 'dining_room', '124': 'dorm_room', '179': 'hospital_room', '182': 'hotel_room', '202': 'kindergarten_classroom', '203': 'kitchen', '215': 'living_room'}
    directory = dataset
    features_blobs = []

    # load the labels
    classes = load_labels(label)

    # load the model
    model = load_model(model, classes)

    # load the transformer
    tf = returnTF()  # image transformer

    # get the softmax weight
    params = list(model.parameters())
    weight_softmax = params[-2].data.numpy()
    weight_softmax[weight_softmax < 0] = 0

    for filename in os.listdir(directory):
        if (filename.endswith(".jpg") or filename.endswith(".png")):
            #try:
                y_true = gt[gt['image_name']==filename]['label'].any()
                y_trues.append(y_true)
                
                # load the test image
                img = Image.open(os.path.join(directory, filename))
                input_img = V(tf(img).unsqueeze(0))

                # forward pass
                logit = model.forward(input_img)
                h_x = F.softmax(logit, 1).data.squeeze()
                probs, idx = h_x.sort(0, True)
                probs = probs.numpy()
                idx = idx.numpy()

                p_label = classes[idx[0]]

#                 # output the prediction of scene category
#                 items = []
#                 print('--SCENE CATEGORIES:')
#                 for i in range(0, 1):
#                     items.append(float(probs[i]))
#                     items.append(classes[idx[i]])
#                     #print('{:.3f} -> {}'.format(probs[i], classes[idx[i]]))
                
                y_preds.append(classes[idx[0]])
                y_probs.append(np.array(h_x))

                print(filename + ": " + classes[idx[0]] + "(" + str(probs[0]) + ")")
#             except:
#                 print("can't process "+filename)
    return y_trues, y_preds, y_probs

In [16]:
# input: output from run_model, and the binary class of interest
def format_binary_class(y_trues, y_preds, y_probs, bclass, label):
    b_true = []
    b_pred = []
    b_prob = []
    classes = load_labels(label)
    for i in range(len(y_trues)):
        y_true = y_trues[i]
        y_pred = y_preds[i]
        y_prob = y_probs[i]
        if y_true == bclass:
            b_true.append(1)
        else:
            b_true.append(0)
#         if y_pred == bclass:
#             b_pred.append(1)
#         else:
#             b_pred.append(0)
        idx = classes.index(bclass)
#         if y_pred == bclass:
#             b_prob.append(y_prob[idx])
#         else:
#             b_prob.append(1 - y_prob[idx])
        b_prob.append(np.array([1 - y_prob[idx], y_prob[idx]])) #0, 1
        if y_prob[idx] > 1 - y_prob[idx]:
            b_pred.append(1)
        else:
            b_pred.append(0)

    return b_true, b_pred, np.array(b_prob)

In [17]:
def evaluate_model(y_true_val, y_pred_val, y_prob_val, y_true_test, y_pred_test, y_prob_test, bclass, label, res_path, logfile_name):
    
    y_val, y_pred_val, logits_val = format_binary_class(y_true_val, y_pred_val, y_prob_val, bclass, label)
    y_test, y_pred_test, logits_test = format_binary_class(y_true_test, y_pred_test, y_prob_test, bclass, label)
    
    # check if binary or multi class classification
    num_classes = len(set(y_val))
    if num_classes == 2:
        average = 'binary'
    else:
        average = 'macro'

    acc_test = accuracy_score(y_test, y_pred_test)
    pre_test, rec_test, f1_test, _ = precision_recall_fscore_support(y_test, y_pred_test, average=average, beta=1)
    print("f1_test: ", f1_test)

    ece = ECELoss()
    ace = ACELoss()

    acc_val = accuracy_score(y_val, y_pred_val)
    pre_val, rec_val, f1_val, _ = precision_recall_fscore_support(y_val, y_pred_val, average=average, beta=1)
    
    ece_val = ece.loss(logits_val, y_val, logits=False)
    ace_val = ace.loss(logits_val, y_val, logits=False)

    print("ece_val: ", ece_val)
    print("ace_val: ", ace_val)
    _, _, f01_val, _ = precision_recall_fscore_support(y_val, y_pred_val, average=average, beta=0.1)
    _, _, f10_val, _ = precision_recall_fscore_support(y_val, y_pred_val, average=average, beta=10)

    acc_test = accuracy_score(y_test, y_pred_test)
    pre_test, rec_test, f1_test, _ = precision_recall_fscore_support(y_test, y_pred_test, average=average, beta=1)
    
    ece_test = ece.loss(logits_test, y_test, logits=False)
    ace_test = ace.loss(logits_test, y_test, logits=False)

    print("ece_test: ", ece_test)
    _, _, f01_test, _ = precision_recall_fscore_support(y_test, y_pred_test, average=average, beta=0.1)
    _, _, f10_test, _ = precision_recall_fscore_support(y_test, y_pred_test, average=average, beta=10)

    # create log file
    r_path = res_path + logfile_name + ".csv"
    with open(r_path, 'w') as f:
        c = 'acc_val, pre_val, rec_val, f01_val, f1_val, f10_val, ece_val, ace_val, acc_test, pre_test, rec_test, f01_test, f1_test, f10_test, ece_test, ace_test, f1_test'
        f.write(c + '\n')

    with open(r_path, 'a') as f:
        res_i = '{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}\n'.format(acc_val, pre_val, rec_val, f01_val, f1_val, f10_val, ece_val, ace_val, acc_test, pre_test, rec_test, f01_test, f1_test, f10_test, ece_test, ace_test, f1_test)
        f.write(res_i)

    return logits_val, logits_test, y_pred_val, y_pred_test, y_val, y_test

In [28]:
def cost_based_threshold(k):
    t = (k)/(k+1)
    return t

def calculate_value(y_hat_proba, y_pred, y, t, Vw, Vc, Vr):    
    y_pred = y_pred
    y_prob = np.max(y_hat_proba, axis=1)
    y_pred[y_prob < t] = -1
    
    # now lets compute the actual value of each prediction
    value_vector = np.full(y_pred.shape[0], Vc)

    #loss due to false positives and false negatives
    false_positives_idx = (y_pred == 1) & ( y == 0)
    false_negatives_idx = (y_pred == 0) & ( y == 1)

    value_vector[false_positives_idx] = Vw
    value_vector[false_negatives_idx] = Vw

    #loss due to asking humans
    value_vector[y_pred == -1] = Vr
#     print(t)
#     print(value_vector)

    value = np.sum(value_vector) / len(y)

    numOfRejectedSamples = np.count_nonzero(y_pred == -1)
    numOfWrongPredictions = np.count_nonzero((y_pred != y) & (y_pred != -1))
    return value, numOfRejectedSamples, numOfWrongPredictions

def find_optimum_confidence_threshold(y_hat_proba, y_pred, y, t_list, Vw, Vc, Vr):

    cost_list = {}

    for t in t_list:
        # here we define K = fn_c_norm, change it based on task. 
        value = calculate_value(y_hat_proba, y_pred, y, t, Vw, Vc, Vr)
        cost_list["{}".format(t)] = value
    # find t values with maximum value
    maxValue = max(cost_list.values())
    optTList = [k for k, v in cost_list.items() if v == maxValue]
    #print(len(optTList))
    

    return optTList[0], cost_list

#cost based calibration analysis
def cost_based_analysis(y_hat_proba_val, y_pred_val, y_val, y_hat_proba_test, y_pred_test, y_test, res_path, logfile_name, Vr, Vc, Vw_list, confT_list):
#     np.save("data/logfile/"+"logits_val_inFunc.npy",y_hat_proba_val)
#     np.save("data/logfile/"+"y_pred_val_inFunc.npy",y_pred_val)
#     np.save("data/logfile/"+"y_true_val_inFUnc.npy",y_val)
#     np.save("data/logfile/"+"logits_test_inFunc.npy",y_hat_proba_test)
#     np.save("data/logfile/"+"y_pred_test_inFunc.npy",y_pred_test)
#     np.save("data/logfile/"+"y_true_test_inFUnc.npy",y_test)
    # create log file
    rc_path = res_path + logfile_name + "_costBased_test.csv"
    with open(rc_path, 'w') as f:
        c = 'Vr, Vc, Vw, k, t, value, rejected, wrong, t_optimal, value_optimal, rejected_opt, wrong_opt'
        f.write(c + '\n')

    for Vw in Vw_list:
        #  Vr_norm, Vc_norm, Vw_norm = normalize_value(Vr, Vc, Vw)
        #print("ch: {}, V: {}, fp_c: {} fn_c: {}".format(ch_norm, V_norm, fp_c_norm, fn_c_norm))
        k = (-1)*(Vw / Vc)
        t = cost_based_threshold(k)

        value_test, rej_test, wrong_test = calculate_value(y_hat_proba_test, np.array(y_pred_test), np.array(y_test), t, Vw, Vc, Vr)

        t_optimal, cost_list = find_optimum_confidence_threshold(y_hat_proba_val, np.array(y_pred_val), np.array(y_val), confT_list, Vw, Vc, Vr)
        #print("t_optimal: ", cost_list)
        value_test_opt, rej_test_opt, wrong_test_opt = calculate_value(y_hat_proba_test, np.array(y_pred_test), np.array(y_test), np.float(t_optimal), Vw, Vc, Vr)

        with open(rc_path, 'a') as f:
            res_i = '{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}\n'.format(Vr, Vc, Vw, k, t, value_test, rej_test, wrong_test, t_optimal, value_test_opt, rej_test_opt, wrong_test_opt)
            f.write(res_i)


In [221]:
import random
# create a val set with 300 images

# allFN = os.listdir("data/FN/Testing/fn/")
# cpFN = os.listdir("data/FN/Testing/correct/")
# wpFN = os.listdir("data/FN/Testing/FNwp/")
# temp = np.setdiff1d(allFN, cpFN)
# rest = np.setdiff1d(temp, wpFN)

allFP = os.listdir("data/FP/Testing/fp/")
cpFP = os.listdir("data/FP/Testing/correct/")
wpFP = os.listdir("data/FP/Testing/crowdsourcedPic/")
temp = np.setdiff1d(allFP, cpFP)
rest = np.setdiff1d(temp, wpFP)

val = random.sample(list(rest), k = 300)

In [224]:
# move all val images in a folder
import shutil

# src = 'data/FN/Testing/fn/'
# dst = 'data/FN/Testing/val/'

src = 'data/FP/Testing/fp/'
dst = 'data/FP/Testing/val/'

for file in val:
    shutil.copyfile(src+file, dst+file)

In [86]:
# val_dataset = 'data/FN/Testing/val/'
# test_dataset = 'data/FN/Testing/test/'
# model = 'data/FN/Model/model_best.pth.tar'
# label = 'data/FN/category.txt'
# gt = pd.read_excel('data/FN/Testing/scene.xlsx')

val_dataset = 'data/FP/Testing/val/'
test_dataset = 'data/FP/Testing/test/'
model = 'data/FP/Model/model_best.pth.tar'
label = 'data/FP/category.txt'
gt = pd.read_excel('data/FP/Testing/scene.xlsx')

y_true_val, y_pred_val, y_prob_val = run_model(val_dataset, model, label, gt)
y_true_test, y_pred_test, y_prob_test = run_model(test_dataset, model, label, gt)

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


179_00001748.jpg: bedroom(0.6531285)
45_00002403.jpg: bathroom(0.45262715)
202_00002864.jpg: kindergarden_classroom(0.9816565)
121_00004904.jpg: bathroom(0.41562957)
121_00001840.jpg: bedroom(0.30088535)
102_00004287.jpg: kindergarden_classroom(0.36087516)
179_00002645.jpg: kitchen(0.26318944)
202_00002247.jpg: kindergarden_classroom(0.7257557)
45_00003319.jpg: bathroom(0.46453482)
102_00001506.jpg: bathroom(0.36224824)
202_00004839.jpg: bathroom(0.6884703)
203_00004022.jpg: kitchen(0.29383403)
102_00004457.jpg: bathroom(0.70239586)
203_00003215.jpg: bathroom(0.6200692)
202_00001767.jpg: kindergarden_classroom(0.47088715)
203_00001589.jpg: kitchen(0.55328995)
124_00004842.jpg: bathroom(0.5159559)
52_00003276.jpg: bedroom(0.7781757)
124_00003446.jpg: kindergarden_classroom(0.6779015)
179_00003837.jpg: kitchen(0.68141615)
124_00003485.jpg: kitchen(0.46798736)
121_00001102.jpg: bathroom(0.43268263)
124_00002000.jpg: bedroom(0.44196928)
121_00002147.jpg: bedroom(0.3366864)
202_00000490.jpg

203_00003047.jpg: kitchen(0.53054434)
124_00002330.jpg: kitchen(0.62974316)
45_00002081.jpg: bathroom(0.40365002)
102_00002102.jpg: kitchen(0.27163154)
45_00004221.jpg: bedroom(0.5407777)
124_00004437.jpg: kindergarden_classroom(0.5872746)
202_00004275.jpg: kindergarden_classroom(0.8428171)
202_00000991.jpg: kindergarden_classroom(0.9595928)
179_00004663.jpg: bedroom(0.26075086)
45_00003216.jpg: bathroom(0.75130683)
121_00001581.jpg: bedroom(0.41521013)
45_00002530.jpg: bathroom(0.61152136)
52_00001584.jpg: bathroom(0.38487536)
202_00004329.jpg: kindergarden_classroom(0.91135323)
45_00001950.jpg: bathroom(0.34643838)
124_00002119.jpg: kitchen(0.33290744)
102_00004835.jpg: kitchen(0.62409997)
202_00002765.jpg: kindergarden_classroom(0.93833447)
215_00002031.jpg: bedroom(0.68617475)
102_00002707.jpg: kitchen(0.4054731)
45_00004381.jpg: kitchen(0.47936016)
203_00003246.jpg: bathroom(0.330909)
124_00000336.jpg: bedroom(0.58163846)
124_00001996.jpg: bathroom(0.7219307)
124_00001014.jpg: bed

52_00001316.jpg: bedroom(0.88035166)
52_00004291.jpg: bedroom(0.5911639)
202_00003258.jpg: kindergarden_classroom(0.91057575)
203_00002551.jpg: kindergarden_classroom(0.7295797)
203_00003129.jpg: kitchen(0.5044113)
202_00000792.jpg: kindergarden_classroom(0.47126603)
203_00002974.jpg: kitchen(0.88168687)
124_00000461.jpg: dorm_room(0.23285252)
52_00000245.jpg: bedroom(0.7895466)
202_00000369.jpg: kindergarden_classroom(0.9731541)
124_00004954.jpg: bedroom(0.6109973)
215_00004465.jpg: living_room(0.48538512)
102_00001565.jpg: bedroom(0.67285585)
45_00004398.jpg: bathroom(0.7122371)
45_00001592.jpg: bathroom(0.9401208)
102_00002736.jpg: kindergarden_classroom(0.69671154)
121_00001002.jpg: bedroom(0.6936086)
179_00004727.jpg: bedroom(0.67085713)
124_00003223.jpg: dorm_room(0.38336074)
215_00003690.jpg: bedroom(0.5687899)
203_00002418.jpg: bathroom(0.87842256)
203_00000019.jpg: kitchen(0.7783258)
45_00001222.jpg: bathroom(0.8753156)
215_00004538.jpg: bedroom(0.83224803)
215_00002607.jpg: b

102_00001543.jpg: kindergarden_classroom(0.83885914)
102_00001231.jpg: conference_room(0.6045356)
124_00002442.jpg: bedroom(0.68288696)
124_00004755.jpg: kindergarden_classroom(0.72880423)
124_00002318.jpg: kindergarden_classroom(0.7691739)
45_00004427.jpg: kindergarden_classroom(0.7425374)
52_00000539.jpg: bedroom(0.77040577)
179_00001241.jpg: kindergarden_classroom(0.8287724)
52_00000707.jpg: childs_room(0.61592937)
202_00002823.jpg: kindergarden_classroom(0.8066058)
202_00002360.jpg: kindergarden_classroom(0.8860491)
121_00000066.jpg: bathroom(0.83734304)
179_00003468.jpg: kindergarden_classroom(0.9251012)
52_00003740.jpg: bedroom(0.99134445)
52_00002304.jpg: bedroom(0.6523984)
203_00000821.jpg: kitchen(0.36015365)
52_00001989.jpg: bathroom(0.6641996)
121_00002507.jpg: bedroom(0.6212739)
45_00003612.jpg: bathroom(0.5405174)
45_00004182.jpg: bathroom(0.6449952)
203_00003093.jpg: kitchen(0.75345564)
52_00003345.jpg: bedroom(0.7787996)
202_00001085.jpg: kindergarden_classroom(0.8216636

In [87]:
res_path = "data/logfile1/"

# np.save(res_path + 'y_true_val_FN.npy', y_true_val)
# np.save(res_path + 'y_pred_val_FN.npy', y_pred_val)
# np.save(res_path + 'y_prob_val_FN.npy', y_prob_val)
# np.save(res_path + 'y_true_test_FN.npy', y_true_test)
# np.save(res_path + 'y_pred_test_FN.npy', y_pred_test)
# np.save(res_path + 'y_prob_test_FN.npy', y_prob_test)

np.save(res_path + 'y_true_val_FP.npy', y_true_val)
np.save(res_path + 'y_pred_val_FP.npy', y_pred_val)
np.save(res_path + 'y_prob_val_FP.npy', y_prob_val)
np.save(res_path + 'y_true_test_FP.npy', y_true_test)
np.save(res_path + 'y_pred_test_FP.npy', y_pred_test)
np.save(res_path + 'y_prob_test_FP.npy', y_prob_test)

In [417]:
classes

('bathroom',
 'bedroom',
 'childs_room',
 'classroom',
 'conference_room',
 'dining_room',
 'dorm_room',
 'hospital_room',
 'hotel_room',
 'kindergarden_classroom',
 'kitchen',
 'living_room')

In [53]:
# y_true_val = np.load("data/logfile/y_true_val_FP.npy")
# y_pred_val = np.load("data/logfile/y_pred_val_FP.npy")
# y_prob_val = np.load("data/logfile/y_prob_val_FP.npy")
# y_true_test = np.load("data/logfile/y_true_test_FP.npy")
# y_pred_test = np.load("data/logfile/y_pred_test_FP.npy")
# y_prob_test = np.load("data/logfile/y_prob_test_FP.npy")

y_true_val = np.load("data/logfile/y_true_val_FN.npy")
y_pred_val = np.load("data/logfile/y_pred_val_FN.npy")
y_prob_val = np.load("data/logfile/y_prob_val_FN.npy")
y_true_test = np.load("data/logfile/y_true_test_FN.npy")
y_pred_test = np.load("data/logfile/y_pred_test_FN.npy")
y_prob_test = np.load("data/logfile/y_prob_test_FN.npy")

In [68]:
res_path = "data/costbased/"
bclass = "living_room"
logfile_name = "FN_"+bclass
#logfile_name = "FP_"+bclass
label = 'data/FN/category.txt'

logits_val, logits_test, y_pred_val, y_pred_test, y_true_val, y_true_test = evaluate_model(y_true_val, y_pred_val, y_prob_val, y_true_test, y_pred_test, y_prob_test, bclass, label, res_path, logfile_name)

f1_test:  1.0
ece_val:  0.009644113481096284
ace_val:  0.497937395200886
ece_test:  0.003946798122445805


In [69]:
#cost-based parameters
Vr = 0.0
Vc = 1.0

Vw_list = list(np.arange(0, -10.1, -0.1))

confT_list = list(np.arange(0, 1.01, 0.01))

# y_pred_val = np.array(y_pred_val)
# y_true_val = np.array(y_true_val)
# y_pred_test = np.array(y_pred_test)
# y_true_test = np.array(y_true_test)

cost_based_analysis(logits_val, np.array(y_pred_val), np.array(y_true_val), logits_test, np.array(y_pred_test), np.array(y_true_test), res_path, logfile_name, Vr, Vc, Vw_list, confT_list)

In [70]:
df = pd.read_csv('data/costbased/'+"FN_"+"bedroom"+"_costBased_test.csv")
df

Unnamed: 0,Vr,Vc,Vw,k,t,value,rejected,wrong,t_optimal,value_optimal,rejected_opt,wrong_opt
0,0.0,1.0,0.0,-0.0,-0.000000,0.962222,0,17,0.00,0.962222,0,17
1,0.0,1.0,-0.1,0.1,0.090909,0.958444,0,17,0.00,0.958444,0,17
2,0.0,1.0,-0.2,0.2,0.166667,0.954667,0,17,0.00,0.954667,0,17
3,0.0,1.0,-0.3,0.3,0.230769,0.950889,0,17,0.00,0.950889,0,17
4,0.0,1.0,-0.4,0.4,0.285714,0.947111,0,17,0.00,0.947111,0,17
...,...,...,...,...,...,...,...,...,...,...,...,...
96,0.0,1.0,-9.6,9.6,0.905660,0.762222,107,0,0.74,0.853778,34,3
97,0.0,1.0,-9.7,9.7,0.906542,0.762222,107,0,0.74,0.853111,34,3
98,0.0,1.0,-9.8,9.8,0.907407,0.760000,108,0,0.74,0.852444,34,3
99,0.0,1.0,-9.9,9.9,0.908257,0.760000,108,0,0.74,0.851778,34,3


In [72]:
# np.save("data/logfile/"+"logits_val.npy",logits_val)
# np.save("data/logfile/"+"y_pred_val.npy",y_pred_val)
# np.save("data/logfile/"+"y_true_val.npy",y_true_val)
# np.save("data/logfile/"+"logits_test.npy",logits_test)
# np.save("data/logfile/"+"y_pred_test.npy",y_pred_test)
# np.save("data/logfile/"+"y_true_test.npy",y_true_test)
y_true_val = np.load("data/logfile/y_true_val_FN.npy")
y_pred_val = np.load("data/logfile/y_pred_val_FN.npy")
y_prob_val = np.load("data/logfile/y_prob_val_FN.npy")
y_true_test = np.load("data/logfile/y_true_test_FN.npy")
y_pred_test = np.load("data/logfile/y_pred_test_FN.npy")
y_prob_test = np.load("data/logfile/y_prob_test_FN.npy")
res_path = "data/logfile/"
bclass = "kindergarden_classroom"
#logfile_name = "FN_"+bclass
logfile_name = "FN_"+bclass
label = 'data/FN/category.txt'

logits_val, logits_test, y_pred_val, y_pred_test, y_true_val, y_true_test = evaluate_model(y_true_val, y_pred_val, y_prob_val, y_true_test, y_pred_test, y_prob_test, bclass, label, res_path, logfile_name)

y_pred_val = np.array(y_pred_val)
y_true_val = np.array(y_true_val)
y_pred_test = np.array(y_pred_test)
y_true_test = np.array(y_true_test)

Vr = 0.0
Vc = 1.0
Vw_list = list(np.arange(0, -10.1, -0.1))
t_list = list(np.arange(0, 1.01, 0.01))
t = 0
v = []
k1 = []
temp1 = []
for Vw in Vw_list:
        #  Vr_norm, Vc_norm, Vw_norm = normalize_value(Vr, Vc, Vw)
        #print("ch: {}, V: {}, fp_c: {} fn_c: {}".format(ch_norm, V_norm, fp_c_norm, fn_c_norm))
        k = ((-1)*(Vw / Vc))
        k1.append(k)
        t = cost_based_threshold(k)

        value_test, rej_test, wrong_test = calculate_value(logits_test, y_pred_test, y_true_test, t, Vw, Vc, Vr)

        t_optimal, cost_list = find_optimum_confidence_threshold(logits_val, y_pred_val, y_true_val, confT_list, Vw, Vc, Vr)
        #print("t_optimal: ", cost_list)
        value_test_opt, rej_test_opt, wrong_test_opt = calculate_value(logits_test, y_pred_test, y_true_test, np.float(t_optimal[0]), Vw, Vc, Vr)
        temp1.append(value_test_opt)

f1_test:  0.8805970149253731
ece_val:  0.03800384070789427
ace_val:  0.45422227833418827
ece_test:  0.022925100517216707
