In [55]:
import glob
import os
import csv
import random

import pandas as pd
df = pd.DataFrame(columns=['metrics', 'model', 'time','apfd','rgd'])
df['metrics']=range(29)
row=0

# 获得apfd figure 表格,用于图像绘制

class Item:
    def __init__(self, item, header, mode, metric):
        self.item = item
        self.header = header
        self.mode = mode
        self.metric = metric

    def get_order(self):
        order = int(self.item[self.header[self.mode]])
        if order == 0 and self.mode == "cam":
            if "ctm" in self.header:
                if self.metric == "lsc" or self.metric == "dsc":
                    order = 500000 + random.randint(1, 500000)
                else:
                    order = 500000 + int(self.item[self.header["ctm"]])
            else:
                print("!!!!!! cam has order 0, but the sheet does not have ctm")
                order = 500000
        return order

    def get_best_order(self):
        right = int(self.item[self.header["right"]])
        if right == 1:
            return 1000
        else:
            return 0

    def get_worst_order(self):
        right = int(self.item[self.header["right"]])
        if right == 1:
            return 0
        else:
            return 1000


def get_order(item):
    return item.get_order()


def get_best_order(item):
    return item.get_best_order()


def get_worst_order(item):
    return item.get_worst_order()


metric_index = {
    "nac": 0,
    "nbc": 1,
    "deepgini": 2,
    "tknc": 3,
    "dsc": 4,
    "random": 5,
    "deeplogic": 6,
    "entropy": 7,
    "mcp":8
}

metric_conf = [
    ["cam"],
    ["cam"],
    ["cam"],
    ["cam"],
    ["cam"],
    ["cam"],
    ["cam"],
    ["cam"],
    ["cam"]
]


def calc_apfd(items):
    n_tests = len(items)
    sigma_o = 0
    k_mis_tests = 0
    o = 0
    for i in items:
        o = o + 1
        if int(i.item[i.header["right"]]) == 0:
            sigma_o = sigma_o + o
            k_mis_tests = k_mis_tests + 1

    apfd = 1 - (1.0 * sigma_o / (k_mis_tests * n_tests)) + 1.0 / (2 * n_tests)
    return apfd


def best(items):
    items.sort(key=get_best_order)
    return calc_apfd(items)


def worst(items):
    items.sort(key=get_worst_order)
    return calc_apfd(items)


def get_apfd(inputfile, method, sortmode, metric, verbose):
    items = []
    header_map = {}
    csv_file = csv.reader(open(inputfile, 'r'))
    i = 0
    for line in csv_file:
        if i == 0:
            i += 1
            j = 0
            for x in line:
                header_map[x] = j
                j += 1
            if sortmode not in header_map.keys():
                print("=======================================")
                print(method + " does not have mode " + sortmode)
                print("=======================================")
                return None, None, None
            if "right" not in header_map.keys():
                print("=======================================")
                print(method + " does not col right")
                print("=======================================")
                return None, None, None
        else:
            items.append(Item(line, header_map, sortmode, metric))

    best_apfd = best(items)
    worst_apfd = worst(items)

    items.sort(key=get_order)
    orig_apfd = calc_apfd(items)

    norm_apfd = (orig_apfd - worst_apfd) / (best_apfd - worst_apfd)
    if verbose:
        print("best : " + str(best_apfd))
        print("worst : " + str(worst_apfd))

        print(sortmode + " orig apfd : " + str(orig_apfd))
        print(sortmode + " norm apfd : " + str(norm_apfd))
    return norm_apfd, items, header_map


def compute(csvname, abspath, outputdir="", to_csv=False, verbose=False,model_name='vgg16'):
    conf = csvname.split("_")
    dataset = conf[0]
    withadv = conf[1] == "adv"
    if withadv:
        metric = conf[2].lower()
        metric_param = "_".join(conf[3:])
    else:
        metric = conf[1].lower()
        metric_param = "_".join(conf[2:])
    if verbose:
        print("dataset: " + dataset + "; withadv: " + str(withadv) + "; metric: " + metric + "; param: " + metric_param)

    inputfile = abspath
    sortmodes = metric_conf[metric_index[metric]]
    res = {"cam": "N/A", "ctm": "N/A"}
    for sortmode in sortmodes:
        method = sortmode + "_" + os.path.basename(inputfile)
        outputfile = outputdir + method

        # if metric == "kmnc" and sortmode == "cam" and withadv == True:
        #     # continue
        #     print(1)
        norm_apfd, items, header_map = get_apfd(inputfile, method, sortmode, metric, verbose)
        if norm_apfd is None:
            continue
        res[sortmode] = norm_apfd
    
    global row
    global df
    df['metrics'].loc[row] = metric
    df['model'].loc[row] = model_name
    df['time'].loc[row]= 3
    df['apfd'].loc[row] = norm_apfd
    df['rgd'].loc[row] = 0.45
    row+=1
    return res


In [56]:


input_base_path = "./all_output"
output_base_path = "./result/apfd_figure_csv"
dir_list = ["output_cifar","output_fashionminist","output_imagenet"]
for path_dir in dir_list:
    dataset_name = os.path.basename(path_dir)[7:]
    lst = glob.glob(input_base_path + '/' + path_dir + '/*')
    for inputdir in lst:  # 遍历每个模型
        model_name = os.path.basename(inputdir)
        outputdir = output_base_path + "/" + dataset_name + "/" + model_name + "/"
        if not os.path.exists(outputdir):
            os.makedirs(outputdir)
        print(inputdir, outputdir)
        for filename in os.listdir(inputdir):
            if filename.endswith(".csv"):
                abspath = os.path.join(inputdir, filename)
                print("analyzing " + filename + "...")
                res = compute(filename, abspath, outputdir=outputdir, to_csv=False, verbose=True,model_name=model_name)
                print(res)

./all_output/output_cifar/vgg16 ./result/apfd_figure_csv/cifar/vgg16/
analyzing cifar_deeplogic_0.csv...
dataset: cifar; withadv: False; metric: deeplogic; param: 0.csv
best : 0.8428786057692308
worst : 0.1571213942307692
cam orig apfd : 0.8132472828585404
cam norm apfd : 0.9567903590190265
{'cam': 0.9567903590190265, 'ctm': 'N/A'}
analyzing cifar_mcp_0.csv...
dataset: cifar; withadv: False; metric: mcp; param: 0.csv
best : 0.8428786057692308
worst : 0.1571213942307692
cam orig apfd : 0.7006147012500552
cam norm apfd : 0.7925447926387624


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


{'cam': 0.7925447926387624, 'ctm': 'N/A'}
analyzing cifar_random_0.csv...
dataset: cifar; withadv: False; metric: random; param: 0.csv
best : 0.8428786057692308
worst : 0.1571213942307692
cam orig apfd : 0.50144294554592
cam norm apfd : 0.5021041638668047
{'cam': 0.5021041638668047, 'ctm': 'N/A'}
analyzing cifar_deepgini_0.csv...
dataset: cifar; withadv: False; metric: deepgini; param: 0.csv
best : 0.8428786057692308
worst : 0.1571213942307692
cam orig apfd : 0.7863755445316063
cam norm apfd : 0.9176048603107465
{'cam': 0.9176048603107465, 'ctm': 'N/A'}
analyzing cifar_entropy_0.csv...
dataset: cifar; withadv: False; metric: entropy; param: 0.csv
best : 0.8428786057692308
worst : 0.1571213942307692
cam orig apfd : 0.7861156450435441
cam norm apfd : 0.9172258639492221
{'cam': 0.9172258639492221, 'ctm': 'N/A'}
analyzing cifar_dsc_0.csv...
dataset: cifar; withadv: False; metric: dsc; param: 0.csv
best : 0.8770132211538462
worst : 0.12298677884615383
cam orig apfd : 0.7486062405443628
cam 

In [57]:
df.sort_values("metrics",inplace=True)

TypeError: '<' not supported between instances of 'int' and 'str'

In [58]:
df

Unnamed: 0,metrics,model,time,apfd,rgd
0,deeplogic,vgg16,3.0,0.95679,0.45
1,mcp,vgg16,3.0,0.792545,0.45
2,random,vgg16,3.0,0.502104,0.45
3,deepgini,vgg16,3.0,0.917605,0.45
4,entropy,vgg16,3.0,0.917226,0.45
5,dsc,vgg16,3.0,0.829705,0.45
6,nbc,vgg16,3.0,0.807633,0.45
7,nac,vgg16,3.0,0.807633,0.45
8,deeplogic,resnet34,3.0,0.963424,0.45
9,mcp,resnet34,3.0,0.639131,0.45
