In [1]:
import numpy as np
from pprint import pprint
import os
import pickle
import glob
import io
import torch

In [2]:
def write_yaml_file(file, data):
    """Write every entry of ``data`` to ``file``, one entry per line.

    Args:
        file: Path of the file to create (an existing file is overwritten).
        data: Iterable of strings; a newline is appended to each entry.
    """
    with open(file, "w") as handle:
        for entry in data:
            handle.write(entry + '\n')

In [3]:
def load_pkl_dict(filename, folder):
    """Unpickle and return the object stored in ``folder + filename``.

    The two arguments are concatenated as-is, so ``folder`` must already end
    with a path separator and ``filename`` must include its extension.
    A ``Loaded_file <filename>`` line is printed before the file is opened.

    Note: ``pickle.load`` can execute arbitrary code; only use on trusted files.
    """
    print(f"Loaded_file {filename}")
    source = folder + filename
    with open(source, "rb") as handle:
        return pickle.load(handle)

In [4]:
def write_pkl(filename,data, folder):
    """Pickle ``data`` into ``folder + filename + ".pkl"``.

    The ".pkl" extension is always appended, so ``filename`` should be passed
    without it; ``folder`` must already end with a path separator.
    """
    target = folder + filename + ".pkl"
    with open(target, "wb") as handle:
        pickle.Pickler(handle).dump(data)

In [5]:
def dict_to_file(options, path):
    """Dump ``options`` as ``key: value`` lines into a parameter file under ``path``.

    The file name is derived from the option values. For ``type == "base"`` it
    encodes the first/last tail size and distance multiplier; for every other
    type it encodes the first tail size and distance multiplier plus the
    first/last number of input and feature clusters.

    Args:
        options: Mapping of option names to values; the list-valued entries used
            in the file name must be non-empty.
        path: Directory prefix (including trailing separator) for the file.
    """
    lines = [f"{key}: {value!s}" for key, value in options.items()]

    is_base = options['type'] == "base"
    # Part of the name shared by both naming schemes.
    stem = f"params_{options['dataset']}_{options['type']}_{options['alphas'][0]}_{options['negative_fix'][0]}_{options['normalize_factor']}_{options['tail_sizes'][0]}"
    if is_base:
        suffix = f"-{options['tail_sizes'][-1]}_{options['distance_multipls'][0]}-{options['distance_multipls'][-1]}"
    else:
        suffix = f"_{options['distance_multipls'][0]}_{options['num_clusters_per_class_input'][0]}-{options['num_clusters_per_class_input'][-1]}_{options['num_clusters_per_class_features'][0]}-{options['num_clusters_per_class_features'][-1]}"
    file_name = stem + suffix + ".yaml"

    write_yaml_file(path + file_name, lines)

In [6]:
# Template for one experiment parameter file. dict_to_file() writes every
# entry as a "key: value" line. The placeholder entries ("" / []) are
# overwritten by generate() / generate_cluster() before each file is written.
options = {
    # Overwritten per file by generate() / generate_cluster().
    "type": "" ,
    "dataset": "EMNIST",
    "learning_rate": 0.01,
    "momentum": 0.9,
    "epochs": 1,
    "batch_size": 32,
    # Only overwritten by generate_cluster(); generate() leaves these untouched.
    "num_clusters_per_class_input": [],
    "num_clusters_per_class_features": [],
    # Sweep parameters, overwritten per file by both generators.
    "alphas": "",
    "tail_sizes" : [],
    "distance_multipls": [],
    "negative_fix": [],
    "normalize_factor": "",
    # Flags are kept as the strings "true"/"false" and written out verbatim.
    "logger_output": "false",
    "run_model": "false",
    "post_process": "true",
    # generate_cluster() sets this to "false" for the "input-cluster" type.
    "precomputed_clusters": "true",
    # Directory settings, written to the parameter file unchanged.
    "log_dir": "./logs/",
    "saved_models_dir": "./saved_models/pytorch_models/",
    "saved_network_output_dir": "./saved_models/network_outputs/",
    "experiment_data_dir": "./experiment_data/",
    "clusters_dir": "./saved_models/clusters/",
    "emnist_dir": "./downloads/",
    "thresholds": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
}

In [7]:
# Candidate values for the experiment sweeps. Experiment 1 only uses
# types[0] ("base"); the remaining entries are the cluster-based model types.
# The last entry was a duplicate of "input-validation-features-cluster";
# it is now "input-training-features-cluster", matching the experiment 2 list.
types = ["base", "input-cluster", "validation-features-cluster", "training-features-cluster", "input-validation-features-cluster", "input-training-features-cluster"]
negative_fixes = ["ORIGINAL", "VALUE_SHIFT", "ADJUSTED_NEGATIVE_VALUE"]
norm = ["NONE", "WEIGHTS", "N-CLASSES"]
datasets = ["CIFAR"]
# Output directory (with trailing separator) for the generated parameter files.
path = "../configs/base/"

## Experiment 1

In [8]:
# Experiment 1 sweeps the "base" model type only.
typ = types[0]
alphas = [-1, 3, 5, 7, 10]
tail_sizes = [10, 100, 250, 500, 750, 1000]
# Chunk size per parameter file; 7 exceeds len(tail_sizes), so all tail sizes
# end up in a single chunk.
n = 7
per_file_tail_sizes = [tail_sizes[i:i + n] for i in range(0, len(tail_sizes), n)]
print(per_file_tail_sizes)
dist_multpl = [1.0, 1.25, 1.5, 1.7, 2.0, 2.3, 3.0, 4.0]
# Chunk size 8 == len(dist_multpl): again a single chunk.
n = 8
per_file_dist = [np.round(dist_multpl[i:i + n],2).tolist() for i in range(0, len(dist_multpl), n)]
print(per_file_dist)

# Size of every sweep axis, followed by the total number of combinations.
# The first entry used to repeat len(per_file_dist); it now reports the
# tail-size axis so it matches the factors of the product below.
print(len(per_file_tail_sizes), len(per_file_dist), len(alphas), len(negative_fixes), len(norm), len(datasets) )
print(len(per_file_tail_sizes) * len(per_file_dist) * len(alphas) * len(negative_fixes) * len(norm) * len(datasets))

[[10, 100, 250, 500, 750, 1000]]
[[1.0, 1.25, 1.5, 1.7, 2.0, 2.3, 3.0, 4.0]]
1 1 5 3 3 1
45


In [9]:
def generate():
    """Write one parameter file per combination of the experiment 1 sweep.

    Iterates over the module-level ``datasets``, ``alphas``, ``negative_fixes``,
    ``norm``, ``per_file_tail_sizes`` and ``per_file_dist`` lists, fills the
    module-level ``options`` dict in place and hands it to ``dict_to_file``.

    ``norm`` used to be iterated twice (once unused), which rewrote every file
    ``len(norm)`` times; it is now iterated once. The set of files produced is
    unchanged.
    """
    for ds in datasets:
        for a in alphas:
            for nf in negative_fixes:
                for no in norm:
                    for ts in per_file_tail_sizes:
                        for d in per_file_dist:
                            # `options` is shared module state and is mutated
                            # in place; it keeps the last combination afterwards.
                            options['type'] = typ
                            options['alphas'] = [a]
                            options['tail_sizes'] = ts
                            options['distance_multipls'] = d
                            options['negative_fix'] = [nf]
                            options['normalize_factor'] = no
                            options['dataset'] = ds
                            dict_to_file(options, path)

In [10]:
def condense_exp(alphas, negative_fixes, norms, dataset):
    """Merge the per-run result pickles of the base experiment, one file per setting.

    For every (alpha, negative_fix, norm) combination, all files in the output
    folder whose name starts with
    ``oscr_data_base_{dataset}_1_1_{alpha}_{negative_fix}_{norm}`` are loaded and
    their "ACC", "CCR-FPR", "GAMMA" and "EPSILON" dictionaries are merged; on
    key clashes the file loaded later wins. The merged dictionary is written to
    the condensed folder as ``<prefix>.pkl``.

    Both folders are hard-coded to the CIFAR base experiment; ``dataset`` only
    influences the file-name prefix.

    Args:
        alphas: Iterable of alpha values used in the file names.
        negative_fixes: Iterable of negative-fix names used in the file names.
        norms: Iterable of normalisation names used in the file names.
        dataset: Dataset name used in the file names.
    """
    keys_of_interest = ["ACC", "CCR-FPR", "GAMMA", "EPSILON"]
    folder = "../experiment_data/output/base/cifar/"
    folder_write = "../experiment_data/condensed/base/cifar/"
    # List the same folder the files are loaded from (was a duplicated literal).
    ls = os.listdir(folder)
    condensed_dict = None
    for alpha in alphas:
        for negative_fix in negative_fixes:
            for norm_name in norms:
                prefix = f"oscr_data_base_{dataset}_1_1_{alpha}_{negative_fix}_{norm_name}"
                files = [filename for filename in ls if filename.startswith(prefix)]
                condensed_dict = {k: {} for k in keys_of_interest}

                for file in files:
                    # Skip a file that carries exactly the condensed name.
                    if file != prefix + ".pkl":
                        loaded_file = load_pkl_dict(file, folder)
                        for key in keys_of_interest:
                            condensed_dict[key].update(loaded_file[key])
                # write_pkl appends ".pkl" itself; passing prefix + ".pkl"
                # produced a "<prefix>.pkl.pkl" file.
                write_pkl(prefix, condensed_dict, folder_write)
    # Only report when at least one combination was processed; otherwise
    # there is no dictionary to print (previously a NameError).
    if condensed_dict is not None:
        print(sorted(condensed_dict[keys_of_interest[0]].keys()))

In [11]:
#condense_exp(alphas, negative_fixes, norm, "CIFAR")
#generate()

## Experiment 2

In [12]:
def condense_exp_cluster(model_types, alphas):
    """Merge the per-run result pickles of the cluster experiment.

    For every (alpha, model type) pair, all files in the hard-coded EMNIST
    results folder whose name starts with ``oscr_data_{typ}`` and contains
    ``_{alpha}_ORIGINAL`` are loaded. Their "ACC", "CCR-FPR", "GAMMA" and
    "EPSILON" dictionaries are merged (later files win on key clashes). The
    "INPUT-CLUSTER" and "FEATURES-CLUSTER" values of each file are stored under
    the first key of that file's "ACC" dictionary. The result is written as
    ``oscr_data_{typ}_{alpha}.pkl`` into the condensed folder.

    Args:
        model_types: Iterable of model type names used in the file names.
        alphas: Iterable of alpha values used in the file names.
    """
    folder = "../experiment_data/final/emnist/all-clusters/"
    ls = os.listdir(folder)
    keys_of_interest = ["ACC", "CCR-FPR", "GAMMA", "EPSILON"]
    keys_of_interest_integers = ["INPUT-CLUSTER", "FEATURES-CLUSTER"]
    folder_write = "../experiment_data/condensed/clusters/emnist/"
    condensed_dict = None
    for a in alphas:
        for typ in model_types:
            prefix = f"oscr_data_{typ}"
            files = [filename for filename in ls if filename.startswith(prefix) and f"_{a}_ORIGINAL" in filename]

            condensed_dict = {k: {} for k in keys_of_interest + keys_of_interest_integers}

            for file in files:
                loaded_file = load_pkl_dict(file, folder)
                for key in keys_of_interest:
                    condensed_dict[key].update(loaded_file[key])
                # The per-file cluster counts are indexed by the first key of
                # the file's "ACC" dictionary (looked up once per file).
                first_acc_key = list(loaded_file["ACC"].keys())[0]
                for key in keys_of_interest_integers:
                    condensed_dict[key][first_acc_key] = loaded_file[key]

            write_pkl(prefix + f"_{a}", condensed_dict, folder_write)
    # Only report when at least one combination was processed; otherwise
    # there is no dictionary to print (previously a NameError).
    if condensed_dict is not None:
        print(sorted(condensed_dict[keys_of_interest[0]].keys()))

In [13]:
# Experiment 2 sweep: the cluster-based model types on EMNIST.
types = [
    "input-cluster",
    "validation-features-cluster",
    "training-features-cluster",
    "input-validation-features-cluster",
    "input-training-features-cluster",
]
negative_fixes = ["ORIGINAL"]
norm = ["NONE"]
datasets = ["EMNIST"]
# Alternative subsets of `types` for partial runs (disabled):
#types = ["input-cluster", "input-validation-features-cluster"]
#types = ["input-training-features-cluster"]
#types = ["validation-features-cluster", "training-features-cluster"]

print(types)
alphas = [3, 5, 7, 10]

tail_sizes = [10, 100, 250, 500, 750, 1000]
dist_multpl = [-0.5, -0.2, 0.2, 0.5, 0.7, 1.0, 1.25, 1.5, 1.7, 2.0, 2.3, 3.0, 4.0]
input_cluster = list(range(2, 8))
feature_cluster = list(range(2, 8))

# Chunk sizes: how many values of each sweep axis go into one parameter file.
c = 1   # input cluster counts per file
fc = 1  # feature cluster counts per file
n = 3   # tail sizes per file
m = 3   # distance multipliers per file
per_file_input = [input_cluster[start:start + c] for start in range(0, len(input_cluster), c)]
per_file_feature = [feature_cluster[start:start + fc] for start in range(0, len(feature_cluster), fc)]
per_file_dist = [np.round(dist_multpl[start:start + m], 2).tolist() for start in range(0, len(dist_multpl), m)]
per_file_tail_sizes = [tail_sizes[start:start + n] for start in range(0, len(tail_sizes), n)]


# Show the chunks, then the number of combinations per model type.
print(per_file_input, per_file_feature, per_file_dist, per_file_tail_sizes)
print(len(per_file_input) * len(per_file_feature) * len(per_file_dist) * len(per_file_tail_sizes) * len(alphas))

feature_clusters = np.arange(2, 8)
# Output directory (with trailing separator) for the cluster parameter files.
path = "../configs/cluster/"

['input-cluster', 'validation-features-cluster', 'training-features-cluster', 'input-validation-features-cluster', 'input-training-features-cluster']
[[2], [3], [4], [5], [6], [7]] [[2], [3], [4], [5], [6], [7]] [[-0.5, -0.2, 0.2], [0.5, 0.7, 1.0], [1.25, 1.5, 1.7], [2.0, 2.3, 3.0], [4.0]] [[10, 100, 250], [500, 750, 1000]]
1440


In [14]:
def generate_cluster():
    """Write one parameter file per combination of the experiment 2 sweep.

    Iterates over the module-level ``types``, ``per_file_input``,
    ``per_file_feature``, ``datasets``, ``alphas``, ``negative_fixes``, ``norm``,
    ``per_file_tail_sizes`` and ``per_file_dist`` lists, fills the module-level
    ``options`` dict in place and hands it to ``dict_to_file``.

    Types without "input" (or "features") in their name always use ``[1]`` for
    the corresponding cluster count, so that axis is collapsed to a single
    entry instead of rewriting identical files once per chunk. The redundant
    second loop over ``norm`` (whose variable was never used) is removed for
    the same reason.
    """
    for t in types:
        # A cluster axis is only swept when the model type actually uses it.
        input_chunks = per_file_input if "input" in t else [[1]]
        feature_chunks = per_file_feature if "features" in t else [[1]]
        for i in input_chunks:
            for f in feature_chunks:
                for ds in datasets:
                    for a in alphas:
                        for nf in negative_fixes:
                            for no in norm:
                                for ts in per_file_tail_sizes:
                                    for d in per_file_dist:
                                        # `options` is shared module state and
                                        # is mutated in place.
                                        options['type'] = t
                                        options['alphas'] = [a]
                                        options['tail_sizes'] = ts
                                        options['distance_multipls'] = d
                                        options['negative_fix'] = [nf]
                                        options['normalize_factor'] = no
                                        options['num_clusters_per_class_input'] = i
                                        options['num_clusters_per_class_features'] = f
                                        options['dataset'] = ds
                                        options['precomputed_clusters'] = "false" if t == "input-cluster" else "true"
                                        dict_to_file(options, path)

In [15]:
# Parameter-file generation is disabled here; only the condensing step runs.
#generate_cluster()
# Merge the per-run result pickles for every model type / alpha combination.
condense_exp_cluster(types, alphas)

Loaded_file oscr_data_input-cluster_EMNIST_7_1_3_ORIGINAL_NONE_22-12-2023_09-39-22.pkl
Loaded_file oscr_data_input-cluster_EMNIST_2_1_3_ORIGINAL_NONE_22-12-2023_09-38-47.pkl
Loaded_file oscr_data_input-cluster_EMNIST_4_1_3_ORIGINAL_NONE_22-12-2023_09-41-09.pkl
Loaded_file oscr_data_input-cluster_EMNIST_4_1_3_ORIGINAL_NONE_22-12-2023_09-40-25.pkl
Loaded_file oscr_data_input-cluster_EMNIST_5_1_3_ORIGINAL_NONE_22-12-2023_09-39-17.pkl
Loaded_file oscr_data_input-cluster_EMNIST_2_1_3_ORIGINAL_NONE_22-12-2023_09-40-17.pkl
Loaded_file oscr_data_input-cluster_EMNIST_5_1_3_ORIGINAL_NONE_22-12-2023_09-40-53.pkl
Loaded_file oscr_data_input-cluster_EMNIST_2_1_3_ORIGINAL_NONE_22-12-2023_09-39-26.pkl
Loaded_file oscr_data_input-cluster_EMNIST_3_1_3_ORIGINAL_NONE_22-12-2023_09-38-51.pkl
Loaded_file oscr_data_input-cluster_EMNIST_3_1_3_ORIGINAL_NONE_22-12-2023_09-39-12.pkl
Loaded_file oscr_data_input-cluster_EMNIST_5_1_3_ORIGINAL_NONE_22-12-2023_09-41-11.pkl
Loaded_file oscr_data_input-cluster_EMNIST_

In [16]:
class CPU_Unpickler(pickle.Unpickler):
    """Unpickler that loads pickled torch storages onto the CPU."""

    def find_class(self, module, name):
        """Resolve ``module.name``, substituting a CPU loader for torch storages.

        ``torch.storage._load_from_bytes`` is replaced by a function that calls
        ``torch.load`` with ``map_location="cpu"``; every other lookup is
        delegated to ``pickle.Unpickler.find_class``.
        """
        if (module, name) != ("torch.storage", "_load_from_bytes"):
            return super().find_class(module, name)

        def _load_on_cpu(b):
            return torch.load(io.BytesIO(b), map_location="cpu")

        return _load_on_cpu


def load_network_output(path, model_name, special=False):
    """Load the pickled network output ``dnn_output_<model_name>.pkl`` from ``path``.

    The file is read with ``CPU_Unpickler`` rather than plain ``pickle.load``.

    Args:
        path: Directory prefix, including the trailing separator.
        model_name: Name inserted into the file name.
        special: Accepted but not used by this function.

    Returns:
        The unpickled object.
    """
    file_ = path + f"dnn_output_{model_name}.pkl"

    with open(file_, "rb") as handle:
        unpickler = CPU_Unpickler(handle)
        return unpickler.load()

In [17]:
# Inspect the saved network output of the cluster-20 input model: list the
# keys of one entry and the shape stored under each key.
folder = "../saved_models/network_outputs/"
out = load_network_output(path=folder, model_name="openmax_cnn_emnist_cluster-20-input_train", special=True)
o = 3  # entry of `out` to inspect; the next inspection cell reuses this value
print(sorted(out[o].keys()))
for key, value in out[o].items():
    print(value.shape)

[-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
torch.Size([1000, 500])
torch.Size([1000, 500])
torch.Size([1000, 500])
torch.Size([1000, 500])
torch.Size([1000, 500])
torch.Size([1000, 500])
torch.Size([1000, 500])
torch.Size([1000, 500])
torch.Size([1000, 500])
torch.Size([1000, 500])
torch.Size([8800, 500])


In [18]:
# Same inspection for the "openmax_cnn_emnist0" network output; `o` is the
# entry index chosen in the previous inspection cell.
folder = "../saved_models/network_outputs/"
out = load_network_output(path=folder, model_name="openmax_cnn_emnist0", special=False)
print(sorted(out[o].keys()))
for key, value in out[o].items():
    print(value.shape)

[-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
torch.Size([1000, 500])
torch.Size([1000, 500])
torch.Size([1000, 500])
torch.Size([1000, 500])
torch.Size([1000, 500])
torch.Size([1000, 500])
torch.Size([1000, 500])
torch.Size([1000, 500])
torch.Size([1000, 500])
torch.Size([1000, 500])
torch.Size([8800, 500])


In [19]:
# Inspect a saved cluster file: list its keys and the shape stored under each.
folder = "../saved_models/clusters/"
out = load_pkl_dict(filename="dnn_cluster_openmax_cnn_emnist_cluster-20_training_2.pkl", folder=folder)
print(sorted(out.keys()))
for key, value in out.items():
    print(value.shape)

Loaded_file dnn_cluster_openmax_cnn_emnist_cluster-20_training_2.pkl
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
torch.Size([1615, 500])
torch.Size([3499, 500])
torch.Size([2333, 500])
torch.Size([3011, 500])
torch.Size([3158, 500])
torch.Size([1679, 500])
torch.Size([3338, 500])
torch.Size([749, 500])
torch.Size([1191, 500])
torch.Size([2125, 500])
torch.Size([2209, 500])
torch.Size([2780, 500])
torch.Size([2437, 500])
torch.Size([1472, 500])
torch.Size([2753, 500])
torch.Size([1915, 500])
torch.Size([1114, 500])
torch.Size([1719, 500])
torch.Size([1505, 500])
torch.Size([927, 500])
