In [18]:
import numpy as np
import pprint as pp
import os
import pickle

In [19]:
def write_yaml_file(file, data):
    """Write every string in ``data`` to ``file``, one entry per line.

    Args:
        file: Path of the file to create; an existing file is overwritten.
        data: Iterable of strings. A newline is appended to each entry.
    """
    with open(file, "w") as handle:
        for entry in data:
            handle.write(entry + '\n')

In [20]:
def load_pkl_dict(filename, folder):
    """Unpickle and return the object stored in ``folder + filename``.

    ``folder`` and ``filename`` are joined by plain string concatenation, so
    ``folder`` must already end with a path separator. A ``Loaded_file``
    message naming the file is printed before it is opened.

    NOTE(review): ``pickle.load`` can execute arbitrary code while loading;
    only point this at files produced by your own runs.
    """
    print(f"Loaded_file {filename}")
    source = folder + filename
    with open(source, "rb") as handle:
        return pickle.load(handle)

In [21]:
def write_pkl(filename, data, folder):
    """Pickle ``data`` into the file ``folder + filename``.

    The target path is built by plain string concatenation, so ``folder`` must
    already end with a path separator. An existing file is overwritten.
    """
    target = folder + filename
    with open(target, "wb") as handle:
        pickle.dump(data, handle)

In [22]:
def dict_to_file(options, path):
    """Write ``options`` as ``key: value`` lines to a YAML file under ``path``.

    The file name is built from the option values:

    * ``type == "base"``: type, first alpha, first ``negative_fix`` entry,
      normalize factor, and the first-last span of the tail sizes and of the
      distance multipliers.
    * any other type: type, first alpha, ``negative_fix`` as is, normalize
      factor, first tail size, first distance multiplier, and the first-last
      span of both cluster-count lists.

    Args:
        options: Mapping of option names to values; every key read below must
            be present.
        path: Directory prefix. It is concatenated with the file name as a
            plain string, so it must end with a path separator.
    """
    lines = [f"{key}: {str(value)}" for key, value in options.items()]

    def span(values):
        # "first-last" label describing a swept list of values.
        return f"{values[0]}-{values[-1]}"

    if options['type'] == "base":
        name_parts = [
            options['type'],
            options['alphas'][0],
            options['negative_fix'][0],
            options['normalize_factor'],
            span(options['tail_sizes']),
            span(options['distance_multipls']),
        ]
    else:
        name_parts = [
            options['type'],
            options['alphas'][0],
            options['negative_fix'],
            options['normalize_factor'],
            options['tail_sizes'][0],
            options['distance_multipls'][0],
            span(options['num_clusters_per_class_input']),
            span(options['num_clusters_per_class_features']),
        ]
    file_name = "params_" + "_".join(f"{part}" for part in name_parts) + ".yaml"

    write_yaml_file(path + file_name, lines)

In [23]:
# Template for one generated config file. dict_to_file() writes every entry as a
# `key: value` line in the order given here. generate() and generate_cluster()
# fill in the placeholder entries before each file is written.
options = {
    "type": "" ,  # placeholder: set per file by generate() / generate_cluster()
    "learning_rate": 0.01,
    "momentum": 0.9,
    "epochs": 1,
    "batch_size": 32,
    "num_clusters_per_class_input": [],  # placeholder: only generate_cluster() sets this
    "num_clusters_per_class_features": [],  # placeholder: only generate_cluster() sets this
    "alphas": "",  # placeholder: replaced by a list; dict_to_file() reads element [0]
    "tail_sizes" : [],  # placeholder: set per file
    "distance_multipls": [],  # placeholder: set per file
    "negative_fix": "",  # placeholder: a one-element list for "base", a plain string otherwise
    "normalize_factor": "",  # placeholder: set per file
    # Flags are kept as the strings "true"/"false" and written out verbatim.
    "logger_output": "true",
    "train_only": "false",
    "eval_only": "true",  # generate_cluster() switches this to "false" for "input-cluster"
    # Directory entries all end with "/".
    "log_dir": "./logs/",
    "saved_models_dir": "./saved_models/pytorch_models/",
    "saved_network_output_dir": "./saved_models/network_outputs/",
    "experiment_data_dir": "./experiment_data/",
    "emnist_dir": "./downloads/",
    "thresholds": [0.0001, 0.001, 0.01, 0.1, 0.2, 0.5, 0.7, 1.0]
}

In [24]:
# Known experiment types. Only types[0] ("base") is read by the Experiment 1
# cell; the Experiment 2 cell rebinds `types` to the cluster variants.
# The last entry used to repeat "input-validation-features-cluster"; it is the
# training-features variant, matching the list used for Experiment 2.
types = ["base", "input-cluster", "validation-features-cluster", "training-features-cluster", "input-validation-features-cluster", "input-training-features-cluster"]
# Values swept for the `negative_fix` and `normalize_factor` options.
negative_fixes = ["ORIGINAL", "NEGATIVE_VALUE", "VALUE_SHIFT"]
norm = ["NONE", "WEIGHTS", "N-CLASSES", "NORM-WEIGHTS"]
# Output directory for the Experiment 1 config files (must end with "/").
path = "../configs/base/"

## Experiment 1

In [25]:
# Experiment 1 uses the first entry of `types` only.
typ = types[0]
alphas = [-1, 3, 5, 7, 10]

# Tail sizes 3000..7000 in steps of 100, split into chunks of `n` values;
# generate() writes one config file per chunk.
tail_sizes = np.arange(3000, 7100, 100).tolist()
n = 10
per_file_tail_sizes = [tail_sizes[i:i + n] for i in range(0, len(tail_sizes), n)]
print(per_file_tail_sizes)

# Distance multipliers: 1.05 followed by 1.0..5.0 in steps of 0.1.
# linspace with an explicit point count replaces arange with a float step,
# whose length depends on floating-point rounding at the end point.
dist_multpl = [1.05] + np.linspace(1.0, 5.0, 41).tolist()
n = 5
# Rounding per chunk turns values such as 1.1000000000000001 into 1.1 so the
# generated file names and YAML values stay clean.
per_file_dist = [np.round(dist_multpl[i:i + n],2).tolist() for i in range(0, len(dist_multpl), n)]
print(per_file_dist)

[[3000, 3100, 3200, 3300, 3400, 3500, 3600, 3700, 3800, 3900], [4000, 4100, 4200, 4300, 4400, 4500, 4600, 4700, 4800, 4900], [5000, 5100, 5200, 5300, 5400, 5500, 5600, 5700, 5800, 5900], [6000, 6100, 6200, 6300, 6400, 6500, 6600, 6700, 6800, 6900], [7000]]
[[1.05, 1.0, 1.1, 1.2, 1.3], [1.4, 1.5, 1.6, 1.7, 1.8], [1.9, 2.0, 2.1, 2.2, 2.3], [2.4, 2.5, 2.6, 2.7, 2.8], [2.9, 3.0, 3.1, 3.2, 3.3], [3.4, 3.5, 3.6, 3.7, 3.8], [3.9, 4.0, 4.1, 4.2, 4.3], [4.4, 4.5, 4.6, 4.7, 4.8], [4.9, 5.0]]


In [26]:
def generate():
    """Write one Experiment 1 config file per parameter combination.

    Sweeps the module-level ``alphas``, ``negative_fixes``,
    ``per_file_tail_sizes``, ``per_file_dist`` and ``norm``. Each combination
    is stored in the shared ``options`` dict and written exactly once through
    ``dict_to_file(options, path)``.

    Side effects:
        Mutates the module-level ``options`` in place; after the call it holds
        the values of the last combination. Reads the module-level ``path`` at
        call time, so a later cell that rebinds ``path`` changes where the
        files go.
    """
    for a in alphas:
        for nf in negative_fixes:
            for ts in per_file_tail_sizes:
                for d in per_file_dist:
                    for norm_factor in norm:
                        options['type'] = typ
                        options['alphas'] = [a]
                        options['tail_sizes'] = ts
                        options['distance_multipls'] = d
                        # dict_to_file() reads negative_fix[0] for the "base"
                        # type, hence the one-element list.
                        options['negative_fix'] = [nf]
                        options['normalize_factor'] = norm_factor
                        dict_to_file(options, path)

In [27]:
def condense_exp(alphas, negative_fixes, norms,
                 folder="../../experiment_data/batch04/",
                 folder_write="../../experiment_data/results/"):
    """Merge the partial result pickles of each parameter combination.

    For every ``(alpha, negative_fix, norm)`` combination, all files in
    ``folder`` whose name starts with
    ``oscr_data_base_1_1_{alpha}_{negative_fix}_{norm}`` are loaded, except a
    file named exactly ``<prefix>.pkl``. The dicts stored under the keys
    ``ACC``, ``CCR-FPR``, ``GAMMA`` and ``EPSILON`` are merged per key, and the
    result is pickled to ``folder_write + "<prefix>.pkl"``. A combination with
    no matching files still produces a file with four empty dicts.

    After the sweep, the sorted ``ACC`` keys of the last combination are
    printed; nothing is printed when there were no combinations.

    Args:
        alphas: Alpha values used in the file-name prefix.
        negative_fixes: Negative-fix names used in the file-name prefix.
        norms: Normalize-factor names used in the file-name prefix.
        folder: Directory holding the partial result files. Joined by string
            concatenation, so it must end with a path separator.
        folder_write: Directory receiving the merged files; same requirement.

    NOTE(review): the files are read with ``pickle.load``; only run this on
    result files you produced yourself.
    """
    keys_of_interest = ["ACC", "CCR-FPR", "GAMMA", "EPSILON"]
    # Sorted so the merge order (later files win on duplicate keys) does not
    # depend on the order in which the OS lists the directory.
    ls = sorted(os.listdir(folder))

    condensed_dict = None
    for alpha in alphas:
        for negative_fix in negative_fixes:
            for norm_name in norms:
                prefix = f"oscr_data_base_1_1_{alpha}_{negative_fix}_{norm_name}"
                condensed_name = prefix + ".pkl"

                condensed_dict = {k: {} for k in keys_of_interest}
                for file in ls:
                    # Skip unrelated files and a merged file from an earlier run.
                    if not file.startswith(prefix) or file == condensed_name:
                        continue
                    loaded_file = load_pkl_dict(file, folder)
                    for key in keys_of_interest:
                        condensed_dict[key].update(loaded_file[key])
                write_pkl(condensed_name, condensed_dict, folder_write)

    # Without any combination there is no merged dict to report on.
    if condensed_dict is not None:
        print(sorted(condensed_dict[keys_of_interest[0]].keys()))
                
        

In [28]:
#condense_exp(alphas, negative_fixes, norm)
#generate()

## Experiment 2

In [29]:
# Settings for the cluster experiment. This cell rebinds `types`, `dist_multpl`
# and `path` from the Experiment 1 cells; generate() reads `path` at call time,
# so run it before this cell if the base configs are still needed.
types = [
    "input-cluster",
    "validation-features-cluster",
    "training-features-cluster",
    "input-validation-features-cluster",
    "input-training-features-cluster",
]
print(types)
alpha = [-1]

# One fixed tail size, distance multiplier, normalize factor and negative fix
# are shared by every cluster config.
tail_size = [6000]
dist_multpl = [1.3]
normy = norm[2]
fix = negative_fixes[1]
print(fix)

# Cluster counts 2..10 for both the input and the feature clustering.
input_cluster = list(range(2, 11))
feature_cluster = list(range(2, 11))

# Split both sweeps into chunks of `n` counts; one config file per chunk pair.
n = 3
per_file_input = [input_cluster[start:start + n] for start in range(0, len(input_cluster), n)]
per_file_feature = [feature_cluster[start:start + n] for start in range(0, len(feature_cluster), n)]
print(per_file_input)
print(per_file_feature)

# NOTE(review): `feature_clusters` is not referenced by any code visible in this notebook.
feature_clusters = np.arange(2, 10)
path = "../configs/cluster/"




['input-cluster', 'validation-features-cluster', 'training-features-cluster', 'input-validation-features-cluster', 'input-training-features-cluster']
NEGATIVE_VALUE
[[2, 3, 4], [5, 6, 7], [8, 9, 10]]
[[2, 3, 4], [5, 6, 7], [8, 9, 10]]


In [30]:
def generate_cluster():
    """Write the cluster-experiment config files.

    For every entry of the module-level ``types`` one config is written per
    combination of cluster-count chunks that applies to that type:

    * types whose name contains ``"input"`` sweep ``per_file_input``,
      all others use ``[1]`` input clusters;
    * types whose name contains ``"features"`` sweep ``per_file_feature``,
      all others use ``[1]`` feature clusters.

    A dimension that does not apply to a type is not iterated, so each file is
    written once. Every file is built from its own copy of the module-level
    ``options`` template, which is left unmodified. ``eval_only`` is set to
    ``"false"`` for ``"input-cluster"`` only; all other types keep the
    template value.

    NOTE(review): an override written into the shared dict would stay
    ``"false"`` for every type after ``"input-cluster"``; confirm that only
    ``"input-cluster"`` is meant to run with ``eval_only: false``.
    """
    for t in types:
        input_chunks = per_file_input if "input" in t else [[1]]
        feature_chunks = per_file_feature if "features" in t else [[1]]
        for i in input_chunks:
            for f in feature_chunks:
                # Unpacking `options` first keeps the template's key order in
                # the written file; the entries below replace the values.
                file_options = {
                    **options,
                    'type': t,
                    'alphas': alpha,
                    'tail_sizes': tail_size,
                    'distance_multipls': dist_multpl,
                    'negative_fix': fix,
                    'normalize_factor': normy,
                    'num_clusters_per_class_input': i,
                    'num_clusters_per_class_features': f,
                }
                if t == "input-cluster":
                    file_options['eval_only'] = "false"
                dict_to_file(file_options, path)

In [31]:
# Write the cluster config files into the directory named by `path`.
generate_cluster()