In [4]:
from util import compute_dendrogram_skeleton
import json

In [8]:
# IMPORT INSTANCES
'''
    Each instance is expected to be formatted like
    {
        "filename": string,
        "index": int,
        "features": int[]
    }

    but it can also carry additional information, like
    {
        "filename": string,
        "index": int,
        "features": int[],
        "true_class": string,
        "predicted_class": string
    }

    `instances` is an array of instances formatted as above.
'''
# Relative folder and file name of the predictions JSON export;
# the two are joined with "/" to build the path that gets loaded.
relative_folder = "./cifar10_model"
predictions_filename = "prediction_results-cifar10-resnet50-test.json"
def import_predictions(filepath):
    """Load a JSON predictions export from disk.

    Args:
        filepath: path of the JSON file to read.

    Returns:
        The decoded JSON document, exactly as produced by ``json.load``.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file content is not valid JSON.
    """
    # JSON text is UTF-8; being explicit avoids depending on the platform's
    # locale encoding (which is not UTF-8 everywhere).
    with open(filepath, "r", encoding="utf-8") as input_file:
        return json.load(input_file)

# Read the predictions export once; the cells below reuse these globals.
resnet50_cifar10_data = import_predictions(
    "/".join((relative_folder, predictions_filename))
)

# Per-instance records and the class names, as stored in the export.
instances = resnet50_cifar10_data["test_instances"]
classes = resnet50_cifar10_data["classes"]

In [9]:
def add_instance_class_info(dendrogram, instances):
    """Copy prediction info from the instances onto the dendrogram's leaves.

    For every leaf in ``dendrogram.leaves``, the instance found at
    ``instances[leaf["instance_index"]]`` supplies the values stored on the
    leaf under "predicted_class" and "true_class". Leaves are modified in
    place; nothing is returned.
    """
    copied_keys = ("predicted_class", "true_class")
    for leaf_node in dendrogram.leaves:
        source = instances[leaf_node["instance_index"]]
        for key in copied_keys:
            leaf_node[key] = source[key]

def add_acc(instances):
    """Build a per-node callback that records prediction accuracy.

    The returned function takes a node, looks up every index listed in
    ``node["cluster"]`` in ``instances``, and stores on the node:

    * "correct_count": how many of those instances have
      "predicted_class" equal to "true_class";
    * "accuracy": that count divided by the cluster size.
    """
    def _add_acc(node):
        members = node["cluster"]
        hits = sum(
            int(record["predicted_class"] == record["true_class"])
            for record in (instances[member] for member in members)
        )
        node["correct_count"] = hits
        node["accuracy"] = hits / len(members)
    return _add_acc

In [10]:
# compute dendrogram and export for all instances
dendrogram = compute_dendrogram_skeleton(instances, top_similar=100) # necessary step to get the bare minimum dendrogram to work in dendromap
# add_acc(instances) returns the per-node callback that stores "correct_count" and "accuracy"
dendrogram.for_each_node(add_acc(instances))
# copy "predicted_class" / "true_class" from each instance onto its leaf
add_instance_class_info(dendrogram, instances)

# write the serialized tree together with the classes as one JSON document
with open("../cifar10/clusters/cifar10_resnet50.json", "w") as outfile:
    data_export = {"tree": dendrogram.to_json(), "classes": classes}
    json.dump(data_export, outfile)

Extracting Features
Agglomerative Clustering
Constructing Dendrogram
Computing top 100 similar
Adding information to the leaf nodes
Done!


In [11]:
def filter_instances_by_class(instances, classes):
    """Group instances by their "true_class".

    Returns a dict with one key per entry of ``classes`` (in that order),
    each mapping to the list of instances whose "true_class" equals that
    key, in their original order. Classes without instances keep an empty
    list; an instance whose "true_class" is not in ``classes`` raises
    ``KeyError``.
    """
    by_class = dict((name, []) for name in classes)
    for record in instances:
        by_class[record["true_class"]].append(record)
    return by_class

# Group the instances per true class and report how many each class holds.
filtered = filter_instances_by_class(instances, classes)
print(filtered.keys())
for class_label, class_members in filtered.items():
    print(class_label, len(class_members))
    

dict_keys(['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck'])
airplane 1000
automobile 1000
bird 1000
cat 1000
deer 1000
dog 1000
frog 1000
horse 1000
ship 1000
truck 1000


In [16]:
# compute and export dendrogram for each filtered class
# data_export maps each class name to its serialized dendrogram; the class
# list itself is stored under the extra "classes" key before writing.
data_export = {}
for class_name, filtered_instances in filtered.items():
    print(f"\nCOMPUTING ON { class_name }")
    filtered_dendrogram = compute_dendrogram_skeleton(filtered_instances, top_similar=25)
    add_instance_class_info(filtered_dendrogram, filtered_instances)
    # add_acc is a factory: it must be called with the instances to obtain the
    # per-node callback. Passing add_acc itself made every node call merely
    # build (and discard) a closure, so "correct_count"/"accuracy" were never
    # written to the per-class trees.
    # NOTE(review): assumes node["cluster"] holds positions into
    # filtered_instances, like leaf["instance_index"] does — confirm in util.
    filtered_dendrogram.for_each_node(add_acc(filtered_instances))
    data_export[class_name] = filtered_dendrogram.to_json()

with open("../cifar10/clusters/cifar10_resnet50_classes.json", "w") as outfile:
    data_export["classes"] = classes
    json.dump(data_export, outfile)


COMPUTING ON airplane
Extracting Features
Agglomerative Clustering
Constructing Dendrogram
Computing top 25 similar
Adding information to the leaf nodes
Done!

COMPUTING ON automobile
Extracting Features
Agglomerative Clustering
Constructing Dendrogram
Computing top 25 similar
Adding information to the leaf nodes
Done!

COMPUTING ON bird
Extracting Features
Agglomerative Clustering
Constructing Dendrogram
Computing top 25 similar
Adding information to the leaf nodes
Done!

COMPUTING ON cat
Extracting Features
Agglomerative Clustering
Constructing Dendrogram
Computing top 25 similar
Adding information to the leaf nodes
Done!

COMPUTING ON deer
Extracting Features
Agglomerative Clustering
Constructing Dendrogram
Computing top 25 similar
Adding information to the leaf nodes
Done!

COMPUTING ON dog
Extracting Features
Agglomerative Clustering
Constructing Dendrogram
Computing top 25 similar
Adding information to the leaf nodes
Done!

COMPUTING ON frog
Extracting Features
Agglomerative Clu