In [1]:
from dendromap_data import compute_dendrogram
import json

In [2]:
# IMPORT INSTANCES
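'''
    Each instance must be formatted like
    {
        "filename": string,
        "index": int,
        "features": int[]
    }

    but it may also carry additional information, such as
    {
        "filename": string,
        "index": int,
        "features": int[],
        "true_class": string,
        "predicted_class": string
    }

    `instances` is an array of instances formatted as above.
    Note: the prediction/accuracy helpers and the per-class filtering in this
    notebook read "true_class" and "predicted_class", so both keys are required here.
'''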
# Relative path of the folder that holds the prediction results file.
relative_folder = "./cifar100_model"
# Name of the JSON file (inside relative_folder) that is loaded below.
predictions_filename = "prediction_results-cifar100-resnet50-test.json"
def import_predictions(filepath):
    """Load a JSON file and return its parsed content.

    Args:
        filepath: Path of the JSON file to read.

    Returns:
        The deserialized JSON document, exactly as produced by ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # encoding, so the same file loads identically on every machine.
    with open(filepath, "r", encoding="utf-8") as input_file:
        return json.load(input_file)

# Read the prediction results once, then keep handles on the two parts used by
# the rest of the notebook: the test instances and the list of classes.
resnet50_cifar100_data = import_predictions(
    f"{relative_folder}/{predictions_filename}"
)

instances, classes = (
    resnet50_cifar100_data["test_instances"],
    resnet50_cifar100_data["classes"],
)

In [3]:
def add_instance_class_info(dendrogram, instances):
    """Copy each instance's class labels onto its leaf node, in place.

    For every leaf in ``dendrogram.leaves``, the leaf's "instance_index" is
    used to look up an entry in ``instances``; that entry's "predicted_class"
    and "true_class" values are then stored on the leaf under the same keys.

    Args:
        dendrogram: Object exposing a ``leaves`` iterable of dict-like leaves.
        instances: Sequence of instance dicts, indexed by "instance_index".
            NOTE(review): presumably this must be the same list the dendrogram
            was computed from -- confirm against compute_dendrogram.

    Returns:
        None; the leaves are mutated.
    """
    copied_keys = ("predicted_class", "true_class")
    for leaf in dendrogram.leaves:
        source = instances[leaf["instance_index"]]
        for key in copied_keys:
            leaf[key] = source[key]

# add accuracy and correct count to each node since we have prediction info
def add_acc(instances):
    """Build a node callback that records prediction accuracy on a node.

    Args:
        instances: Sequence of instance dicts carrying "predicted_class" and
            "true_class"; it is indexed with the ids found in a node's "cluster".

    Returns:
        A function taking a dict-like node. It counts the instances listed in
        ``node["cluster"]`` whose predicted class equals the true class, then
        stores that count in ``node["correct_count"]`` and the count divided by
        the cluster size in ``node["accuracy"]``.
    """
    def _add_acc(node):
        members = node["cluster"]
        hits = sum(
            int(member["predicted_class"] == member["true_class"])
            for member in (instances[i] for i in members)
        )
        node["correct_count"] = hits
        # An empty cluster raises ZeroDivisionError on the next line.
        node["accuracy"] = hits / len(members)
    return _add_acc

def add_prediction_info(dendrogram, instances):
    """Annotate a dendrogram with class labels and accuracy figures, in place.

    First copies "predicted_class"/"true_class" onto every leaf via
    ``add_instance_class_info``, then hands the callback built by ``add_acc``
    to ``dendrogram.for_each_node`` so nodes receive "correct_count" and
    "accuracy".

    Args:
        dendrogram: Object exposing ``leaves`` and ``for_each_node``.
        instances: Sequence of instance dicts used for both lookups.

    Returns:
        None.
    """
    add_instance_class_info(dendrogram, instances)
    accuracy_callback = add_acc(instances)
    dendrogram.for_each_node(accuracy_callback)

In [4]:
# Build the dendrogram over all test instances; per the original note, this is the
# necessary step to get the bare minimum dendrogram to work in dendromap.
# NOTE(review): top_similar=100 presumably sets how many similar instances are
# computed (the run logs "Computing top 100 similar") -- confirm in dendromap_data.
dendrogram = compute_dendrogram(instances, top_similar=100)
# Attach class labels to the leaves and correct_count/accuracy to the nodes.
add_prediction_info(dendrogram, instances)

Extracting Features
Agglomerative Clustering
Constructing Dendrogram
Computing top 100 similar
Adding information to the leaf nodes
Done!


In [5]:
# Sanity check on the root node: list its fields, then show the accuracy and
# correct_count written by add_acc next to the node's own node_count.
print(dendrogram.root.keys())
print(dendrogram.root["accuracy"])
print(dendrogram.root["correct_count"], "over", dendrogram.root["node_count"])

dict_keys(['leaf', 'node_index', 'parent', 'children', 'cluster', 'node_count', 'correct_count', 'accuracy'])
0.7625
7625 over 10000


In [6]:
# export the data so we can use it in the interface
# Build the payload before opening the file: mode "w" truncates the target on
# open, so a failure inside to_json() would otherwise leave an empty export behind.
data_export = {"tree": dendrogram.to_json(), "classes": classes}
with open("../public/cifar100/clusters/cifar100_resnet50.json", "w") as outfile:
    json.dump(data_export, outfile)

In [7]:
# filter instances for each class
def filter_instances_by_class(instances, classes):
    """Group instances by their "true_class" value.

    Args:
        instances: Iterable of instance dicts, each with a "true_class" key.
        classes: Iterable of class names; every name gets its own (possibly
            empty) list in the result, in this order.

    Returns:
        Dict mapping each class name to the list of instances whose
        "true_class" equals that name, in their original order.

    Raises:
        KeyError: If an instance's "true_class" is not one of ``classes``.
    """
    grouped = {}
    for name in classes:
        grouped[name] = []
    for item in instances:
        grouped[item["true_class"]].append(item)
    return grouped

# Group the instances by true class, then report how many fall under each one.
filtered = filter_instances_by_class(instances, classes)
print(filtered.keys())
for k, v in filtered.items():
    print(k, len(v))
    

dict_keys(['apple', 'aquarium_fish', 'baby', 'bear', 'beaver', 'bed', 'bee', 'beetle', 'bicycle', 'bottle', 'bowl', 'boy', 'bridge', 'bus', 'butterfly', 'camel', 'can', 'castle', 'caterpillar', 'cattle', 'chair', 'chimpanzee', 'clock', 'cloud', 'cockroach', 'couch', 'crab', 'crocodile', 'cup', 'dinosaur', 'dolphin', 'elephant', 'flatfish', 'forest', 'fox', 'girl', 'hamster', 'house', 'kangaroo', 'keyboard', 'lamp', 'lawn_mower', 'leopard', 'lion', 'lizard', 'lobster', 'man', 'maple_tree', 'motorcycle', 'mountain', 'mouse', 'mushroom', 'oak_tree', 'orange', 'orchid', 'otter', 'palm_tree', 'pear', 'pickup_truck', 'pine_tree', 'plain', 'plate', 'poppy', 'porcupine', 'possum', 'rabbit', 'raccoon', 'ray', 'road', 'rocket', 'rose', 'sea', 'seal', 'shark', 'shrew', 'skunk', 'skyscraper', 'snail', 'snake', 'spider', 'squirrel', 'streetcar', 'sunflower', 'sweet_pepper', 'table', 'tank', 'telephone', 'television', 'tiger', 'tractor', 'train', 'trout', 'tulip', 'turtle', 'wardrobe', 'whale', 'wil

In [8]:
# Cluster every class separately and collect the resulting trees by class name.
data_export = {}
for class_name, filtered_instances in filtered.items():
    print(f"\nCOMPUTING ON { class_name }")
    filtered_dendrogram = compute_dendrogram(filtered_instances, top_similar=25)
    add_prediction_info(filtered_dendrogram, filtered_instances)
    data_export[class_name] = filtered_dendrogram.to_json()

# The class list is stored next to the per-class trees under the "classes" key.
# NOTE(review): a class literally named "classes" would be overwritten here.
data_export["classes"] = classes
with open("../public/cifar100/clusters/cifar100_resnet50_classes.json", "w") as outfile:
    json.dump(data_export, outfile)


COMPUTING ON apple
Extracting Features
Agglomerative Clustering
Constructing Dendrogram
Computing top 25 similar
Adding information to the leaf nodes
Done!

COMPUTING ON aquarium_fish
Extracting Features
Agglomerative Clustering
Constructing Dendrogram
Computing top 25 similar
Adding information to the leaf nodes
Done!

COMPUTING ON baby
Extracting Features
Agglomerative Clustering
Constructing Dendrogram
Computing top 25 similar
Adding information to the leaf nodes
Done!

COMPUTING ON bear
Extracting Features
Agglomerative Clustering
Constructing Dendrogram
Computing top 25 similar
Adding information to the leaf nodes
Done!

COMPUTING ON beaver
Extracting Features
Agglomerative Clustering
Constructing Dendrogram
Computing top 25 similar
Adding information to the leaf nodes
Done!

COMPUTING ON bed
Extracting Features
Agglomerative Clustering
Constructing Dendrogram
Computing top 25 similar
Adding information to the leaf nodes
Done!

COMPUTING ON bee
Extracting Features
Agglomerative C