In [1]:
import json
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Load the per-word neighbour/distance records produced upstream.
# JSON interchange files are UTF-8; naming the encoding explicitly keeps the
# load independent of the platform's locale default (e.g. cp1252 on Windows),
# which would otherwise mangle or reject non-ASCII words.
with open("vocab.distances.json", "r", encoding="utf-8") as f:
    vocab_distances = json.load(f)

In [3]:
# Identity normalisation parameters; the next cell replaces them with values
# derived from the data.
scale = 1.0
offset = 0.0

# Both lookup tables are keyed by word id *and* by word name.
id_name_mapping = {}
id_name_heuristic_mapping = {}

for entry in vocab_distances:
    word_id = entry["word_id"]
    word_name = entry["word_name"]

    # Bidirectional id <-> name lookup.
    id_name_mapping[word_id] = word_name
    id_name_mapping[word_name] = word_id

    # The heuristic of a word is the plain sum of the distances to its
    # neighbours; it is also stored back on the record itself.
    total_distance = 0.0
    for neighbor in entry["neighbors"]:
        total_distance += neighbor["word_distance"]
    entry["heuristic"] = total_distance

    id_name_heuristic_mapping[word_id] = total_distance
    id_name_heuristic_mapping[word_name] = total_distance

In [4]:
# Derive the min-max normalisation parameters used by the later cells:
# `offset` is the smallest raw heuristic and `scale` is the spread between the
# largest and the smallest one, so (heuristic - offset) / scale maps the
# vocabulary onto [0, 1].
minval = np.inf
maxval = -np.inf

# A single pass collects both extremes of the raw heuristic.
for row in vocab_distances:
    raw = id_name_heuristic_mapping[row["word_id"]]
    if raw < minval:
        minval = raw
    if raw > maxval:
        maxval = raw

# With no rows `minval` is still +inf; fall back to a neutral offset so the
# downstream arithmetic does not propagate infinities.
offset = minval if minval < np.inf else 0.0

# `maxval` keeps its meaning: the largest heuristic after the shift.
maxval = maxval - minval

# When all heuristics are equal (or there is no data) the spread is not
# positive; dividing by it would raise ZeroDivisionError in the following
# cells, so use a neutral scale of 1.0 instead.
scale = maxval if maxval > 0 else 1.0

In [5]:
# Write a human-readable dump: one header line per word followed by one
# indented line per neighbour, each with its normalised heuristic.
# The words themselves are written to the file, so the encoding is fixed to
# UTF-8; with the locale default a non-ASCII word can raise UnicodeEncodeError
# or produce a file that differs from machine to machine.
with open("neighbor.dump.txt", "w", encoding="utf-8") as f:

    for row in vocab_distances:
        word_name = row["word_name"]
        f.write("ID: %d | Word: \"%s\" | Heuristic: %.3f\n" % (
            id_name_mapping[word_name],
            word_name,
            (id_name_heuristic_mapping[word_name] - offset) / scale))

        for n in row["neighbors"]:
            neighbor_name = n["word_name"]
            # Neighbours are resolved through the same lookup tables, so every
            # neighbour name must also appear as a vocabulary word.
            f.write("    Distance: %.3f | ID: %d | Word: \"%s\" | Heuristic: %.3f\n" % (
                n["word_distance"],
                id_name_mapping[neighbor_name],
                neighbor_name,
                (id_name_heuristic_mapping[neighbor_name] - offset) / scale))

In [6]:
# Normalise each word's heuristic with the offset/scale computed above and
# collect the results, in vocabulary order, into a NumPy array.
normalized_heuristics = []
for entry in vocab_distances:
    shifted = id_name_heuristic_mapping[entry["word_id"]] - offset
    normalized_heuristics.append(shifted / scale)

heuristic_array = np.array(normalized_heuristics)

# Sanity check: the largest normalised value.
print(heuristic_array.max())

1.0


In [7]:
# Persist the normalised heuristics as plain text, one value per line,
# in the same order as the vocabulary.
with open("generality.score", "w") as score_file:
    score_file.writelines(str(score) + "\n" for score in heuristic_array)

In [8]:
# Also store the normalised heuristics via NumPy's text writer
# (default format, one value per row).
np.savetxt(fname="generality.heuristic.csv", X=heuristic_array)