In [None]:
import json
import random
from pathlib import Path

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import seaborn as sns

In [None]:
# Load the hyponym counts produced by the Hearst-pattern experiment.
# The graph-building cell iterates this as {source: {target: count}}.
hyponym_file = Path("out/experiments/hearst/v1/hyponyms.json")
# JSON is UTF-8 by specification; pin the encoding so the read does not
# depend on the platform's default locale encoding.
with hyponym_file.open("r", encoding="utf-8") as f:
    hyponyms = json.load(f)

In [None]:
# Build a directed graph from the hyponym counts: one edge source -> target
# for every pair whose count is greater than 1, with the count stored in the
# edge's "weight" attribute.
G = nx.DiGraph()
G.add_weighted_edges_from(
    (source, target, n_seen)
    for source, targets in hyponyms.items()
    for target, n_seen in targets.items()
    if n_seen > 1
)

In [None]:
# Drop the "part" node, together with all of its incident edges, before the
# component analysis. remove_nodes_from silently ignores nodes that are not in
# the graph, so re-running this cell is safe.
# NOTE(review): presumably "part" is an overly generic term that links
# unrelated concepts — confirm the reason for excluding it.
G.remove_nodes_from(["part"])

In [None]:
# Tabulate how many weakly connected components of G exist at each size.
# (Swap in nx.strongly_connected_components to inspect strong connectivity.)
weak_component_lengths = list(map(len, nx.weakly_connected_components(G)))
component_sizes = pd.DataFrame(weak_component_lengths, columns=["size"])
component_sizes.groupby("size").size().reset_index(name="count")

In [None]:
# Keep only the largest weakly connected component of G.
largest = max(nx.weakly_connected_components(G), key=len)
G_cat = G.subgraph(largest)

# Sanity check: re-tabulate component sizes on the restricted graph.
sizes_in_largest = list(map(len, nx.weakly_connected_components(G_cat)))
component_sizes = pd.DataFrame(sizes_in_largest, columns=["size"])
component_sizes.groupby("size").size().reset_index(name="count")

In [None]:
# Persist the largest-component subgraph as GraphML.
graphml_path = "out/experiments/hearst/v1/hyponyms.graphml"
nx.write_graphml(G_cat, graphml_path)

In [None]:
# Show a random, readable neighbourhood of the graph: pick a random node whose
# radius-2 ego graph has more than 5 and fewer than 30 nodes, then render it
# with Graphviz ("fdp" layout) and save the image.
#
# Candidates are visited in shuffled order instead of being re-sampled forever,
# so the search terminates (with a clear error) when no node qualifies, and the
# node list is materialised only once.
candidate_roots = list(G_cat.nodes)
random.shuffle(candidate_roots)
for random_root in candidate_roots:
    random_subgraph = nx.ego_graph(G_cat, random_root, radius=2)
    if 5 < len(random_subgraph) < 30:
        break
else:
    # Also reached when G_cat has no nodes at all.
    raise ValueError(
        "no node in G_cat has a radius-2 ego graph with between 6 and 29 nodes"
    )

print(random_root)

# Make sure the output directory exists before Graphviz tries to write to it.
vis_dir = Path("out/experiments/hearst/v1/visualisation")
vis_dir.mkdir(parents=True, exist_ok=True)
# Node labels are free text; neutralise path separators so the label cannot
# redirect the output into a different (possibly missing) directory.
safe_name = str(random_root).replace("/", "_").replace("\\", "_")

A = nx.drawing.nx_agraph.to_agraph(random_subgraph)
A.layout("fdp")
A.draw(str(vis_dir / f"{safe_name}.png"))
A