In [17]:
import json
import networkx as nx
from community import community_louvain

# Load the data from output1.json.
# JSON text is UTF-8 by specification, so decode it explicitly instead of
# relying on the platform's locale-dependent default encoding.
with open('./output1.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Initialize nodes and links.
# nodes: dicts shaped {"id", "group", "connections"}, one per distinct id.
# links: dicts shaped {"source", "target", "value"}, one per distinct
#        (source, target) pair.
nodes = []
links = []


def check_node_or_add(node):
    """Register ``node`` in the module-level ``nodes`` list.

    Every entry already in ``nodes`` whose ``'id'`` equals ``node['id']``
    has its ``'connections'`` counter incremented by one, and ``node``
    itself is discarded. If no entry matches, ``node`` is appended as-is.
    """
    already_known = False
    for existing in nodes:
        if node['id'] == existing['id']:
            existing['connections'] += 1
            already_known = True
    if not already_known:
        nodes.append(node)

def check_link_or_add(link):
    """Register ``link`` in the module-level ``links`` list.

    Every entry already in ``links`` with the same ``'source'`` and
    ``'target'`` as ``link`` has its ``'value'`` counter incremented by
    one, and ``link`` itself is discarded. If no entry matches, ``link``
    is appended as-is. Direction matters: (a, b) and (b, a) are tracked
    as separate links.
    """
    duplicate = False
    for existing in links:
        same_source = link['source'] == existing['source']
        if same_source and link['target'] == existing['target']:
            existing['value'] += 1
            duplicate = True
    if not duplicate:
        links.append(link)

# Walk every top-level key of the JSON object together with its entries.
for key, entries in data.items():
    # The key itself becomes a node; the entry count is only kept when the
    # node is new (an already-registered id is just incremented by one).
    check_node_or_add({"id": key, "group": 1, "connections": len(entries)})

    for entry in entries:
        # Each entry's word becomes a node that is linked from the key.
        check_node_or_add({"id": entry['word'], "group": 1, "connections": 1})
        check_link_or_add({"source": key, "target": entry['word'], "value": 1})

# Rank nodes by connection count and links by value, largest first.
# Both sorts are stable, so entries with equal counts keep their current
# relative order.
nodes.sort(key=lambda entry: entry['connections'], reverse=True)
links.sort(key=lambda entry: entry['value'], reverse=True)

# Report the size and full content of each collection.
print("Number of nodes: ", len(nodes))
print(nodes)

print("Number of links: ", len(links))
print(links)
# Build an undirected graph: vertices are the node ids, edges are the
# (source, target) pairs of the links.
# NOTE(review): link['value'] is not passed as an edge weight, so the
# partition below is computed on an unweighted graph - confirm intended.
G = nx.Graph()
G.add_nodes_from(node['id'] for node in nodes)
G.add_edges_from((link['source'], link['target']) for link in links)

# Run Louvain community detection; the result is indexed by node id.
communities = community_louvain.best_partition(G)

# Replace each node's placeholder group with its detected community.
for node in nodes:
    node['group'] = communities[node['id']]

# Now, nodes are grouped based on their interrelated connections
print(nodes)

# Save the nodes and links to a JSON file.
with open('formatted_output.json', 'w') as f:
    json.dump({"nodes": nodes, "links": links}, f)



Number of nodes:  353
[{'id': 'brain', 'group': 1, 'connections': 283}, {'id': 'adolesc', 'group': 1, 'connections': 5}, {'id': 'develop', 'group': 1, 'connections': 5}, {'id': 'chang', 'group': 1, 'connections': 5}, {'id': 'activ', 'group': 1, 'connections': 5}, {'id': 'prune', 'group': 1, 'connections': 5}, {'id': 'myelin', 'group': 1, 'connections': 5}, {'id': 'c', 'group': 1, 'connections': 4}, {'id': 'matur', 'group': 1, 'connections': 4}, {'id': 'function', 'group': 1, 'connections': 4}, {'id': 'examin', 'group': 1, 'connections': 4}, {'id': 'neuron', 'group': 1, 'connections': 4}, {'id': 'synapt', 'group': 1, 'connections': 4}, {'id': 'make', 'group': 1, 'connections': 4}, {'id': 'call', 'group': 1, 'connections': 4}, {'id': 'experi', 'group': 1, 'connections': 4}, {'id': 'cognit', 'group': 1, 'connections': 4}, {'id': 'time', 'group': 1, 'connections': 3}, {'id': 'ofimport', 'group': 1, 'connections': 3}, {'id': 'structur', 'group': 1, 'connections': 3}, {'id': 'scientist', 'gr