# Analyse KNOWMAK

In this notebook, we will perform analysis on the KNOWMAK classification scheme. 

First, we will import the necessary libraries and modules, and then proceed with the analysis.

### Importing Libraries

We start by importing the libraries required for the analysis:

- `json` from the standard library, to parse the KNOWMAK export
- `deque` from the `collections` module, used as a FIFO queue when walking the hierarchy
- `pandas` for data manipulation and analysis

In [3]:
import json
from collections import deque
import pandas as pd

In [None]:
# Relative path of the KNOWMAK JSON file that the loading cell below opens.
input_file = "KNOWMAK.json"

In [16]:
# Parse the whole KNOWMAK file into Python objects.
# The encoding is pinned to UTF-8 (the standard JSON interchange encoding) so
# that labels and keywords decode the same way on every platform instead of
# depending on the locale's default encoding.
with open(input_file, "r", encoding="utf-8") as file:
    km = json.load(file)

In [17]:
# The first two entries of the loaded data are the two classification trees.
# `ket` is the tree processed in the "Processing KET" section below;
# `sgc` is the second tree (presumably Societal Grand Challenges -- confirm).
ket, sgc = km[0], km[1]

# Processing KET

In [18]:
# Accumulators filled in place by get_nested_infos():
keywords = {}   # keyword -> True, used as an ordered set of every keyword seen
topics = {}     # topic label -> True, used as an ordered set of every topic seen
narrowers = {}  # parent label -> list of direct child labels
broaders = {}   # child label -> list of direct parent labels

def get_nested_infos(tb,tc, keywords, topics, narrowers, broaders):
    """Recursively collect topics, keywords and parent/child links from a tree.

    All four dict arguments are updated in place; nothing is returned.

    Parameters
    ----------
    tb : label of the parent ("broader") node whose children are being visited.
    tc : iterable of child nodes. Each node is a mapping with the keys
        "label", "keywords", "primaryKeywords", "secondaryKeywords" and
        "children".
    keywords : dict receiving ``keyword -> True`` for every keyword found in
        any of the three keyword lists.
    topics : dict receiving ``label -> True`` for every node label; an
        existing entry is left untouched.
    narrowers : dict mapping a parent label to the list of its child labels.
    broaders : dict mapping a child label to the list of its parent labels.
    """
    keyword_fields = ("keywords", "primaryKeywords", "secondaryKeywords")

    for node in tc:
        label = node["label"]

        # Register the topic without overwriting a value that is already there.
        topics.setdefault(label, True)

        for field in keyword_fields:
            for term in node[field]:
                keywords[term] = True

        # Record the edge in both directions. A label that occurs under
        # several parents accumulates several entries in `broaders`.
        narrowers.setdefault(tb, []).append(label)
        broaders.setdefault(label, []).append(tb)

        descendants = node["children"]
        if len(descendants) > 0:
            get_nested_infos(label, descendants, keywords, topics, narrowers, broaders)
        
        
# Walk the KET tree from its root label, filling the four dicts in place.
get_nested_infos(
    ket["label"],
    ket["children"],
    keywords=keywords,
    topics=topics,
    narrowers=narrowers,
    broaders=broaders,
)

In [None]:
# Dump the complete parent -> children mapping for a visual check.
print(narrowers)

In [None]:
# Report how many distinct topics, then how many distinct keywords, were found.
for collected in (topics, keywords):
    print(len(collected))

In [22]:
# List every topic that is recorded under more than one parent entry.
for label, parents in broaders.items():
    if len(parents) <= 1:
        continue
    print(label, parents)

In [None]:
# Dump the complete child -> parents mapping for a visual check.
print(broaders)

In [24]:
# Compute a depth for every topic: the number of levels on the longest chain
# of "broader" links above it, counting the topic itself as level 1. A topic
# with no entry in `unhier` therefore keeps depth 1.
#
# NOTE: `concepts` is the very same dict object as `topics`, so the `True`
# markers stored in `topics` are replaced by integer depths here.
# NOTE: every upward path is walked, so a cycle in `unhier` would make the
# inner loop run forever.
unhier = broaders
concepts = topics
for concept in concepts:
    # Always start from level 1 rather than from the value currently stored
    # for the concept. On the first run that value is `True` (numerically 1),
    # but on a re-run of this cell it is the previously computed depth, which
    # would inflate every result.
    max_depth = 1
    queue = deque([{"t": concept, "d": 1}])
    while queue:
        dequeued = queue.popleft()
        if dequeued["t"] not in unhier:
            continue
        broads = unhier[dequeued["t"]]
        new_depth = dequeued["d"] + 1
        if new_depth > max_depth:
            max_depth = new_depth
        for broader in broads:
            queue.append({"t": broader, "d": new_depth})

    # Only the value of an existing key changes, so updating the dict while
    # iterating over it is safe.
    concepts[concept] = max_depth

In [25]:
# One row per topic label (used as the index) with its computed depth in a
# single 'depth' column. pandas is already imported as `pd` in the imports
# cell at the top of the notebook, so it is not imported again here.
list_of_depths = pd.DataFrame.from_dict(concepts, orient='index', columns=['depth'])

In [None]:
# Order the topics from deepest to shallowest and preview the first rows.
list_of_depths = list_of_depths.sort_values(by='depth', ascending=False)
list_of_depths.head()