This code is based on the solution found here: https://stackoverflow.com/questions/43757965/convert-csv-to-json-tree-structure. It converts a CSV file into the nested JSON tree that a d3.js sunburst chart expects.

In [1]:
import csv, json

In [2]:
from collections import defaultdict

In [3]:
# input CSV with the hierarchy (read by the tree-building cell)
csvFilePath = 'inputOst3.csv'
# output file that receives the JSON export
jsonFilePath = 'whole-output.json'

In [4]:
def ctree():
    """Create an empty tree node whose missing keys auto-create child nodes.

    Returns:
        A ``defaultdict`` that uses ``ctree`` itself as its default factory,
        so ``node[a][b][c]`` builds the whole path on first access.
    """
    node = defaultdict(ctree)
    return node

In [5]:
# root of the nested tree; the CSV rows are inserted into it in the next cell
tree = ctree()

In [6]:
# rid = row id
# cid = column id

# newline="" is the mode the csv module documents for file objects, and the
# explicit encoding keeps the non-ASCII names intact regardless of the
# platform's default locale (assumes the CSV is saved as UTF-8).
with open(csvFilePath, newline="", encoding="utf-8") as csvfile:
    reader = csv.reader(csvfile)
    for rid, row in enumerate(reader):

        # skipping first header row
        # remove if your csv is headerless
        if rid == 0:
            continue

        # csv.reader yields an empty list for a blank line; row[0] would
        # raise IndexError on it, so skip such rows
        if not row:
            continue

        # usage of python magic to construct dynamic tree structure and
        # basically grouping csv values under their parents
        print("row " + str(rid) + ": " + str(row))

        leaf = tree[row[0]]
        # leaf is the complete tree structure starting from row[0]
        # example:
        # defaultdict(<function ctree at 0x10f4bf0d0>, {
        # 'Cartographic collection - selected objects': defaultdict(<function ctree at 0x10f4bf0d0>, {
        # 'BackToTheMap': defaultdict(<function ctree at 0x10f4bf0d0>, {}),
        # 'Paracosm - mindful maps': defaultdict(<function ctree at 0x10f4bf0d0>, {})
        # })})

        for cid in range(1, len(row)):
            # exclude empty values (i.e. no projects) from the tree structure
            if row[cid] == "":
                continue

            # descend one level; a missing key creates the child node
            leaf = leaf[row[cid]]
            print(str(row[cid]) + " | leaf: ")
            print(leaf)

        print("")

row 1: ['National Archives in Krakow', 'Cartographic collection - selected objects', 'BackToTheMap']
Cartographic collection - selected objects | leaf: 
defaultdict(<function ctree at 0x10ef360d0>, {})
BackToTheMap | leaf: 
defaultdict(<function ctree at 0x10ef360d0>, {})

row 2: ['National Archives in Krakow', 'Cartographic collection - selected objects', 'Paracosm - mindful maps']
Cartographic collection - selected objects | leaf: 
defaultdict(<function ctree at 0x10ef360d0>, {'BackToTheMap': defaultdict(<function ctree at 0x10ef360d0>, {})})
Paracosm - mindful maps | leaf: 
defaultdict(<function ctree at 0x10ef360d0>, {})

row 3: ['Jagiellonian Library', 'Średniowieczna astronomia / Medieval Astronomy', 'Loxper virtual guide']
Średniowieczna astronomia / Medieval Astronomy | leaf: 
defaultdict(<function ctree at 0x10ef360d0>, {})
Loxper virtual guide | leaf: 
defaultdict(<function ctree at 0x10ef360d0>, {})

row 4: ['Jagiellonian Library', 'Chopin', '']
Chopin | leaf: 
defaultdict(<

In [7]:
def build_leaf(name, leaf):
    """Convert one tree node into the nested dict layout used for the d3.js sunburst.

    Args:
        name: label stored under the "name" key of the resulting node.
        leaf: mapping of child name -> child mapping (may be empty).

    Returns:
        ``{"name": name, "value": 1}`` for a node without children, otherwise
        ``{"name": name, "children": [...]}`` with every child converted
        recursively.
    """
    # terminal node: value=1 is a prerequisite for the sunburst construction in d3.js
    if not leaf.keys():
        return {"name": name, "value": 1}

    # inner node: convert every child, keeping the mapping's iteration order
    children = []
    for child_name, child_leaf in leaf.items():
        children.append(build_leaf(child_name, child_leaf))
    return {"name": name, "children": children}

In [8]:
# holds one converted node (dict) per top-level key of `tree`
res = []

In [9]:
# Build the list in one assignment (rather than appending to the existing
# list) so that re-running this cell does not duplicate the nodes.
res = [build_leaf(name, leaf) for name, leaf in tree.items()]

In [10]:
# show the converted top-level nodes
print(res)

[{'name': 'National Archives in Krakow', 'children': [{'name': 'Cartographic collection - selected objects', 'children': [{'name': 'BackToTheMap', 'value': 1}, {'name': 'Paracosm - mindful maps', 'value': 1}]}]}, {'name': 'Jagiellonian Library', 'children': [{'name': 'Średniowieczna astronomia / Medieval Astronomy', 'children': [{'name': 'Loxper virtual guide', 'value': 1}]}, {'name': 'Chopin', 'value': 1}]}, {'name': 'bpk-Bildagentur', 'children': [{'name': 'Historische Porträtsammlung - Carte de Visite', 'children': [{'name': 'Radikale Gespräche', 'value': 1}]}]}, {'name': 'Damast-und Frottiermuseum Großschönau', 'children': [{'name': 'Schatzkammer der Damaste', 'children': [{'name': 'Abstract Visioner', 'value': 1}]}]}, {'name': 'Deutsches Stuhlbaumuseum Rabenau', 'children': [{'name': 'Stile, Stühle, Meisterwerke', 'value': 1}]}, {'name': 'Digitales Archiv Mathematischer Modelle', 'children': [{'name': 'Digitales Archiv Mathematischer Modelle', 'children': [{'name': 'Klingende Math

In [11]:
# the d3.js sunburst needs a single root element that wraps all hierarchical data
root = {'name': "CdV Ost³ 2022", 'children': res}

print(root)

{'name': 'CdV Ost³ 2022', 'children': [{'name': 'National Archives in Krakow', 'children': [{'name': 'Cartographic collection - selected objects', 'children': [{'name': 'BackToTheMap', 'value': 1}, {'name': 'Paracosm - mindful maps', 'value': 1}]}]}, {'name': 'Jagiellonian Library', 'children': [{'name': 'Średniowieczna astronomia / Medieval Astronomy', 'children': [{'name': 'Loxper virtual guide', 'value': 1}]}, {'name': 'Chopin', 'value': 1}]}, {'name': 'bpk-Bildagentur', 'children': [{'name': 'Historische Porträtsammlung - Carte de Visite', 'children': [{'name': 'Radikale Gespräche', 'value': 1}]}]}, {'name': 'Damast-und Frottiermuseum Großschönau', 'children': [{'name': 'Schatzkammer der Damaste', 'children': [{'name': 'Abstract Visioner', 'value': 1}]}]}, {'name': 'Deutsches Stuhlbaumuseum Rabenau', 'children': [{'name': 'Stile, Stühle, Meisterwerke', 'value': 1}]}, {'name': 'Digitales Archiv Mathematischer Modelle', 'children': [{'name': 'Digitales Archiv Mathematischer Modelle',

In [12]:
# export as json
with open(jsonFilePath, 'w') as jsonFile:
    # indent=1 prettifies the json output file
    json.dump(root, jsonFile, indent=1)