In [1]:
# Requires Python 3.6 and networkx 1.11.
# Required imports:
import csv # to read CSV file
import networkx as nx # to make and analyze networks/graphs
import unicodedata # to compare strings while ignoring case
import matplotlib.pyplot as plt # to draw figures
from matplotlib.pyplot import figure # to draw figures
from tabulate import tabulate # to display markdown-compatible tables
import operator # to make sorting easier, using .itemgetter
import copy # to make deep copies of lists and such
from copy import deepcopy

# Function to ignore case, when searching for occurrences of definiendum and short definiendum in definiens.
def normalize_caseless(text):
    """Return a form of ``text`` suitable for case-insensitive comparison.

    The string is case-folded first, and the folded result is then put through
    Unicode NFKD normalization.
    """
    folded_text = text.casefold()
    return unicodedata.normalize("NFKD", folded_text)

In [2]:
# Project configuration.
# analysis_project_name: name of the project, to use as prefix on names of all output files.
# analysis_project_data_file: path of the CSV file the raw definitions are read from.
# Only one project is active at a time; to switch, comment out the active pair of
# assignments and uncomment the other pair.
## Project: UN Glossary of World Heritage Terms
# analysis_project_name = "UN_GWHT_TDN"
# analysis_project_data_file = "ilang_UN_GWHT_terminology_raw - Sheet1.csv"
## Project: Smart Retail Network
analysis_project_name = "SRN_DN" 
analysis_project_data_file = "raw_data/ilang_SRN_data.csv"

In [3]:
# Get raw definitions from a CSV file.
# raw_definitions holds one dict per CSV row (the column-title row included), with the keys
# 'definiendum', 'short definiendum' (short form of the definiendum), and 'definiens',
# filled from the first three columns of the row.
raw_definitions = []
# newline='' is what the csv module asks for when it is handed a file object, so that
# newlines embedded in quoted fields are parsed correctly.
with open(analysis_project_data_file, newline='') as raw_definitions_file:
    raw_definitions_csv = csv.reader(raw_definitions_file, delimiter = ',')
    for row in raw_definitions_csv:
        # csv.reader yields an empty list for a blank line; skip it instead of failing on row[0].
        if not row:
            continue
        raw_definitions.append({'definiendum':row[0], 'short definiendum':row[1], 'definiens':row[2]})

In [4]:
# Drop the first entry of raw_definitions (the column-title row); every later row is kept, in order.
raw_defs = raw_definitions[1:]

In [74]:
### Definition Network

# Make an empty Definition Network: a directed graph to which nodes and edges are added below.
dn = nx.DiGraph()

## Populate Definition Network with nodes.

# Make a random 6-item alphanumeric string, to use in node identifiers.
import random
import string
def id_generator(size=6, chars=string.ascii_uppercase + string.digits):
    """Return a random string of ``size`` characters, each drawn from ``chars``.

    With the defaults this is a 6-character string of uppercase ASCII letters and digits.
    """
    picked_chars = []
    for _ in range(size):
        picked_chars.append(random.choice(chars))
    return ''.join(picked_chars)

# Counts how often node_identifier already occurs among the nodes of a network.
def is_unique_node_id(network, node_identifier):
    """Return the number of nodes in ``network`` that are equal to ``node_identifier``.

    Despite the name, the result is a count and not a boolean: 0 means the identifier
    is not used in the network yet (it is unique), a positive value means it is taken.
    """
    return sum(1 for existing_id in network.nodes() if node_identifier == existing_id)

# Make exactly 2*len(raw_defs)+2 nodes, give each a unique node_id (unique in the given network only).
# node_id has format node_[random 6 characters], 6 being the default size of id_generator.
# A generated id that is already taken is discarded and a new one is drawn, so an id collision
# cannot leave the network with fewer nodes than required.
nodes_needed = 2*len(raw_defs)+2
nodes_added = 0
while nodes_added < nodes_needed:
    node_id = 'node_' + id_generator()
    # is_unique_node_id returns the number of existing nodes with this id; 0 means it is free.
    if is_unique_node_id(dn, node_id) == 0:
        dn.add_node(node_id)
        nodes_added += 1

In [75]:
## Label all Definition Network nodes.
# Collect the node labels in a fixed order: all definienda, then all definientia, then the two
# class labels 'Definiendum' and 'Definiens'.
# A tuple (not a set) of keys is iterated, because the iteration order of a set of strings
# can differ from one Python session to the next.
node_content_list = [raw_def[key] for key in ('definiendum', 'definiens') for raw_def in raw_defs]
node_content_list.append('Definiendum')
node_content_list.append('Definiens')

# Every node must receive exactly one label. If the counts differ, pairing nodes with labels
# would silently drop or misassign labels, so stop with an explicit error instead.
node_ids = list(dn.nodes())
if len(node_ids) != len(node_content_list):
    raise ValueError(
        'Definition Network has {} nodes but {} labels were collected'.format(
            len(node_ids), len(node_content_list)))

# Make a dictionary of (node_id, node content value) pairs.
node_content_dict = dict(zip(node_ids, node_content_list))

# Set node 'content' attribute using node_content_dict.
nx.set_node_attributes(dn, name = 'content', values = node_content_dict)

In [76]:
# Show the 'content' attribute of the nodes of dn as a {node_id: content} dictionary.
print(nx.get_node_attributes(dn, 'content'))

{'node_67D51X': 'SRN', 'node_UQQBR8': 'SDM', 'node_WYGUK7': 'Shop Display', 'node_6FQK4Q': 'Basic Shop Display', 'node_SLF39T': 'Complex Shop Display', 'node_M03577': 'Shop Display Identifier', 'node_XNNKVT': 'Shop Display Supplier', 'node_GTZ6Y8': 'Point of Purchase', 'node_5OIKMI': 'Field Merchandiser', 'node_5N37IU': 'POP Personnel', 'node_BO6B2X': 'Key Account', 'node_IB9Y9S': 'POP Data', 'node_0FCS5Y': 'Report', 'node_KP5PHT': 'Analyst', 'node_R5FFF2': 'Administrator', 'node_3G4LNZ': 'Smart Retail Network software.', 'node_KDMBS8': 'Shop Display Management software.', 'node_RB0D0B': 'Object installed at a POP, which is used to promote products; Shop Display is synonym with Fixture.', 'node_SC41W2': 'A Shop Display which cannot itself be subdivided.', 'node_EAMVAA': 'A Shop Display made from two or more Basic Shop Displays.', 'node_5LJ8YY': 'Unique numeric identifier generated by SDM for each BSD, used to identify a BSD when a QR code scanner is not available.', 'node_01J0Q2': 'Rep

In [77]:
# Given a network, an attribute, and a value of that attribute, returns the node_id of the node that carries that value.
def attribute_value_node_id(network, attribute, value):
    """Return the node_id of the node whose ``attribute`` equals ``value``.

    Parameters:
        network: graph whose node attributes are searched.
        attribute: name of the node attribute to inspect.
        value: attribute value to look for.

    Returns:
        The node_id of the matching node. If several nodes carry the value, the one that
        comes last in the attribute dictionary is returned. If no node matches, the string
        'No node for that attribute value' is returned (not None), so a caller has to
        compare against that string to detect a miss.
    """
    # Build the {node_id: attribute value} dictionary once, not on every loop iteration.
    attribute_values = nx.get_node_attributes(network, attribute)
    result = None
    for candidate_id, candidate_value in attribute_values.items():
        # No early exit: a later match deliberately overrides an earlier one.
        if candidate_value == value: result = candidate_id
    if result is None: return 'No node for that attribute value'
    return result

In [78]:
## Populate network with Is-a relationship edges.
# Every node whose content is a definiens gets an Is-a edge to the node labelled 'Definiens',
# and every node whose content is a definiendum gets an Is-a edge to the node labelled 'Definiendum'.
# Adding edges does not change any 'content' attribute, so the lookups are done once up front
# instead of once per (node, definition) pair.
node_contents = nx.get_node_attributes(dn, 'content')
definiens_class_node = attribute_value_node_id(dn, 'content', 'Definiens')
definiendum_class_node = attribute_value_node_id(dn, 'content', 'Definiendum')
all_definientia = {raw_def['definiens'] for raw_def in raw_defs}
all_definienda = {raw_def['definiendum'] for raw_def in raw_defs}
# node_contents only lists nodes that have a 'content' attribute; nodes without one cannot
# match a definition and are therefore left without Is-a edges.
for node, content in node_contents.items():
    if content in all_definientia:
        dn.add_edge(node, definiens_class_node, relationship = 'Is-a')
    if content in all_definienda:
        dn.add_edge(node, definiendum_class_node, relationship = 'Is-a')

In [81]:
## Populate network with Defines relationship edges.
# For each definition, add a Defines edge from the node holding its definiens
# to the node holding its definiendum.
for definition in raw_defs:
    definiens_node = attribute_value_node_id(dn, 'content', definition['definiens'])
    definiendum_node = attribute_value_node_id(dn, 'content', definition['definiendum'])
    dn.add_edge(definiens_node, definiendum_node, relationship = 'Defines')

In [82]:
# Show the edges of dn.
print(dn.edges())

[('node_67D51X', 'node_VZILJP'), ('node_UQQBR8', 'node_VZILJP'), ('node_WYGUK7', 'node_VZILJP'), ('node_6FQK4Q', 'node_VZILJP'), ('node_SLF39T', 'node_VZILJP'), ('node_M03577', 'node_VZILJP'), ('node_XNNKVT', 'node_VZILJP'), ('node_GTZ6Y8', 'node_VZILJP'), ('node_5OIKMI', 'node_VZILJP'), ('node_5N37IU', 'node_VZILJP'), ('node_BO6B2X', 'node_VZILJP'), ('node_IB9Y9S', 'node_VZILJP'), ('node_0FCS5Y', 'node_VZILJP'), ('node_KP5PHT', 'node_VZILJP'), ('node_R5FFF2', 'node_VZILJP'), ('node_3G4LNZ', 'node_G5LY9N'), ('node_3G4LNZ', 'node_67D51X'), ('node_KDMBS8', 'node_G5LY9N'), ('node_KDMBS8', 'node_UQQBR8'), ('node_RB0D0B', 'node_G5LY9N'), ('node_RB0D0B', 'node_WYGUK7'), ('node_SC41W2', 'node_G5LY9N'), ('node_SC41W2', 'node_6FQK4Q'), ('node_EAMVAA', 'node_G5LY9N'), ('node_EAMVAA', 'node_SLF39T'), ('node_5LJ8YY', 'node_G5LY9N'), ('node_5LJ8YY', 'node_M03577'), ('node_01J0Q2', 'node_G5LY9N'), ('node_01J0Q2', 'node_XNNKVT'), ('node_FXEWTP', 'node_G5LY9N'), ('node_FXEWTP', 'node_GTZ6Y8'), ('node_

In [83]:
# Number of edges in dn.
len(dn.edges())

45

In [84]:
# Show the 'relationship' attribute of the edges of dn as an {edge: relationship} dictionary.
print(nx.get_edge_attributes(dn, 'relationship'))

{('node_67D51X', 'node_VZILJP'): 'Is-a', ('node_UQQBR8', 'node_VZILJP'): 'Is-a', ('node_WYGUK7', 'node_VZILJP'): 'Is-a', ('node_6FQK4Q', 'node_VZILJP'): 'Is-a', ('node_SLF39T', 'node_VZILJP'): 'Is-a', ('node_M03577', 'node_VZILJP'): 'Is-a', ('node_XNNKVT', 'node_VZILJP'): 'Is-a', ('node_GTZ6Y8', 'node_VZILJP'): 'Is-a', ('node_5OIKMI', 'node_VZILJP'): 'Is-a', ('node_5N37IU', 'node_VZILJP'): 'Is-a', ('node_BO6B2X', 'node_VZILJP'): 'Is-a', ('node_IB9Y9S', 'node_VZILJP'): 'Is-a', ('node_0FCS5Y', 'node_VZILJP'): 'Is-a', ('node_KP5PHT', 'node_VZILJP'): 'Is-a', ('node_R5FFF2', 'node_VZILJP'): 'Is-a', ('node_3G4LNZ', 'node_G5LY9N'): 'Is-a', ('node_3G4LNZ', 'node_67D51X'): 'Defines', ('node_KDMBS8', 'node_G5LY9N'): 'Is-a', ('node_KDMBS8', 'node_UQQBR8'): 'Defines', ('node_RB0D0B', 'node_G5LY9N'): 'Is-a', ('node_RB0D0B', 'node_WYGUK7'): 'Defines', ('node_SC41W2', 'node_G5LY9N'): 'Is-a', ('node_SC41W2', 'node_6FQK4Q'): 'Defines', ('node_EAMVAA', 'node_G5LY9N'): 'Is-a', ('node_EAMVAA', 'node_SLF39