In [4]:
def find(parent, i):
    """Return the representative (root) of the set that contains ``i``.

    ``parent`` maps each node to its parent; a root is a node that is its
    own parent. The mapping is only read here, never modified.
    """
    node = i
    # Climb the parent links until we reach a node that points at itself.
    while parent[node] != node:
        node = parent[node]
    return node

def union(parent, rank, x, y):
    """Merge the sets containing ``x`` and ``y`` using union by rank.

    The root with the smaller rank is attached beneath the root with the
    larger rank. On a tie the root of ``y`` is attached beneath the root of
    ``x`` and that root's rank grows by one. ``parent`` and ``rank`` are
    updated in place; nothing is returned.
    """
    root_x, root_y = find(parent, x), find(parent, y)

    if rank[root_x] < rank[root_y]:
        # x's tree has the lower rank, so it goes under y's root
        parent[root_x] = root_y
        return

    # y's tree has lower or equal rank, so it goes under x's root
    parent[root_y] = root_x
    if rank[root_x] == rank[root_y]:
        # equal ranks: the merged tree is one level taller
        rank[root_x] += 1

def kruskal(graph):
    """Return a minimum spanning tree of ``graph`` using Kruskal's algorithm.

    Parameters
    ----------
    graph : iterable of (u, v, w)
        Undirected weighted edges. ``u`` and ``v`` are hashable node labels,
        ``w`` is a weight that can be ordered by ``sorted``.

    Returns
    -------
    list of [u, v, w]
        The selected edges in non-decreasing weight order. For a connected
        graph this is a minimum spanning tree with ``V - 1`` edges; for a
        disconnected graph it is a minimum spanning forest. An empty graph
        yields an empty list.
    """
    # Sort a copy so the caller's edge list keeps its original order.
    edges = sorted(graph, key=lambda item: item[2])

    # Initially every node is its own parent (a singleton set) with rank 0.
    parent = {}
    rank = {}
    for u, v, _ in edges:
        for node in (u, v):
            parent[node] = node
            rank[node] = 0

    # A spanning tree has (number of vertices - 1) edges. The stop condition
    # must be based on the vertex count, not the edge count: comparing with
    # len(edges) - 1 either runs past the end of the edge list (more edges
    # than vertices) or stops too early (fewer edges than vertices).
    edges_needed = len(parent) - 1

    result = []
    for u, v, w in edges:
        if len(result) >= edges_needed:
            break  # tree is complete, remaining edges can only form cycles
        # An edge is safe to add only if its endpoints are in different sets,
        # i.e. adding it does not create a cycle.
        x = find(parent, u)
        y = find(parent, v)
        if x != y:
            result.append([u, v, w])
            union(parent, rank, x, y)  # merge the two components

    return result

# let's make a helper function to transform long vertex name into shorter name
# also we want to strip optional prefix if it exists
# shortened name will be first letter of each word when split by underscore
def shorten_name(name, prefix='Faculty_of_'):
    """Abbreviate an underscore-separated name to the initials of its words.

    Parameters
    ----------
    name : str
        Full vertex name, words separated by ``_``.
    prefix : str, optional
        Leading text that is removed before abbreviating, if present.

    Returns
    -------
    str
        First character of every non-empty word, concatenated in order,
        e.g. ``'Faculty_of_Civil_Engineering'`` -> ``'CE'``. An empty string
        is returned when no words remain.
    """
    # get rid of the optional prefix
    if name.startswith(prefix):
        name = name[len(prefix):]
    # Empty pieces appear for '', a leading/trailing '_' or a doubled '__';
    # skipping them avoids an IndexError on word[0].
    return ''.join(word[0] for word in name.split('_') if word)

def read_graph(file_name, sanity_check=True):
    """Read a weighted edge list from a text file.

    Every non-blank line must contain three whitespace-separated fields:
    ``<node_u> <node_v> <integer weight>``. Node names are abbreviated with
    ``shorten_name`` before they are stored.

    Parameters
    ----------
    file_name : str
        Path of the file to read.
    sanity_check : bool, optional
        When true, verify that abbreviating the names did not make two
        different original names collapse into the same short name.

    Returns
    -------
    list of [u, v, w]
        One entry per edge, with shortened node names and an ``int`` weight.

    Raises
    ------
    ValueError
        If a non-blank line does not have exactly three fields or the weight
        is not an integer.
    AssertionError
        If ``sanity_check`` is true and the number of distinct shortened
        names differs from the number of distinct original names.
    """
    graph = []
    # for the sanity check we compare the sets of original and shortened names
    original_names = set()
    shortened_names = set()
    with open(file_name, 'r') as file:
        for line in file:
            fields = line.split()
            if not fields:
                # blank line (e.g. a trailing newline at the end of the file)
                continue
            u, v, w = fields
            short_u, short_v = shorten_name(u), shorten_name(v)
            if sanity_check:
                original_names.update((u, v))
                shortened_names.update((short_u, short_v))
            graph.append([short_u, short_v, int(w)])
    if sanity_check:
        # equal set sizes mean no two original names share a shortened name
        assert len(original_names) == len(shortened_names), 'Original and shortened names have different lengths'

    return graph

# Load the network, build its minimum spanning tree and report the chosen links.
graph = read_graph('rtu_network.txt')
result = kruskal(graph)
print(len(result))

# Accumulate into total_cost rather than a variable called `sum`: rebinding
# `sum` would shadow the built-in for every later cell in this kernel.
total_cost = 0
for u, v, weight in result:
    total_cost += weight
    print(f"Department {u} -- Department {v} == {weight}")

print(f"Total cost: {total_cost}")

12
Department EEaM -- Department MSaAC == 95
Department RIO -- Department ETaH == 120
Department CE -- Department EaT == 130
Department RL -- Department TaME == 160
Department PaEE -- Department EEaM == 180
Department CSaIT -- Department CE == 210
Department PaEE -- Department RL == 250
Department AaUP -- Department CSaIT == 320
Department EaT -- Department RIO == 350
Department RBS -- Department RL == 750
Department RBS -- Department RD == 1400
Department RBS -- Department CSaIT == 1800
Total cost: 5765


In [3]:
# Show every selected edge on its own line.
print('\n'.join(str(edge) for edge in result))

['Faculty_of_Engineering_Economics_and_Management', 'Faculty_of_Materials_Science_and_Applied_Chemistry', 95]
['RTU_International_Office', 'Faculty_of_E-Learning_Technologies_and_Humanities', 120]
['Faculty_of_Civil_Engineering', 'Faculty_of_Electronics_and_Telecommunications', 130]
['RTU_Library', 'Faculty_of_Transport_and_Mechanical_Engineering', 160]
['Faculty_of_Power_and_Electrical_Engineering', 'Faculty_of_Engineering_Economics_and_Management', 180]
['Faculty_of_Computer_Science_and_Information_Technology', 'Faculty_of_Civil_Engineering', 210]
['Faculty_of_Power_and_Electrical_Engineering', 'RTU_Library', 250]
['Faculty_of_Architecture_and_Urban_Planning', 'Faculty_of_Computer_Science_and_Information_Technology', 320]
['Faculty_of_Electronics_and_Telecommunications', 'RTU_International_Office', 350]
['Riga_Business_School', 'RTU_Library', 750]
['Riga_Business_School', 'RTU_Dorms', 1400]
['Riga_Business_School', 'Faculty_of_Computer_Science_and_Information_Technology', 1800]


## Visualizing the graph using pyvis

In [15]:
# Print the installed pyvis version so the environment used for the
# visualization below is recorded alongside the notebook.
import pyvis
print(f"pyvis version: {pyvis.__version__}")

pyvis version: 0.3.2


In [5]:
# we need to import pyvis library to visualize the graph
from pyvis.network import Network


In [29]:
# Create the pyvis network that will hold the spanning tree.
g = Network(height='750px', width='100%', notebook=True)

# Options handed to pyvis: enable label scaling on edges.
# An alternative option set (barnesHut physics tuning and continuous edge
# smoothing) is not applied here.
options = """
var options = {
    "edges": {
        "scaling": {
            "label": {
                "enabled": true
            }
        }
    }
}
"""
g.set_options(options)

# Add both endpoints of every tree edge, then the edge itself.
for u, v, w in result:
    for node_name in (u, v):
        g.add_node(node_name, label=node_name)
    # edges heavier than 100 are drawn orange, the rest green
    if w > 100:
        edge_color = 'orange'
    else:
        edge_color = 'green'
    # the weight is used both as the edge value and as its text label
    g.add_edge(u, v, value=w, label=str(w), color=edge_color)

# Write the HTML file and display it.
g.show('rtu_network.html')

# Known issue about making edge labels visible:
# https://github.com/WestHealth/pyvis/issues/153
# TODO check the unicode issue for cdn_resources

rtu_network.html


In [None]:
# TODO: figure out which hypothesis explains the missing edge labels:
# 1. unicode issue
# 2. title and label
# 3. str issue
# 4. options issue