In [20]:
import csv
import numpy as np
from random import randint
from operator import itemgetter
from collections import deque

In [21]:
'''
The following code implements the Kruskal's Algorithm using the Adjacency list representation of a graph. The graph consists 
of n = 200 nodes, with varying edge densities of 50%, 75%, and 100%. Corresponding to each varying edge density, there is a .csv
file that is being taken as an input, the format of which is <tail><head><cost><capacity>, although the capacity column is not
needed for the Kruskal's Algorithm.

The .csv file will be created randomly, with the check that none of the edges are being repeated in random generation.
The inbuilt set function will be used to ensure the same. 
'''

AllEdges = set() #The set function ensures that duplicate edges are not present
for count in range(0,1000000):  #We run this loop for more than nC2 (n is the number of nodes) times to ensure that (almost) all possible edges have been 
    #considered in the set. Note that however, this is not guaranteed, since the nodes correspong to the edges are being
    #generated randomly
    i, j = np.random.randint(0, 199), np.random.randint(0,199)
    AllEdges.add((i, j))
    
totalEdges = 0 #Number of edges in the graph
totalNodes = 200 #Number of nodes in the graph
    
'''
Creating input for running the algorithm. 
'''

file = "C:/Users/91858/Downloads/data for kruskal - Sheet1.csv"
with open(file, 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(["tail", "head", "cost","capacity"])
    for edge in AllEdges:
        writer.writerow([edge[0], edge[1], np.random.randint(10,2000),0])
        totalEdges = totalEdges + 1
        if totalEdges == int(50*199):  #For edge density of 50% 
        #if totalEdges == int(75*199): #For edge density of 75%
        #if totalEdges == int(100*199):  #For edge density of 100%
            break

In [22]:
'''
Creating an adjacency list using the .csv data.
'''

AdjList = [] 

file = open('C:/Users/91858/Downloads/data for kruskal - Sheet1.csv')
csv = csv.reader(file) 
data = []

'''
The elements of the queue are of the form <tail><head><cost>. The queue of edges will be created using the adjacency list
'''
queue = []


for row in csv:
    data.append(row)

for i in range(totalNodes):
    AdjList.append([])

'''
Here, note that the adjacency list will have the entry <node j><cost> corresponding to <node i>. The entry <node i><cost> 
corresponding to <node j> can be skipped, since we are considering an undirected graph, i.e., if edge (u,v) 
[and therefore (v,u)] has the least cost in current iteration, then the nodes u, v will be marked, thereby marking edge (v,u)
automatically.
'''
for i in range(1, totalEdges+1):
    AdjList[int(data[i][0])].append([data[i][1], data[i][2]])
#     AdjList[int(data[i][1])].append([data[i][0], data[i][2]])

#adding elements to the queue, i.e., creating an edge list
for i in range(len(AdjList)):
    for j in range(len(AdjList[i])):
        queue.append([i, int(AdjList[i][j][0]), int(AdjList[i][j][1])])
        
queue.sort(key=lambda x: x[2]) #sorting the list of edges according to the costs in ascending order


In [23]:
'''
Kruskal's Algorithm
'''
#Minimum spanning tree, initially empty
MST = []
MSTcost = 0

group = []
for i in range(totalNodes):
    group.append(i)
iterCount = 0

while(len(MST)!=totalNodes-1 and len(queue)!=0):
    Set = queue.pop(0) #considering the least weighted edge at the beginning of the sorted queue
    if(group[Set[0]] == group[Set[1]]):
        continue
    MST.append([Set[0], Set[1]]) 
    MSTcost = MSTcost + int(Set[2])
    for node in range(totalNodes):
        iterCount = iterCount + 1
        if group[node]== Set[1]:
#             iterCount = iterCount + 1
            group[node]== Set[0]

if len(MST)!=totalNodes-1 :
    print("No Spanning tree exists in the given graph")
else:
    print(MST)


[[0, 14], [85, 155], [85, 6], [112, 46], [124, 28], [139, 85], [6, 33], [39, 70], [53, 139], [93, 28], [157, 14], [95, 117], [122, 67], [146, 58], [17, 63], [24, 18], [32, 164], [70, 49], [90, 17], [177, 19], [54, 76], [57, 81], [57, 6], [63, 23], [1, 23], [4, 149], [7, 183], [14, 40], [15, 84], [133, 42], [174, 142], [174, 46], [186, 65], [33, 97], [43, 190], [115, 129], [62, 28], [76, 116], [98, 164], [113, 157], [138, 176], [144, 154], [191, 85], [58, 145], [71, 1], [71, 195], [86, 98], [175, 184], [39, 161], [61, 40], [108, 102], [109, 5], [124, 39], [153, 136], [31, 39], [59, 86], [59, 196], [78, 97], [31, 178], [45, 188], [54, 1], [71, 55], [150, 11], [182, 88], [28, 133], [45, 44], [57, 39], [76, 197], [109, 106], [171, 148], [178, 25], [189, 118], [24, 91], [24, 72], [49, 162], [95, 35], [3, 66], [24, 32], [66, 141], [168, 5], [180, 77], [50, 94], [162, 30], [5, 70], [27, 33], [34, 125], [78, 10], [92, 194], [142, 56], [59, 151], [66, 156], [83, 60], [85, 134], [12, 113], [30, 

In [11]:
iterCount

39800

For the case of $n=200$ nodes and edge density of $50\%, 75\%, 100\%$ (viz. 9950, 14925, and 19900 edges), the iteration count will be the same always. This is true, because the Kruskal's algorithm $\textbf{will always}$ find a minimum spanning tree/forest, if the number of edges are larger than the number of nodes in a graph. Therefore, in the cases where edges are much more than the number of nodes, the outer while loop will stop as soon as the cardinality of the MST becomes $n$ - 1. The inner for loop, which checks whether the nodes are in the same or distinct groups, will run for exactly $n$ iterations for every single iteration of the while loop, where $n$ is the number of nodes. Hence, the total iteration count will remain the same. 