In [1]:
import copy
import csv
import math
import os
import random
from itertools import combinations, product

import pandas as pd

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# Input directory holding the adjacency-list files, and output directory for the CSV results.
graphs_dir, results_dir = "graphs", "results"

In [22]:
# Maximum number of program transitions sampled per state
# (passed as `k` to generate_random_samples by get_program_transitions).
K_sampling = 1

# The sampling uses the global `random` generator; seed it here so that a fresh
# "Restart Kernel -> Run All" reproduces the same sampled transitions and ranks.
RANDOM_SEED = 0
random.seed(RANDOM_SEED)

In [3]:
graph_name = "graph_powerlaw_cluster_graph_n9"

# Read the adjacency list: every non-blank line is "<node> <neighbor> <neighbor> ...".
# The result maps each node to the set of its neighbors.
graph = {}
with open(os.path.join(graphs_dir, f"{graph_name}.txt"), "r") as f:
    for line in f:
        node_edges = line.split()
        if not node_edges:
            # Blank or whitespace-only line (e.g. a trailing empty line): nothing to
            # parse. Indexing node_edges[0] here would raise IndexError.
            continue
        node, *edges = node_edges
        graph[node] = set(edges)

graph

{'A': {'D', 'F', 'G'},
 'B': {'D', 'E'},
 'C': {'D', 'E', 'F', 'G', 'I'},
 'D': {'A', 'B', 'C', 'E', 'F', 'H', 'I'},
 'E': {'B', 'C', 'D', 'H'},
 'F': {'A', 'C', 'D', 'G'},
 'G': {'A', 'C', 'F', 'H'},
 'H': {'D', 'E', 'G'},
 'I': {'C', 'D', 'E'}}

In [4]:
# Fix an ordering of the nodes (insertion order of `graph`) and remember each node's index;
# a state tuple stores the color of nodes[i] at position i.
nodes = [name for name in graph]
node_positions = dict(zip(nodes, range(len(nodes))))

In [5]:
# Degree of a node = size of its neighbor set in `graph`.
degree_of_nodes = {}
for vertex in nodes:
    degree_of_nodes[vertex] = len(graph[vertex])

print("Degree of all nodes (starting from 0):")
degree_of_nodes  # shown as the cell output

Degree of all nodes (starting from 0):


{'A': 3, 'B': 2, 'C': 5, 'D': 7, 'E': 4, 'F': 4, 'G': 4, 'H': 3, 'I': 3}

In [6]:
# Every node may hold any color in 0..degree(node), so the set of all configurations is
# the Cartesian product of the per-node color ranges. Position i of each tuple is the
# color of nodes[i]. With no nodes the product is the single empty tuple.
configurations = set(
    product(*(range(degree_of_nodes[n] + 1) for n in nodes))
)

print("No. of Configurations:", len(configurations))

No. of Configurations: 1152000


In [7]:
# Index pairs (src, dest) with src < dest whose nodes are adjacent. The graph is the
# same for every state, so this is computed once instead of once per configuration.
edge_index_pairs = [
    (src, dest)
    for src, dest in combinations(range(len(nodes)), 2)
    if nodes[dest] in graph[nodes[src]]
]

# A state is an invariant when no two adjacent nodes share a color.
invariants = {
    state
    for state in configurations
    if all(state[src] != state[dest] for src, dest in edge_index_pairs)
}

print("Invariants and Count of Invariants:")
len(invariants)

Invariants and Count of Invariants:


64944

In [8]:
# Seed the rank table with the invariant states: each gets a fresh record with
# path count "C" = 1 and every other field ("L", "A", "Ar", "M") set to 0.
program_transitions_rank = {
    inv: {"L": 0, "C": 1, "A": 0, "Ar": 0, "M": 0}
    for inv in invariants
}

In [9]:
def find_min_possible_color(colors):
    """
    Return the smallest integer in 0..len(colors) that is not contained in "colors".

    Candidates are tried in increasing order, so the first one missing from
    "colors" is returned.
    """
    candidate, upper = 0, len(colors)
    while candidate <= upper:
        if candidate not in colors:
            return candidate
        candidate += 1

In [10]:
def is_different_color(color, other_colors):
    """
    Tell whether "color" equals none of the values in "other_colors".

    Returns True when no element compares equal to "color" (including when
    "other_colors" is empty), False as soon as one does.
    """
    return not any(color == other for other in other_colors)

In [11]:
def is_program_transition(perturb_pos, start_state, dest_state):
    """
    Decide whether moving from "start_state" to "dest_state" is a program transition.

    "perturb_pos" is the index of the node whose color is examined in "dest_state".
    A move between two invariant states is never a program transition. Otherwise
    the move qualifies exactly when the node's color in "dest_state" is the
    smallest color not used by any of its neighbors in "dest_state".
    """
    both_invariant = start_state in invariants and dest_state in invariants
    if both_invariant:
        return False

    # Colors currently held by the neighbors of the examined node.
    colors_around = {
        dest_state[node_positions[neighbor]]
        for neighbor in graph[nodes[perturb_pos]]
    }
    return dest_state[perturb_pos] == find_min_possible_color(colors_around)

In [12]:
def generate_random_samples(population, k):
    """
    Draw up to "k" elements, without replacement, from a list of groups.

    "population" is a list of lists. Groups are visited round-robin in a random
    order and one random element is removed from the current group per step, so
    samples are spread across groups before any group is hit a second time.
    Sampling stops after "k" elements or once every group is exhausted.

    The caller's "population" is never modified (a deep copy is consumed).
    Empty groups are ignored; without that, random.randint(0, -1) would raise
    ValueError on them.

    Returns the list of sampled elements (empty when k <= 0 or nothing to sample).
    """
    # Deep copy so popping does not touch the caller's lists; drop empty groups.
    N = [group for group in copy.deepcopy(population) if group]
    random.shuffle(N)
    indx = 0
    samples = []
    while k > 0 and N:
        sampled_indx = random.randint(0, len(N[indx]) - 1)
        samples.append(N[indx].pop(sampled_indx))

        if not N[indx]:  # all elements popped for this group
            N.pop(indx)
            if N:
                # the next group slid into `indx`; wrap if we removed the last one
                indx = indx % len(N)
        else:
            indx = (indx + 1) % len(N)

        # back at the first group: re-randomize the visiting order
        if indx == 0 and len(N) > 1:
            random.shuffle(N)

        k -= 1

    return samples

In [23]:
def get_program_transitions(start_state):
    """
    Collect and sample the program transitions available from "start_state".

    Only nodes whose color equals the color of at least one neighbor are
    considered. For each such node, every other color in 0..degree(node) is
    tried and the resulting states accepted by is_program_transition are kept,
    grouped per node. Up to K_sampling states are then drawn from those groups
    with generate_random_samples.

    Returns {"program_transitions": <set of sampled successor states>}.
    """
    moves_by_node = []
    for position, current_color in enumerate(start_state):
        node = nodes[position]
        colors_around = {start_state[node_positions[nb]] for nb in graph[node]}
        if is_different_color(current_color, colors_around):
            # the node does not clash with any neighbor => nothing to change here
            continue

        # the node clashes with a neighbor => look for successor states via this node
        node_moves = []
        for new_color in set(range(degree_of_nodes[node] + 1)) - {current_color}:
            successor = list(start_state)
            successor[position] = new_color
            successor = tuple(successor)
            if is_program_transition(position, start_state, successor):
                node_moves.append(successor)

        if node_moves:
            moves_by_node.append(node_moves)

    sampled = generate_random_samples(moves_by_node, K_sampling)
    return {"program_transitions": set(sampled)}

# get_program_transitions((0,0,0,0))

In [14]:
def get_cvfs(start_state):
    """
    Enumerate every state that differs from "start_state" in exactly one node's color.

    Each node is recolored with every other value in 0..degree(node). The
    resulting states are mapped to the index of the recolored node. They are
    reported under "cvfs_in" when "start_state" is an invariant and under
    "cvfs_out" otherwise; the other mapping stays empty.

    Returns {"cvfs_in": dict, "cvfs_out": dict}.
    """
    perturbed = dict()
    for position, current_color in enumerate(start_state):
        alternatives = set(range(degree_of_nodes[nodes[position]] + 1)) - {current_color}
        for new_color in alternatives:
            neighbor_state = list(start_state)
            neighbor_state[position] = new_color
            # remember which node was changed, to calculate its overall rank effect
            perturbed[tuple(neighbor_state)] = position

    if not perturbed:
        return {"cvfs_in": dict(), "cvfs_out": dict()}
    if start_state in invariants:
        return {"cvfs_in": perturbed, "cvfs_out": dict()}
    return {"cvfs_in": dict(), "cvfs_out": perturbed}

In [15]:
# For every configuration, store its sampled program transitions together with its
# cvfs ("program_transitions", "cvfs_in" and "cvfs_out" keys in one record).
program_transitions_n_cvf = {}

for state in configurations:
    record = get_program_transitions(state)
    record.update(get_cvfs(state))
    program_transitions_n_cvf[state] = record

In [16]:
# States that have a transition record but no rank yet.
unranked_states = set(program_transitions_n_cvf).difference(program_transitions_rank)
print("Unranked states for Program transitions:", len(unranked_states))

Unranked states for Program transitions: 1087056


In [17]:
# Rank states bottom-up: a state is ranked once all of its sampled program-transition
# successors are ranked. Each pass ranks against a snapshot of the already-ranked states.
#   "C"  = sum of the successors' path counts
#   "L"  = sum over successors of (their "L" + their "C"), i.e. total path length
#   "A"  = L / C, "Ar" = ceil(L / C)
#   "M"  = 1 + the largest "M" among the successors
# NOTE: a state with no program transitions at all has path_count == 0 and the
# division below raises ZeroDivisionError.
while unranked_states:
    ranked_states = set(program_transitions_rank.keys())
    remove_from_unranked_states = set()
    for state in unranked_states:
        dests = program_transitions_n_cvf[state]['program_transitions']
        if dests - ranked_states:       # some destination states are yet to be ranked
            continue

        # all the destinations have been ranked
        total_path_length = 0
        path_count = 0
        _max = 0
        for succ in dests:
            path_count += program_transitions_rank[succ]["C"]
            total_path_length += program_transitions_rank[succ]["L"] + program_transitions_rank[succ]["C"]
            _max = max(_max, program_transitions_rank[succ]["M"])
        program_transitions_rank[state] = {
            "L": total_path_length,
            "C": path_count,
            "A": total_path_length/path_count,
            "Ar": math.ceil(total_path_length/path_count),
            "M": _max + 1
        }
        remove_from_unranked_states.add(state)

    if not remove_from_unranked_states:
        # No state could be ranked in this pass, so no later pass can make progress
        # either. Fail loudly instead of looping forever.
        raise RuntimeError(
            f"Ranking stalled: {len(unranked_states)} states have program transitions "
            "that never lead to a ranked state."
        )
    unranked_states -= remove_from_unranked_states

In [18]:
# Rank effect of each sampled program transition (state -> pt):
# rank of the destination minus rank of the source, for the "Ar" and "M" ranks.
pt_rank_effect = {
    (state, pt): {
        rank_key: program_transitions_rank[pt][rank_key] - program_transitions_rank[state][rank_key]
        for rank_key in ("Ar", "M")
    }
    for state, pt_cvfs in program_transitions_n_cvf.items()
    for pt in pt_cvfs['program_transitions']
}

# Rank count

In [19]:
# One record per ranked state: the state itself plus its rank fields, as DataFrame rows.
pt_rank_ = [
    {"state": state, **rank_fields}
    for state, rank_fields in program_transitions_rank.items()
]

pt_rank_df = pd.DataFrame(pt_rank_)

In [20]:
# Number of states per rank value, for the rounded average rank ("Ar") and the max rank ("M").
pt_avg_counts, pt_max_counts = (
    pt_rank_df[rank_column].value_counts() for rank_column in ('Ar', 'M')
)

In [21]:
fieldnames = ["Rank", "Count (Max)", "Count (Avg)"]

# open(..., "w") does not create missing parent directories, so make sure the
# results directory exists before writing.
os.makedirs(results_dir, exist_ok=True)

# One row per rank value seen in either count series; a rank missing from a series counts as 0.
with open(os.path.join(results_dir, f"rank_partial_{graph_name}.csv"), "w", newline="") as f:
    writer = csv.DictWriter(f, fieldnames=fieldnames)
    writer.writeheader()

    for rank in sorted(set(pt_avg_counts.index)|set(pt_max_counts.index)):
        writer.writerow({"Rank": rank, "Count (Max)": pt_max_counts.get(rank, 0), "Count (Avg)": pt_avg_counts.get(rank, 0)})

# Rank Effect Count