In [1]:
import os
import csv
import copy
import math
import random

import pandas as pd

from itertools import combinations

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# Directory the input graph files (one adjacency list per "<graph_name>.txt") are read from.
graphs_dir = "graphs"
# Directory the result CSV files are written to.
results_dir = "results"

# The CSV-writing cells open files inside results_dir for writing; create the
# directory up front so a fresh checkout survives "Restart Kernel -> Run All".
os.makedirs(results_dir, exist_ok=True)

In [3]:
# Probe budget handed to find_rank_of_successors() for every configuration.
SAMPLE_SIZE = 1

# The sampling shuffles successor states with the "random" module; seed it so that
# a full re-run of the notebook reproduces the same ranks and CSV outputs.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

In [4]:
graph_name = "graph_powerlaw_cluster_graph_n5"

# Adjacency list: each non-empty line of the file is "<node> <neighbor> <neighbor> ...",
# stored as {node: set of neighbor names}.
graph = {}
with open(os.path.join(graphs_dir, f"{graph_name}.txt"), "r") as f:
    for line in f:
        node_edges = line.split()
        if not node_edges:
            # A blank or whitespace-only line (e.g. a trailing empty line) names no
            # node; skip it instead of failing on node_edges[0].
            continue
        node, *edges = node_edges
        graph[node] = set(edges)

graph

{'A': {'D'},
 'B': {'D', 'E'},
 'C': {'D', 'E'},
 'D': {'A', 'B', 'C', 'E'},
 'E': {'B', 'C', 'D'}}

In [5]:
# Fix one ordering of the node names and remember each node's index in it;
# a configuration tuple stores the color of nodes[i] at index i.
nodes = [*graph]
node_positions = dict(zip(nodes, range(len(nodes))))

In [6]:
# Degree of a node = number of entries in its adjacency set.
degree_of_nodes = {name: len(neighbors) for name, neighbors in graph.items()}

print("Degree of all nodes (starting from 0):")
degree_of_nodes # start from 0

Degree of all nodes (starting from 0):


{'A': 1, 'B': 2, 'C': 2, 'D': 4, 'E': 3}

In [7]:
# Start from the all-zero coloring and grow the set one node at a time: for every
# configuration collected so far, add a variant for each non-zero color the current
# node may take (1 .. degree of that node).
configurations = {(0,) * len(nodes)}
for n in nodes:
    idx = node_positions[n]
    # Snapshot first: the set is extended while its earlier contents are being walked.
    known_so_far = tuple(configurations)
    for color in range(1, degree_of_nodes[n] + 1):
        for base in known_so_far:
            configurations.add(base[:idx] + (color,) + base[idx + 1:])

print("No. of Configurations:", len(configurations))

No. of Configurations: 360


In [8]:
# A configuration is an invariant when no pair of adjacent nodes shares a color.
# Pairs are visited as (i, j) with i < j, and adjacency is looked up in graph[nodes[i]].
invariants = {
    state
    for state in configurations
    if not any(
        nodes[j] in graph[nodes[i]] and state[i] == state[j]
        for i, j in combinations(range(len(state)), 2)
    )
}

print("Invariants and Count of Invariants:")
len(invariants)

Invariants and Count of Invariants:


90

In [9]:
# Base case of the rank computation: every invariant gets its own record with
# zero path length ("L"), one path ("C"), and zero average / rounded average / max rank.
program_transitions_rank = {
    inv: {"L": 0, "C": 1, "A": 0, "Ar": 0, "M": 0}
    for inv in invariants
}

In [10]:
def find_min_possible_color(colors):
    """
    return the smallest non-negative integer that does not occur in "colors"
    """
    candidates = range(len(colors) + 1)
    return next((c for c in candidates if c not in colors), None)

In [11]:
def is_different_color(color, other_colors):
    """
    return True if "color" is different from all "other_colors"
    """
    return not any(color == other for other in other_colors)

In [12]:
def is_program_transition(perturb_pos, start_state, dest_state):
    """
    return True if moving from "start_state" to "dest_state" is a program transition

    The move counts when the node at "perturb_pos" ends up, in "dest_state", with the
    smallest color not used by any of its neighbors. A move between two invariants
    never counts.
    """
    both_invariant = start_state in invariants and dest_state in invariants
    if both_invariant:
        return False

    adjacent_colors = {
        dest_state[node_positions[neighbor]]
        for neighbor in graph[nodes[perturb_pos]]
    }
    return find_min_possible_color(adjacent_colors) == dest_state[perturb_pos]

In [13]:
def get_program_transitions(start_state):
    """
    return the set of states reachable from "start_state" by one program transition

    Only nodes whose color matches a neighbor's color are considered; each such node is
    tried with every other color in 0 .. degree, and a resulting state is kept when
    is_program_transition() accepts it.
    """
    program_transitions = set()

    for position, current in enumerate(start_state):
        node = nodes[position]
        adjacent_colors = {start_state[node_positions[neighbor]] for neighbor in graph[node]}
        # a node that already differs from all of its neighbors is left alone
        if is_different_color(current, adjacent_colors):
            continue

        for candidate in range(degree_of_nodes[node] + 1):
            if candidate == current:
                continue
            successor = tuple(
                candidate if i == position else color
                for i, color in enumerate(start_state)
            )
            if is_program_transition(position, start_state, successor):
                program_transitions.add(successor)

    return program_transitions

# get_program_transitions((0,0,2, 3, 3))

In [14]:
# Per-state bookkeeping, keyed by configuration tuple. Later cells fill each entry with
# "program_transitions" (set of probed successor states) and "cvfs_in" / "cvfs_out" dicts.
program_transitions_n_cvf = {}

In [15]:
def find_rank_of_successors(state, probe_limit, init=False):
    """
    Estimate the rank of "state" by probing a random sample of its program transitions.

    An invariant state returns its stored record from program_transitions_rank.
    Otherwise the successors of "state" are shuffled and "probe_limit" is split among
    them as evenly as possible (the first few get one extra probe); successors left
    with a zero share are not probed. Each probed successor is ranked recursively with
    its share as the new limit.

    state:       configuration tuple to rank
    probe_limit: probe budget for this state; must be >= 1 for a non-invariant state
    init:        when True, every probed successor is also recorded in
                 program_transitions_n_cvf[state]["program_transitions"]

    Returns a dict with
        "L":  sum over probed successors of (their "L" + their "C")
        "C":  sum over probed successors of their "C"
        "A":  L / C
        "Ar": ceil(L / C)
        "M":  1 + the largest "M" among probed successors

    Raises ValueError if a non-invariant state is given a probe_limit below 1.
    """
    if state in invariants:
        return program_transitions_rank[state]

    if probe_limit < 1:
        # With no budget nothing is probed and the averages below would divide by zero;
        # fail with a message that names the actual problem instead.
        raise ValueError(
            f"probe_limit must be >= 1 for a non-invariant state, got {probe_limit}"
        )

    successors = list(get_program_transitions(state))
    random.shuffle(successors)
    # share: probes every successor gets; add_extra_to_nodes: how many get one more
    share, add_extra_to_nodes = divmod(probe_limit, len(successors))

    total_path_length = 0
    path_count = 0
    _max = 0
    for index, succ in enumerate(successors):
        budget = share + 1 if index < add_extra_to_nodes else share
        if budget < 1:
            # the extras are used up and the even share is zero: budget exhausted
            break

        result = find_rank_of_successors(succ, budget)

        if init:
            program_transitions_n_cvf[state]["program_transitions"].add(succ)

        path_count += result["C"]
        # every path through succ becomes one step longer when started from state
        total_path_length += result["L"] + result["C"]
        _max = max(_max, result["M"])

    average = total_path_length / path_count
    return {
        "L": total_path_length,
        "C": path_count,
        "A": average,
        "Ar": math.ceil(average),
        "M": _max + 1,
    }

In [16]:
# Rank every configuration with the configured probe budget; the top-level call
# (init=True) also records which successors were probed for that configuration.
for state in configurations:
    program_transitions_n_cvf[state] = dict(program_transitions=set())
    program_transitions_rank[state] = find_rank_of_successors(
        state, SAMPLE_SIZE, init=True
    )

# Rank count

In [17]:
# One record per state: the state tuple plus its rank fields.
pt_rank_ = [
    {"state": state, **rank}
    for state, rank in program_transitions_rank.items()
]

pt_rank_df = pd.DataFrame(pt_rank_)

In [18]:
# Number of states per rank value: by rounded-up average rank ("Ar") and by max rank ("M").
pt_avg_counts = pt_rank_df['Ar'].value_counts()
pt_max_counts = pt_rank_df['M'].value_counts()

In [19]:
# Write one CSV row per rank value seen in either count series; a rank missing
# from one of the series is reported with a count of 0.
fieldnames = ["Rank", "Count (Max)", "Count (Avg)"]
rank_csv_path = os.path.join(results_dir, f"rank_partial_{graph_name}.csv")
with open(rank_csv_path, "w", newline="") as f:
    writer = csv.DictWriter(f, fieldnames=fieldnames)
    writer.writeheader()

    all_ranks = set(pt_avg_counts.index).union(pt_max_counts.index)
    for rank in sorted(all_ranks):
        row = {
            "Rank": rank,
            "Count (Max)": pt_max_counts.get(rank, 0),
            "Count (Avg)": pt_avg_counts.get(rank, 0),
        }
        writer.writerow(row)

# Rank Effect

In [20]:
# Rank effect of a recorded program transition (state -> pt): rank of the
# destination minus rank of the source, for both "Ar" and "M".
pt_rank_effect = {
    (state, pt): {
        "Ar": program_transitions_rank[pt]["Ar"] - program_transitions_rank[state]["Ar"],
        "M": program_transitions_rank[pt]["M"] - program_transitions_rank[state]["M"],
    }
    for state, pt_cvfs in program_transitions_n_cvf.items()
    for pt in pt_cvfs["program_transitions"]
}

In [21]:
def get_cvfs(start_state):
    """
    return every state that differs from "start_state" in one node's color

    Each node is tried with every color in 0 .. degree of that node. The result maps
    each perturbed state to the position of the node that was changed.
    """
    cvfs = {}
    for position in range(len(start_state)):
        max_color = degree_of_nodes[nodes[position]]
        for color in range(max_color + 1):
            candidate = tuple(
                color if i == position else existing
                for i, existing in enumerate(start_state)
            )
            if candidate == start_state:
                continue
            # remember which node was perturbed to calculate its overall rank effect
            cvfs[candidate] = position
    return cvfs

In [22]:
# The perturbations of an invariant state are stored under "cvfs_in", those of any
# other state under "cvfs_out"; the other key is left as an empty dict.
for state in configurations:
    entry = program_transitions_n_cvf[state]
    state_is_invariant = state in invariants
    entry["cvfs_in"] = get_cvfs(state) if state_is_invariant else dict()
    entry["cvfs_out"] = dict() if state_is_invariant else get_cvfs(state)

In [23]:
# Rank effect of every cvf (state -> cvf): rank of the destination minus rank of the
# source, kept with the position of the perturbed node. "cvfs_in" and "cvfs_out"
# entries go to separate tables.
cvfs_in_rank_effect = {}
cvfs_out_rank_effect = {}

for state, pt_cvfs in program_transitions_n_cvf.items():
    for source_key, effect_table in (
        ("cvfs_in", cvfs_in_rank_effect),
        ("cvfs_out", cvfs_out_rank_effect),
    ):
        for cvf, node in pt_cvfs[source_key].items():
            rank_before = program_transitions_rank[state]
            rank_after = program_transitions_rank[cvf]
            effect_table[(state, cvf)] = {
                "node": node,
                "Ar": rank_after["Ar"] - rank_before["Ar"],
                "M": rank_after["M"] - rank_before["M"],
            }

In [24]:
# One record per program transition: the (source, destination) pair plus its rank effects.
pt_rank_effect_ = [
    {"state": transition, **effect}
    for transition, effect in pt_rank_effect.items()
]

pt_rank_effect_df = pd.DataFrame(pt_rank_effect_)

In [25]:
# One record per "cvfs_in" entry: the (source, destination) pair plus node and rank effects.
cvfs_in_rank_effect_ = [
    {"state": transition, **effect}
    for transition, effect in cvfs_in_rank_effect.items()
]

cvfs_in_rank_effect_df = pd.DataFrame(cvfs_in_rank_effect_)

In [26]:
# One record per "cvfs_out" entry: the (source, destination) pair plus node and rank effects.
cvfs_out_rank_effect_ = [
    {"state": transition, **effect}
    for transition, effect in cvfs_out_rank_effect.items()
]

cvfs_out_rank_effect_df = pd.DataFrame(cvfs_out_rank_effect_)

In [27]:
# Number of transitions per rank-effect value, for program transitions and for the
# "cvfs_in" / "cvfs_out" tables, by rounded-up average rank ("Ar") and max rank ("M").
# NOTE: pt_avg_counts / pt_max_counts are rebound here; the rank-count cell earlier in
# the notebook used the same names for per-state rank counts.
pt_avg_counts = pt_rank_effect_df['Ar'].value_counts()
pt_max_counts = pt_rank_effect_df['M'].value_counts()
cvf_in_avg_counts = cvfs_in_rank_effect_df['Ar'].value_counts()
cvf_in_max_counts = cvfs_in_rank_effect_df['M'].value_counts()
cvf_out_avg_counts = cvfs_out_rank_effect_df['Ar'].value_counts()
cvf_out_max_counts = cvfs_out_rank_effect_df['M'].value_counts()

In [28]:
# Write one CSV row per rank-effect value seen in any of the six count series; a
# value missing from a series is reported with a count of 0.
fieldnames = ["Rank Effect", "PT (Max)", "PT (Avg)", "CVF In (Max)", "CVF In (Avg)", "CVF Out (Max)", "CVF Out (Avg)"]
effect_csv_path = os.path.join(results_dir, f"rank_partial_effect_{graph_name}.csv")
with open(effect_csv_path, "w", newline="") as f:
    writer = csv.DictWriter(f, fieldnames=fieldnames)
    writer.writeheader()

    counts_by_column = {
        "PT (Max)": pt_max_counts,
        "PT (Avg)": pt_avg_counts,
        "CVF In (Max)": cvf_in_max_counts,
        "CVF In (Avg)": cvf_in_avg_counts,
        "CVF Out (Max)": cvf_out_max_counts,
        "CVF Out (Avg)": cvf_out_avg_counts,
    }
    all_effects = set().union(*(counts.index for counts in counts_by_column.values()))
    for effect in sorted(all_effects):
        row = {"Rank Effect": effect}
        for column, counts in counts_by_column.items():
            row[column] = counts.get(effect, 0)
        writer.writerow(row)