In [1]:
import pickle
import os
import pandas as pd
import numbers
import networkx as nx
import numpy as np
from tqdm.auto import tqdm
import plotly.express as px
import plotly.io as pio
from datasets import get_dataset

pio.templates.default = "plotly_white"

In [2]:
datasets = {dataset_name: get_dataset(dataset_name) for dataset_name in ["Is_Acyclic_Ones", "MUTAG", "Shapes_Ones"]}
dataset_name = "Is_Acyclic"

In [3]:
def create_graph(adjacency_matrix, node_features):
    """
    Create a NetworkX graph from a numpy adjacency matrix and node feature matrix.

    Parameters:
    - adjacency_matrix (numpy.ndarray): The adjacency matrix of the graph.
    - node_features (numpy.ndarray): The matrix of node features.

    Returns:
    - nx.Graph: The created NetworkX graph.
    """

    # Create an empty graph
    G = nx.Graph()

    # Get the number of nodes in the graph
    num_nodes = adjacency_matrix.shape[0]

    # Add nodes to the graph with corresponding features
    for i in range(num_nodes):
        G.add_node(i, label=node_features[i])

    # Add edges to the graph based on the adjacency matrix
    for i in range(num_nodes):
        for j in range(i + 1, num_nodes):
            if adjacency_matrix[i, j] != 0:
                G.add_edge(i, j)

    return G

def average_edit_distance(Gs):
    # Get the average edit distance between all pairs of graphs
    edit_distances = []
    for i in range(1,len(Gs)):
        for j in range(i):
            edit_distances.append(nx.graph_edit_distance(Gs[i], Gs[j], node_match=lambda x, y: np.isclose(x['label'], y['label']).all()))
    return np.mean(edit_distances)

In [18]:
pickle.load(open("results/runs_Is_Acyclic/n9r0e41k.pkl","rb"), fix_imports=True, encoding="latin1", errors="none")

  pickle.load(open("results/runs_Is_Acyclic/n9r0e41k.pkl","rb"), fix_imports=True, encoding="latin1", errors="none")


TypeError: 'int' object is not an iterator

In [7]:
# %%capture

gdir = f"./results/runs_{dataset_name}/"
d_list = []
for filename in os.listdir(gdir):
    try:
        d = pickle.load(open(gdir+filename, "rb"))
        if d["dataset_name"] != dataset_name:
            continue
        d["run_id"] = filename.split(".")[0]
        d["max_class_name"] = datasets[d["dataset_name"]].GRAPH_CLS[d["max_class"]]
        d_list.append(d)
    except Exception as e:
        print(e)
        continue

  d = pickle.load(open(gdir+filename, "rb"))


'int' object is not an iterator
'int' object is not an iterator
'int' object is not an iterator
'int' object is not an iterator
'int' object is not an iterator
'int' object is not an iterator
'int' object is not an iterator
'int' object is not an iterator
'int' object is not an iterator
'int' object is not an iterator
'int' object is not an iterator
'int' object is not an iterator
'int' object is not an iterator
'int' object is not an iterator
'int' object is not an iterator
'int' object is not an iterator
'int' object is not an iterator
'int' object is not an iterator
'int' object is not an iterator
'int' object is not an iterator
'int' object is not an iterator
'int' object is not an iterator
'int' object is not an iterator
'int' object is not an iterator
'int' object is not an iterator
'int' object is not an iterator
'int' object is not an iterator
'int' object is not an iterator
'int' object is not an iterator
'int' object is not an iterator
'int' object is not an iterator
'int' ob

In [5]:
pd.DataFrame(d_list[1]["mip_information"])

IndexError: list index out of range

In [None]:
sorted(d_list[0].keys()), d_list[0]["solutions"][-1], d_list[0]["mip_information"][0]

(['# Model Parameter',
  'Highest Upper Bound',
  'Lowest Lower Bound',
  'Max Non-Target Class Output',
  'Min ABS Bound',
  'Model Status',
  'Objective Value',
  'Output',
  'Output Logit 0',
  'Output Logit 1',
  'Output Logit 2',
  'Output Logit 3',
  'Output Logit 4',
  'Target Class Output',
  'Upper Bound',
  'X',
  '_runtime',
  '_step',
  '_timestamp',
  'architecture',
  'dataset_name',
  'init_index',
  'init_with_data',
  'initialization',
  'initialization_output',
  'log',
  'max_class',
  'max_class_name',
  'mip_information',
  'model_path',
  'num_nodes',
  'output_file',
  'param_file',
  'regularizer_weights',
  'regularizers',
  'run_id',
  'runtime',
  'solution',
  'solution_output',
  'solutions',
  'trim_unneeded_outputs'],
 {'X': array([[1.],
         [1.],
         [1.],
         [1.],
         [1.],
         [1.]]),
  'A': array([[ 0.,  1., -0., -0., -0., -0.],
         [ 1.,  0.,  1., -0., -0., -0.],
         [ 0.,  1.,  0.,  1., -0.,  1.],
         [ 0.,  

In [None]:
mip_info = None
for d in d_list:
    if not d["num_nodes"] == 7 or not d["max_class"] == 0:
        continue
    print("Ah")
    m= pd.DataFrame(d["mip_information"][::len(d["mip_information"])//1000])
    m["run_id"] = d["run_id"]
    if mip_info is None:
        mip_info = m
    else:
        mip_info = pd.concat([mip_info, m], axis=0)

mip_info.rename(columns={"BestBound": "Best Bound", "ObjBound": "Objective Bound", "WorkUnits": "Work Units", "ExploredNodeCount": "Explored Node Count", "UnexploredNodeCount": "Unexplored Node Count"}, inplace=True)
fig = px.line(mip_info,
x="Work Units", 
y=["Best Bound", "Objective Bound"], 
title="Convergence of Objective Bounds", 
# width=1000, 
# height=1000,
log_y = True,
facet_row="run_id")
fig.update_layout(
    font=dict(
        family= "Roman Modern",
        size=18,
        color='rgb(82, 82, 82)',
    ),
# legend=dict(
#     visible=False
# ),
showlegend=False,
autosize=False,
margin=dict(
    autoexpand=True,
    l=100,
    r=20,
    t=110,
),
)   
fig.update_xaxes(
    ticks='outside',
    tickfont=dict(
        size=18,
        color='rgb(82, 82, 82)',
),)
fig.update_yaxes(
    title="",
    tickfont=dict(
        size=18,
        color='rgb(82, 82, 82)',
),)
# Save figure
# fig.write_image(f"./results/figures/convergence_{d['dataset']}_class_{d['max_class']}_n_{d['num_nodes']}_id_{d['run_id']}.png")
# fig.write_html("./results/figures/convergence_{d['dataset']}_class_{d['max_class']}_n_{d['num_nodes']}_id_{d['run_id']}.html") 
fig.for_each_annotation(lambda a: a.update(text=""))
fig.for_each_yaxis(lambda y: y.update(title = ''))
# and:
fig.add_annotation(x=0,y=0.5,
                   text="Objective Value", textangle=-90,
                    xref="paper", yref="paper")
fig.show()


# fig = px.line(mip_info, 
# x="Work Units", 
# y=["Explored Node Count", "Unexplored Node Count"], 
# title="Node Counts", 
# width=1000, 
# height=800,
# log_y = True,
# facet_row="run_id")
# fig.update_layout(
#     font=dict(
#         family= "Roman Modern",
#         size=18,
#         color='rgb(82, 82, 82)',
#     ),
# xaxis=dict(
#     ticks='outside',
#     tickfont=dict(
#         size=18,
#         color='rgb(82, 82, 82)',
#     ),
# ),
# yaxis=dict(
#     title="",
#     tickfont=dict(
#         size=18,
#         color='rgb(82, 82, 82)',
#     ),
# ),
# showlegend=False,
# autosize=False,
# margin=dict(
#     autoexpand=True,
#     l=100,
#     r=20,
#     t=110,
# ),
# )   
# fig.add_annotation(x=0,y=0.5,
#                    text="Number of Nodes", textangle=-90,
#                     xref="paper", yref="paper")
# # fig.write_image(f"./results/figures/node_counts_{d['dataset']}_class_{d['max_class']}_n_{d['num_nodes']}_id_{d['run_id']}.png")
# fig.for_each_annotation(lambda a: a.update(text=""))
# fig.for_each_yaxis(lambda y: y.update(title = ''))
# fig.show()


In [None]:
mipexplainer_df = pd.DataFrame([{key: value for key, value in d.items() if isinstance(value, numbers.Number) or key in {"run_id", "dataset_name"}} for d in d_list])

mipexplainer_df["G"] = [create_graph(d["solutions"][-1]["A"], d["solutions"][-1]["X"]) for d in d_list]
mipexplainer_df["init_G"] = [create_graph(d["solutions"][0]["A"], d["solutions"][0]["X"]) for d in d_list]
mipexplainer_df["method"] = "MIPExplainer"

mipexplainer_df = mipexplainer_df[mipexplainer_df["dataset_name"]==dataset_name]

In [None]:
gnninterpreter_df = pd.DataFrame(pickle.load(open(f"results/gnninterpreter_{dataset_name}.pkl", "rb")))
xgnn_df = pd.DataFrame(pickle.load(open(f"results/xgnn_{dataset_name}.pkl", "rb")))

df = pd.concat([mipexplainer_df, gnninterpreter_df, xgnn_df])
index_names = ["dataset_name", "max_class", "num_nodes", "method"] 

df = df[df["max_class"] != 4]

df = df.set_index(index_names).sort_index()

In [None]:
a = df[[f"Output Logit {i}" for i in range(4)]].copy() # df[[c for c in df.columns if "Output Logit" in c]]
a.rename(columns={f"Output Logit {i}": f"{datasets[dataset_name].GRAPH_CLS[i]} Output Logit" for i in range(datasets[dataset_name].num_classes)}, inplace=True)
# a = a.div(a.sum(axis=1)**2, axis=0)
logit_table = a.groupby(index_names).mean()
with open(f"results/tables/output_logit_{dataset_name}.tex", "w") as f:
    f.write(logit_table.to_latex(index=True, float_format="{:.3f}".format).replace("_", "\\_"))
logit_table

In [None]:
runtime_table = df.groupby(index_names)["runtime"].mean()
with open(f"results/tables/runtime_{dataset_name}.tex", "w") as f:
    f.write(runtime_table.to_latex(index=True, float_format="{:.3f}".format).replace("_", "\\_"))
runtime_table

In [None]:
distances = []
for name, group in tqdm(df.groupby(index_names)["G"]):
    # Save the average edit distance of the group to a df
    group = list(group)
    distances.append({"Consistency": average_edit_distance(group)} | dict(zip(index_names, name)))
distances_df = pd.DataFrame(distances).set_index(index_names).sort_index()

In [None]:
with open(f"results/tables/consistency_{dataset_name}.tex", "w") as f:
    f.write(distances_df.to_latex(index=True, float_format="{:.3f}".format).replace("_", "\\_"))
runtime_table

In [None]:
# Average over num_nodes
averaged_distances_df = distances_df.groupby(["dataset_name", "max_class", "method"]).mean()
with open(f"results/tables/averaged_consistency_{dataset_name}.tex", "w") as f:
    f.write(averaged_distances_df.to_latex(index=True, float_format="{:.3f}".format).replace("_", "\\_"))