In [43]:
#Grab some relevant part of util.py in qtrkx-gnn-tracking

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from collections import namedtuple
#internal
import os, sys, glob, yaml, datetime, argparse
import csv
import tensorflow as tf

Graph = namedtuple('Graph', ['X', 'Ri', 'Ro', 'y'])

class GraphDataset():
    def __init__(self, input_dir, n_samples=None):
        input_dir = os.path.expandvars(input_dir)
        filenames = [os.path.join(input_dir, f) for f in os.listdir(input_dir)
                     if f.startswith('event') and f.endswith('.npz')]
        self.filenames = (
            filenames[:n_samples] if n_samples is not None else filenames)

    def __getitem__(self, index):
        return load_graph(self.filenames[index])

    def __len__(self):
        return len(self.filenames)
def get_dataset(input_dir,n_files):
    return GraphDataset(input_dir, n_files)
def load_graph(filename):
    """Reade a single graph NPZ"""
    with np.load(filename) as f:
        return sparse_to_graph(**dict(f.items()))
def sparse_to_graph(X, Ri_rows, Ri_cols, Ro_rows, Ro_cols, y, dtype=np.float32):
    n_nodes, n_edges = X.shape[0], Ri_rows.shape[0]
    Ri = np.zeros((n_nodes, n_edges), dtype=dtype)
    Ro = np.zeros((n_nodes, n_edges), dtype=dtype)
    Ri[Ri_rows, Ri_cols] = 1
    Ro[Ro_rows, Ro_cols] = 1
    return Graph(X, Ri, Ro, y)
def graph_coloring(graph_name):
    graph_ex=load_graph(graph_name)
    #Load raw data
    X = graph_ex.X
    Ri=graph_ex.Ri
    Ro=graph_ex.Ro
    y=graph_ex.y
    #Need to change type for later use.
    y=y.astype(np.int32)
    Ri = Ri.astype(np.int32)
    Ro = Ro.astype(np.int32)
    # Get the number of nodes and edges
    NV, NE = Ri.shape
    # Initialize the coloring vector
    coloring = np.zeros(NV, dtype=int)
    # Create a dictionary to store the neighbors of each node
    neighbors = {i: [] for i in range(NV)}
    print("Start getting neighborhood information")
    for k in range(NE):
        if k%2000==0: 
            print("Performing ",k,"-th iteration...")
        if y[k] == 1:
            for j in range(NV):
                if Ro[j, k] == 1:
                    for i in range(NV):
                        if Ri[i, k] == 1 and i != j:
                            neighbors[j].append(i)
    print("Start greedy coloring.")
    # Greedy coloring
    for node in range(NV):
        used_colors = set(coloring[n] for n in neighbors[node])
        # Find the smallest unused color
        color = 0
        while color in used_colors:
            color += 1
        coloring[node] = color
    return X, y, Ri, Ro, coloring
def color_df(original_X,color):
    X_color = pd.DataFrame(original_X,columns=['x','y','z'])
    X_color['color'] = coloring
    return X_color

In [39]:
X, y, Ri, Ro, coloring = graph_coloring('event000001000_g000.npz')
print(coloring)

Start getting neighborhood information
Performing  0 -th iteration...
Performing  2000 -th iteration...
Performing  4000 -th iteration...
Performing  6000 -th iteration...
Performing  8000 -th iteration...
Performing  10000 -th iteration...
Start greedy coloring.
Performing  0 -th iteration...
Performing  1000 -th iteration...
Performing  2000 -th iteration...
Performing  3000 -th iteration...
Performing  4000 -th iteration...
Performing  5000 -th iteration...
Performing  6000 -th iteration...
[1 1 1 ... 1 1 0]


In [44]:
print("Coloring vector sum: ",coloring.sum(), ", y vector sum: ",y.sum())

Coloring vector sum:  5162 , y vector sum:  5162


In [46]:
X_color = color_df(X,coloring)
X_color.head(10)

Unnamed: 0,x,y,z,color
0,0.031446,0.850431,0.059595,1
1,0.07216,0.846662,0.136875,1
2,0.115573,0.843047,0.21927,1
3,0.17142,0.83791,0.325158,1
4,0.259156,0.830064,0.491,1
5,0.356072,0.821702,0.6752,1
6,0.49886,0.809187,0.9468,0
7,0.032048,-0.241159,-0.0565,1
8,0.072665,-0.241464,-0.127993,0
9,0.032094,-0.361217,-0.060734,1
