In [None]:
from pathlib import Path
import numpy as np
import csv

import matplotlib
import matplotlib.pyplot as plt
from matplotlib.ticker import FormatStrFormatter
import matplotlib.ticker as mticker

# Select the Tk-based matplotlib backend for every figure created below.
# NOTE(review): the visible code only saves figures to PDF, so a
# non-interactive backend may be sufficient -- confirm before changing.
matplotlib.use('TkAgg')

class MathTextSciFormatter(mticker.Formatter):
    """Tick formatter producing mathtext scientific notation.

    A tick value is first rendered with a printf-style pattern (an ``%e``
    conversion by default) and then rewritten as a ``$...$`` mathtext string
    of the form "significand times 10^{exponent}".
    """

    def __init__(self, fmt="%1.2e"):
        # printf-style pattern applied to every tick value; the result is
        # expected to contain an 'e' separating significand and exponent.
        self.fmt = fmt

    def __call__(self, x, pos=None):
        """Return the mathtext label for tick value *x* (*pos* is unused)."""
        pieces = (self.fmt % x).split('e')

        # Drop a dangling decimal point such as the one in "1.".
        mantissa = pieces[0].rstrip('.')

        exp_field = pieces[1]
        # Keep only a minus sign; an explicit plus is dropped.
        exp_sign = exp_field[0].replace('+', '')
        # Leading zeros are stripped, so a zero exponent becomes empty.
        power = exp_field[1:].lstrip('0')
        if power:
            power = '10^{%s%s}' % (exp_sign, power)

        if mantissa and power:
            label = r'%s{\times}%s' % (mantissa, power)
        else:
            # One of the two parts is empty: emit whatever remains.
            label = r'%s%s' % (mantissa, power)
        return "${}$".format(label)


def read_exec_time(filename):
    """Return the first field of the first row of *filename* as a float.

    The file is parsed as space-delimited CSV; only its first row is read.
    """
    with open(filename, newline='') as time_file:
        first_row = next(csv.reader(time_file, delimiter=' '))
        return float(first_row[0])
        
    
def graph_to_matrix(filename, task = 'skeleton'):
    """Load a space-delimited edge-list file into an adjacency matrix.

    The first row of the file holds the number of nodes followed by the
    number of edges. Every following non-blank row holds the two endpoint
    indices of one edge.

    Args:
        filename: Path of the graph file to read.
        task: When ``'skeleton'``, each edge is stored at
            ``[smaller, larger]`` so that its direction is discarded. For
            any other value the edge is stored at ``[first, second]`` as
            written in the file.

    Returns:
        A ``(nnodes, nnodes)`` integer NumPy array holding 1 for every edge
        and 0 elsewhere.

    Raises:
        AssertionError: If the number of edge rows differs from the edge
            count announced in the header.
    """
    with open(filename, newline='') as csvfile:
        graph_reader = csv.reader(csvfile, delimiter=' ')
        header = next(graph_reader)
        nnodes = int(header[0])
        nedges = int(header[1])
        mtr = np.zeros((nnodes, nnodes), dtype=int)
        cedges = 0
        for row in graph_reader:
            # Blank or whitespace-only lines carry no edge; skip them
            # instead of failing on row[0].
            if not any(row):
                continue
            f = int(row[0])
            t = int(row[1])
            if task == 'skeleton' and f > t:
                # Undirected view: always store at [smaller, larger].
                f, t = t, f
            mtr[f, t] = 1
            cedges += 1
    # Raised explicitly (rather than via ``assert``) so the consistency
    # check is still performed when Python runs with -O.
    if cedges != nedges:
        raise AssertionError(
            'edge count mismatch in %s: header says %d, found %d'
            % (filename, nedges, cedges))
    return mtr
        
    
def shd(gsg, lg):
    """Return the Structural Hamming Distance between two adjacency matrices.

    The distance is the sum of the absolute element-wise differences between
    the gold-standard matrix *gsg* and the learned matrix *lg*.

    Args:
        gsg: Gold-standard adjacency matrix (array-like).
        lg: Learned adjacency matrix with the same shape as *gsg*.

    Returns:
        The summed absolute difference as a NumPy scalar.
    """
    # A single vectorised reduction: avoids iterating matrix rows in Python
    # and works for inputs of any dimensionality.
    return np.abs(np.asarray(gsg) - np.asarray(lg)).sum()


def check_result(graph_name, task = 'skeleton'):
    """Plot SHD against execution time of every tool for one benchmark graph.

    For each of the ``'skeleton'`` and ``'cpdag'`` tasks, the learned graph
    and the timing file of every tool found under ``../sketch.tradeoff/``
    are compared with the gold-standard graph
    ``../graphs/<graph_name>.graph``. One scatter plot per task is saved as
    ``../sketch.figures/<graph_name>.<task>.pdf``; tools whose graph or
    timing file is missing are left out of the plot.

    Args:
        graph_name: Base name of the benchmark graph.
        task: Kept for backward compatibility only and no longer has any
            effect. The gold standard is loaded separately for each plotted
            task so that it uses the same edge orientation as the learned
            graph it is compared with.

    Returns:
        1 if the gold-standard graph file does not exist, otherwise None.
    """
    # (file tag, legend label, matplotlib marker style)
    tool_list = [('csf', 'Proposed: CSF', 'k.'),
                 ('cupc', 'cuPC', 'bx'),
                 ('sf', 'stable.fast', 'r+'),
                 ('ppc', 'ParallelPC', 'c1'),
                 ('bpcs', 'bnlearn', 'm2')]

    gsg_dir = '../graphs/'
    sketch_dir = '../sketch.tradeoff/'
    figs_dir = '../sketch.figures/'

    # Gold standard graph
    gsg_filepath = Path(gsg_dir + graph_name + '.graph')
    if not gsg_filepath.exists():
        return 1

    # Make sure savefig() has somewhere to write.
    Path(figs_dir).mkdir(parents=True, exist_ok=True)

    # Learned graph and learning time
    for cur_task in ['skeleton', 'cpdag']:
        # Load the gold standard with the orientation of the current task;
        # a skeleton-normalised gold matrix must not be compared with a
        # directed learned matrix (or vice versa).
        gsg = graph_to_matrix(gsg_filepath, cur_task)

        fig, ax = plt.subplots(figsize=(3.5, 2.8))
        ax.set_xlabel('Execution Time')
        ax.set_ylabel('Structural Hamming Distance')
        ax.yaxis.set_major_formatter(FormatStrFormatter('%4d'))
        for (tool, toolname, sty) in tool_list:
            stem = sketch_dir + graph_name + '.' + cur_task + '.' + tool
            lg_filename = stem + '.graph'
            lt_filename = stem + '.time'
            lg_filepath = Path(lg_filename)
            lt_filepath = Path(lt_filename)

            # Only tools with both a learned graph and a timing are plotted.
            if lg_filepath.exists() and lt_filepath.exists():
                lg = graph_to_matrix(lg_filepath, cur_task)
                q = shd(gsg, lg)
                t = read_exec_time(lt_filename)
                print(graph_name, cur_task, tool, q, t)
                ax.plot(t, q, sty, markersize=10, label = toolname)

        ax.legend()
        fig.tight_layout()
        fig_filename = figs_dir + graph_name + '.' + cur_task + '.pdf'
        print(fig_filename)
        fig.savefig(fig_filename)
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)
        

In [None]:
# Names of the graphs to evaluate: the named networks first, then the
# random5k* graphs, in the same order as before.
named_graphs = ['andes', 'diabetes', 'hailfinder', 'hepar2', 'link',
                'munin', 'pathfinder', 'pigs', 'win95pts']
random_graphs = ['random5kf2', 'random5kf3', 'random5kf4']

# Produce the SHD-vs-time figures for every graph in turn.
for dt in named_graphs + random_graphs:
    check_result(dt)