In [109]:
import simexpal
import re
import pandas as pd
import numpy as np
import networkit as nk

In [116]:
# load simex results

def parse(run, f):
    """Build one result record (a dict) for a simexpal run.

    Parameters
    ----------
    run : object exposing ``get_status()``, ``experiment`` (with ``name``,
        ``revision.name`` and ``variation``), ``instance.shortname`` and
        ``aux_file_path``.
    f : open text handle on the run's output; consumed via ``readlines()``.

    Returns
    -------
    dict with the keys ``status``, ``experiment``, ``instance``, ``run``,
    ``revision``, one key per variation axis, and -- when the matching lines
    occur in the output -- ``time`` and ``edges`` (list of int pairs).
    """
    record = {
        'status': run.get_status().name,
        'experiment': run.experiment.name,
        'instance': run.instance.shortname,
        'run': run,
        'revision': run.experiment.revision.name,
    }
    for variant in run.experiment.variation:
        record[variant.axis] = variant.name

    # Diagnostic only: report unfinished harmonic-resistance runs whose stderr
    # shows that the greedy phase has been entered.
    if run.experiment.name == 'harmonicResistanceGreedy' and run.get_status().name != "FINISHED":
        with open(run.aux_file_path('stderr')) as stderr_log:
            for log_line in stderr_log.readlines():
                if 'starting greedy round 0' in log_line:
                    print(run.instance.shortname, 'greedy start found')

    # Line prefixes to look for, depending on the kind of experiment.
    # Each time prefix maps to the column at which the value starts.
    if 'ExactSolution' in run.experiment.name:
        time_starts = {'time:': 7}
        edge_prefixes = ('best edges:',)
    else:
        time_starts = {'Running time:': 14, 'time:': 7}
        edge_prefixes = ('Result edges:', 'items:')

    edge_pattern = r'\(\s?(\d+),\s?(\d+)\)'
    for out_line in f.readlines():
        for prefix, start in time_starts.items():
            if out_line.startswith(prefix):
                # The final character (presumably the newline) is cut off.
                record['time'] = out_line[start:-1]
        if out_line.startswith(edge_prefixes):
            record['edges'] = [(int(u), int(v)) for u, v in re.findall(edge_pattern, out_line)]
    return record

# load all results
# NOTE(review): absolute, machine-specific path -- adjust when running elsewhere.
cfg = simexpal.config_for_dir('/work/berneluk/harmonic-resistance-experiments/experiments')

records = []          # one dict per run, as produced by parse()
unreadable_runs = []  # (run, error) pairs for runs whose output could not be opened
for run in cfg.discover_all_runs():
    if run.get_status() == simexpal.base.Status.NOT_SUBMITTED:
        continue
    try:
        with run.open_output_file() as f:
            records.append(parse(run, f))
    except RuntimeError as e:
        # Best effort: keep going, but remember what was skipped instead of
        # dropping it silently.
        unreadable_runs.append((run, e))

if unreadable_runs:
    print('skipped', len(unreadable_runs), 'run(s) whose output file could not be opened')

results = pd.DataFrame(records)
pd.reset_option('display.max_rows')

# dataframe types
results = results.astype({
    'status': 'category',
    'experiment': 'category',
    'instance': 'category',
})
results['time'] = pd.to_timedelta(results['time'])
results

Unnamed: 0,status,experiment,instance,run,revision,k,edges,time
0,FINISHED,forestIndexExactSolution,ba1,<simexpal.base.Run object at 0x7fcb0e0bba10>,networkit-py-main,k-5,"[(1, 13), (2, 16), (5, 16), (6, 13), (11, 13)]",0 days 00:22:13.783533
1,FINISHED,forestIndexExactSolution,ba2,<simexpal.base.Run object at 0x7fcb0e0b8090>,networkit-py-main,k-5,"[(0, 13), (5, 12), (8, 13), (10, 12), (12, 13)]",0 days 00:21:55.635820
2,FINISHED,forestIndexExactSolution,ba3,<simexpal.base.Run object at 0x7fcb0e0bb750>,networkit-py-main,k-5,"[(1, 17), (3, 10), (5, 10), (6, 10), (10, 17)]",0 days 00:22:45.375821
3,FINISHED,forestIndexExactSolution,disturbedRing,<simexpal.base.Run object at 0x7fcb0e0bbe50>,networkit-py-main,k-5,"[(0, 1), (0, 8), (1, 2), (6, 7), (7, 8)]",0 days 00:00:00.103583
4,FINISHED,forestIndexExactSolution,grid5x3,<simexpal.base.Run object at 0x7fcb0e0bbe10>,networkit-py-main,k-5,"[(0, 1), (0, 5), (5, 6), (5, 10), (10, 11)]",0 days 00:00:12.223374
...,...,...,...,...,...,...,...,...
104,FINISHED,harmonicResistanceGreedy,grid5x3,<simexpal.base.Run object at 0x7fcb0fa89ed0>,harmonicResistance-parallelLamg,k-25,"[(12, 11), (2, 1), (13, 12), (3, 2), (7, 6), (...",0 days 00:00:00.019000
105,STARTED,harmonicResistanceGreedy,p2p-Gnutella04,<simexpal.base.Run object at 0x7fcb0fa88210>,harmonicResistance-parallelLamg,k-25,,NaT
106,STARTED,harmonicResistanceGreedy,p2p-Gnutella09,<simexpal.base.Run object at 0x7fcb0fa88a90>,harmonicResistance-parallelLamg,k-25,,NaT
107,STARTED,harmonicResistanceGreedy,web-indochina,<simexpal.base.Run object at 0x7fcb0fa8add0>,harmonicResistance-parallelLamg,k-25,,NaT


In [111]:
# Show only the harmonicResistanceGreedy runs whose status is FINISHED.
results.query('experiment == "harmonicResistanceGreedy" and status == "FINISHED"')

Unnamed: 0,status,experiment,instance,run,revision,k,edges,time
95,FINISHED,harmonicResistanceGreedy,Bcspwr10,<simexpal.base.Run object at 0x7fcb0fab8290>,harmonicResistance-main,k-25,"[(4675, 719), (5056, 719), (4103, 2517), (5201...",0 days 08:11:35.933000
98,FINISHED,harmonicResistanceGreedy,EmailUniv,<simexpal.base.Run object at 0x7fcb0fabaa50>,harmonicResistance-main,k-25,"[(513, 64), (997, 426), (767, 185), (996, 422)...",0 days 00:45:20.494000
104,FINISHED,harmonicResistanceGreedy,ba1,<simexpal.base.Run object at 0x7fcb0fab8c90>,harmonicResistance-main,k-25,"[(11, 9), (16, 15), (16, 5), (7, 6), (17, 1), ...",0 days 00:00:00.027000
105,FINISHED,harmonicResistanceGreedy,ba2,<simexpal.base.Run object at 0x7fcb0fabaa10>,harmonicResistance-main,k-25,"[(13, 12), (15, 9), (13, 8), (11, 7), (16, 11)...",0 days 00:00:00.026000
106,FINISHED,harmonicResistanceGreedy,ba3,<simexpal.base.Run object at 0x7fcb0fab9450>,harmonicResistance-main,k-25,"[(17, 10), (17, 9), (7, 4), (15, 12), (10, 3),...",0 days 00:00:00.050000
108,FINISHED,harmonicResistanceGreedy,disturbedRing,<simexpal.base.Run object at 0x7fcb0fab9510>,harmonicResistance-main,k-25,"[(3, 2), (7, 6), (2, 1), (8, 7), (1, 0), (8, 0...",0 days 00:00:00.029000
110,FINISHED,harmonicResistanceGreedy,grid5x3,<simexpal.base.Run object at 0x7fcb0fab83d0>,harmonicResistance-main,k-25,"[(12, 11), (2, 1), (13, 12), (3, 2), (7, 6), (...",0 days 00:00:00.063000
111,FINISHED,harmonicResistanceGreedy,grid5x6,<simexpal.base.Run object at 0x7fcb0fabad50>,harmonicResistance-main,k-25,"[(19, 14), (15, 10), (24, 19), (20, 15), (14, ...",0 days 00:00:00.204000
112,FINISHED,harmonicResistanceGreedy,grid7x4,<simexpal.base.Run object at 0x7fcb0fab82d0>,harmonicResistance-main,k-25,"[(24, 23), (3, 2), (25, 24), (4, 3), (26, 25),...",0 days 00:00:00.227000
113,FINISHED,harmonicResistanceGreedy,hotdog5x6,<simexpal.base.Run object at 0x7fcb0fab8310>,harmonicResistance-main,k-25,"[(19, 14), (15, 10), (27, 26), (2, 1), (28, 27...",0 days 00:00:00.618000


In [112]:
def centralityScore(row: pd.Series):
    """Mean closeness percentile of the endpoints of the edges in ``row.edges``.

    The graph at ``row.run.instance.fullpath`` is read with networkit and the
    generalized closeness of every node is computed. Each node is assigned the
    fraction of nodes that come strictly before its tie group when the ranking
    is walked in reverse; nodes whose closeness differs by less than 1e-6 from
    the first node of the current tie group share that group's percentile.
    The score of an edge is the mean percentile of its two endpoints, and the
    returned value is the mean over all edges.

    Parameters
    ----------
    row : pd.Series
        Needs ``row.run.instance.fullpath`` and ``row.edges`` (a list of
        ``(u, v)`` node pairs).

    Returns
    -------
    float
        The mean edge score, or NaN when ``row.edges`` is missing or empty.
    """
    edges = row.edges
    if not isinstance(edges, (list, tuple)) or not edges:
        # No edges recorded for this run (e.g. NaN placeholder): nothing to score,
        # so skip reading the graph altogether.
        return np.nan

    graph = nk.readGraph(row.run.instance.fullpath)
    cc = nk.centrality.Closeness(graph, False, nk.centrality.ClosenessVariant.GENERALIZED)
    cc.run()

    numNodes = graph.numberOfNodes()
    nodePercentiles = {}
    prevCcValue = None   # closeness of the first node of the current tie group
    prevNode = None      # that node; its percentile is shared by the whole group
    numSeen = -1         # number of nodes ranked before the current tie group
    numCurrent = 0       # additional members of the current tie group
    # NOTE(review): assumes ranking() is sorted by decreasing score, so the
    # reversed walk goes from least to most central -- confirm against networkit.
    for node, ccValue in reversed(cc.ranking()):
        # `is not None` rather than truthiness: a closeness of exactly 0.0 is a
        # valid previous value and must still be able to form a tie group.
        if prevCcValue is not None and abs(prevCcValue - ccValue) < 1e-6:
            nodePercentiles[node] = nodePercentiles[prevNode]
            numCurrent += 1
        else:
            numSeen += numCurrent + 1
            numCurrent = 0
            nodePercentiles[node] = numSeen / numNodes
            prevCcValue = ccValue
            prevNode = node

    edgeScores = [np.mean((nodePercentiles[u], nodePercentiles[v])) for u, v in edges]
    return np.mean(edgeScores)

In [113]:
# Score every finished greedy run; rows not selected by the query are left
# without a score (NaN) because the assignment aligns on the index.
results['score'] = (
    results
    .query('status=="FINISHED" and (experiment=="harmonicResistanceGreedy" or experiment == "forestIndexGreedy")')
    .apply(centralityScore, axis=1)
)

In [114]:
def filterBothFinished(group):
    """Return ``group`` if it contains a harmonicResistanceGreedy row, else ``None``.

    NOTE(review): despite the name, only the presence of a
    'harmonicResistanceGreedy' row is checked; nothing here verifies that a
    second experiment is present as well.
    """
    hasHarmonicRun = (group.experiment == 'harmonicResistanceGreedy').any()
    return group if hasHarmonicRun else None

# Finished greedy runs (without the full-search revision), grouped per instance;
# filterBothFinished keeps only the groups it returns (groups mapped to None are dropped).
# `instance` is a categorical column, so `observed` is passed explicitly: the
# result then no longer depends on the pandas default, which is deprecated and
# differs between pandas versions.
filteredResults = (
    results
    .query('status=="FINISHED" and (experiment=="harmonicResistanceGreedy" or experiment == "forestIndexGreedy") and revision != "forestIndex-via-resistance-full-search"')
    .groupby('instance', observed=False)
    .apply(filterBothFinished)
)
# Display the remaining runs, leaving out the listed instances.
filteredResults.query('instance not in ("ba1", "ba2", "ba3", "ws1", "ws2", "ws3", "grid5x3", "grid5x6", "grid7x4", "hotdog5x6", "disturbedRing")')[["experiment", "revision", "time", "score"]]

  filteredResults = results.query('status=="FINISHED" and (experiment=="harmonicResistanceGreedy" or experiment == "forestIndexGreedy") and revision != "forestIndex-via-resistance-full-search"').groupby('instance').apply(filterBothFinished)
  filteredResults = results.query('status=="FINISHED" and (experiment=="harmonicResistanceGreedy" or experiment == "forestIndexGreedy") and revision != "forestIndex-via-resistance-full-search"').groupby('instance').apply(filterBothFinished)


Unnamed: 0_level_0,Unnamed: 1_level_0,experiment,revision,time,score
instance,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Bcspwr10,16,forestIndexGreedy,forestIndex-via-resistance,0 days 00:00:01.786816,0.301728
Bcspwr10,95,harmonicResistanceGreedy,harmonicResistance-main,0 days 08:11:35.933000,0.740547
EmailUniv,19,forestIndexGreedy,forestIndex-via-resistance,0 days 00:00:00.159011,0.526037
EmailUniv,98,harmonicResistanceGreedy,harmonicResistance-main,0 days 00:45:20.494000,0.405384
Erdos992,20,forestIndexGreedy,forestIndex-via-resistance,0 days 00:00:01.180637,0.691134
Erdos992,126,harmonicResistanceGreedy,harmonicResistance-parallelLamg,0 days 06:28:02.225000,0.643315
Reality,23,forestIndexGreedy,forestIndex-via-resistance,0 days 00:00:03.758507,0.508321
Reality,127,harmonicResistanceGreedy,harmonicResistance-parallelLamg,0 days 08:50:06.974000,0.825954
airTrafficControl,24,forestIndexGreedy,forestIndex-via-resistance,0 days 00:00:00.110273,0.063752
airTrafficControl,128,harmonicResistanceGreedy,harmonicResistance-parallelLamg,0 days 00:06:31.988000,0.57938


In [115]:
# Same selection as in the previous cell (listed instances excluded), showing
# only experiment, score and time.
filteredResults.query('instance not in ("ba1", "ba2", "ba3", "ws1", "ws2", "ws3", "grid5x3", "grid5x6", "grid7x4", "hotdog5x6", "disturbedRing")')[["experiment", "score", "time"]]

Unnamed: 0_level_0,Unnamed: 1_level_0,experiment,score,time
instance,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bcspwr10,16,forestIndexGreedy,0.301728,0 days 00:00:01.786816
Bcspwr10,95,harmonicResistanceGreedy,0.740547,0 days 08:11:35.933000
EmailUniv,19,forestIndexGreedy,0.526037,0 days 00:00:00.159011
EmailUniv,98,harmonicResistanceGreedy,0.405384,0 days 00:45:20.494000
Erdos992,20,forestIndexGreedy,0.691134,0 days 00:00:01.180637
Erdos992,126,harmonicResistanceGreedy,0.643315,0 days 06:28:02.225000
Reality,23,forestIndexGreedy,0.508321,0 days 00:00:03.758507
Reality,127,harmonicResistanceGreedy,0.825954,0 days 08:50:06.974000
airTrafficControl,24,forestIndexGreedy,0.063752,0 days 00:00:00.110273
airTrafficControl,128,harmonicResistanceGreedy,0.57938,0 days 00:06:31.988000
