In [1]:
import glob, os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sys
import json
import traceback
%matplotlib inline

In [2]:
def loadTSPInstances(path):
    dataframe = None
    frameCreated = False
    for path in glob.glob(path + "*.json"):
        try:
            with open(path) as file:
                jsonDf = json.load(file)
            newFrame = pd.io.json.json_normalize(jsonDf)
            newFrame["name"] = os.path.splitext(os.path.basename(path))[0]
                        
            if not frameCreated:
                dataframe = newFrame
                frameCreated = True
            else:
                dataframe = pd.concat([dataframe, newFrame])
        except:
            traceback.print_exc()
            
    cols = dataframe.columns.tolist()
    cols.remove("name")
    cols.insert(0, "name")
    dataframe = dataframe[cols]
    
    return dataframe.reset_index().drop("index", axis=1)

In [3]:
instances = loadTSPInstances("/Users/adam/Documents/Rose Repos/Thesis/data/features/tsplib/")
# Remove unnecessary tours
instances = instances.drop(["heuristics.simulatedAnnealingValues", "heuristics.graspValues", "heuristics.tabuValues", "heuristics.antColonyValues", "heuristics.geneticValues"], axis=1)
# Remove due to bug in creation
instances = instances.loc[instances["name"] != "pr2392"]

In [4]:
instances.head()

Unnamed: 0,name,complexFeatures.adjacencyCorrelationCoefficient,complexFeatures.adjacencyCorrelationCoefficientTimes,complexFeatures.alternateClusteringCoefficient,complexFeatures.alternateClusteringCoefficientTimes,complexFeatures.averageGeodesicDistance,complexFeatures.averageGeodesicDistanceTimes,complexFeatures.averageShortestPathGeodesicDistance,complexFeatures.averageShortestPathGeodesicDistanceTimes,complexFeatures.clusteringCoefficientTransitivity,...,simpleFeatures.numberVerticesTimes,simpleFeatures.standardDeviationEdgeCost,simpleFeatures.standardDeviationEdgeCostTimes,simpleFeatures.standardDeviationVertexCost,simpleFeatures.standardDeviationVertexCostTimes,simpleFeatures.sumCostNearestNeighbor,simpleFeatures.sumCostNearestNeighborTimes,simpleFeatures.sumNLowestEdgeCost,simpleFeatures.sumNLowestEdgeCostTimes,simpleFeatures.vertexCostPrepTimes
0,ftv70,1.0,"[0.05023360252380371, 0.04813218116760254, 0.0...",2415.0,"[4.330471992492676, 4.14451265335083, 4.254827...",137.7334,"[0.0002028942108154297, 0.00022172927856445312...",137.733,"[0.2992990016937256, 0.244307279586792, 0.2395...",3.0,...,"[5.7220458984375e-06, 3.5762786865234375e-06, ...",63.468561,"[7.724761962890625e-05, 7.510185241699219e-05,...",20.423354,"[0.000148773193359375, 5.841255187988281e-05, ...",1393.0,"[0.0029914379119873047, 0.0029494762420654297,...",1054.0,"[0.0004134178161621094, 0.0003867149353027344,...","[0.0063934326171875, 0.00632786750793457, 0.00..."
1,pa561,1.0,"[10.317912340164185, 7.950518846511841, 7.9349...",,"[-1, -1, -1, -1, -1]",65.225,"[0.013514041900634766, 0.012034416198730469, 0...",65.2249,"[322.27051281929016, -1, -1, 328.1916763782501...",,...,"[1.049041748046875e-05, 9.5367431640625e-06, 4...",31.437922,"[0.001054525375366211, 0.0008180141448974609, ...",13.453846,"[0.00016617774963378906, 8.153915405273438e-05...",2101.0,"[0.41226673126220703, 0.18448448181152344, 0.1...",1640.0,"[0.03078460693359375, 0.019775390625, 0.020031...","[0.796375036239624, 0.46872520446777344, 0.408..."
2,pr76,1.0,"[0.14427709579467773, 0.1428532600402832, 0.14...",2775.0,"[10.63114595413208, 11.824914455413818, 12.026...",7558.707357,"[0.00021219253540039062, 0.0003428459167480469...",7558.71,"[0.37108445167541504, 0.6816906929016113, 0.40...",3.0,...,"[1.049041748046875e-05, 4.76837158203125e-06, ...",3982.505548,"[0.0001308917999267578, 0.00012731552124023438...",1719.701344,"[0.0002040863037109375, 9.822845458984375e-05,...",65385.318451,"[0.007316112518310547, 0.0116119384765625, 0.0...",53509.282067,"[0.0007195472717285156, 0.0006911754608154297,...","[0.014772653579711914, 0.018717050552368164, 0..."
3,bier127,1.0,"[0.41303396224975586, 0.4057493209838867, 0.53...",7875.0,"[56.059271574020386, 55.766902923583984, 65.63...",4952.476091,"[0.0007309913635253906, 0.0026350021362304688,...",4952.48,"[1.742643117904663, 1.7371268272399902, 1.6888...",3.0,...,"[2.2172927856445312e-05, 4.291534423828125e-06...",3100.926508,"[0.00015664100646972656, 0.0001559257507324218...",1779.076843,"[0.00015735626220703125, 9.298324584960938e-05...",76624.525665,"[0.020291566848754883, 0.03277015686035156, 0....",32450.508921,"[0.0019288063049316406, 0.0019335746765136719,...","[0.048226356506347656, 0.06056499481201172, 0...."
4,ftv47,1.0,"[0.0325469970703125, 0.02184295654296875, 0.02...",1081.0,"[1.4054772853851318, 1.2391138076782227, 1.254...",142.376773,"[0.00015473365783691406, 0.0001189708709716796...",142.377,"[0.09906959533691406, 0.08412504196166992, 0.0...",3.0,...,"[1.4066696166992188e-05, 5.4836273193359375e-0...",65.442092,"[0.00011610984802246094, 0.0001130104064941406...",19.513438,"[0.00021386146545410156, 0.0001006126403808593...",1249.0,"[0.003017902374267578, 0.0030608177185058594, ...",971.0,"[0.00029730796813964844, 0.000293731689453125,...","[0.005849599838256836, 0.0058438777923583984, ..."


In [5]:
import re
columnNames = list(instances)
regexTimes = re.compile(".*Times")
timesColumnNames = list(filter(regexTimes.match, columnNames))

regexValues = re.compile(".*Values")
valuesColumnNames = list(filter(regexValues.match, columnNames))

In [6]:
# From https://stackoverflow.com/a/40449726
def explode(df, lst_cols, fill_value=''):
    # make sure `lst_cols` is a list
    if lst_cols and not isinstance(lst_cols, list):
        lst_cols = [lst_cols]
    # all columns except `lst_cols`
    idx_cols = df.columns.difference(lst_cols)

    # calculate lengths of lists
    lens = df[lst_cols[0]].str.len()

    if (lens > 0).all():
        # ALL lists in cells aren't empty
        return pd.DataFrame({
            col:np.repeat(df[col].values, df[lst_cols[0]].str.len())
            for col in idx_cols
        }).assign(**{col:np.concatenate(df[col].values) for col in lst_cols}) \
          .loc[:, df.columns]
    else:
        # at least one list in cells is empty
        return pd.DataFrame({
            col:np.repeat(df[col].values, df[lst_cols[0]].str.len())
            for col in idx_cols
        }).assign(**{col:np.concatenate(df[col].values) for col in lst_cols}) \
          .append(df.loc[lens==0, idx_cols]).fillna(fill_value) \
          .loc[:, df.columns]

In [7]:
timesColumns = list(timesColumnNames)
timesColumns.remove("heldKarpTimes")
instances = explode(instances, timesColumns + valuesColumnNames)

operationTimes = instances.filter(regex="Times")
operationValues = instances.filter(regex="Values")

In [None]:
instances.loc[instances["complexFeatures.alternateClusteringCoefficient"] == "None"]

In [None]:
instances.loc[instances["complexFeatures.alternateClusteringCoefficient"].isnull()]

In [9]:
instances.head()

Unnamed: 0,name,complexFeatures.adjacencyCorrelationCoefficient,complexFeatures.adjacencyCorrelationCoefficientTimes,complexFeatures.alternateClusteringCoefficient,complexFeatures.alternateClusteringCoefficientTimes,complexFeatures.averageGeodesicDistance,complexFeatures.averageGeodesicDistanceTimes,complexFeatures.averageShortestPathGeodesicDistance,complexFeatures.averageShortestPathGeodesicDistanceTimes,complexFeatures.clusteringCoefficientTransitivity,...,simpleFeatures.numberVerticesTimes,simpleFeatures.standardDeviationEdgeCost,simpleFeatures.standardDeviationEdgeCostTimes,simpleFeatures.standardDeviationVertexCost,simpleFeatures.standardDeviationVertexCostTimes,simpleFeatures.sumCostNearestNeighbor,simpleFeatures.sumCostNearestNeighborTimes,simpleFeatures.sumNLowestEdgeCost,simpleFeatures.sumNLowestEdgeCostTimes,simpleFeatures.vertexCostPrepTimes
0,ftv70,1.0,0.050234,2415,4.330472,137.7334,0.000203,137.733,0.299299,3,...,6e-06,63.468561,7.7e-05,20.423354,0.000149,1393.0,0.002991,1054.0,0.000413,0.006393
1,ftv70,1.0,0.048132,2415,4.144513,137.7334,0.000222,137.733,0.244307,3,...,4e-06,63.468561,7.5e-05,20.423354,5.8e-05,1393.0,0.002949,1054.0,0.000387,0.006328
2,ftv70,1.0,0.057157,2415,4.254828,137.7334,0.000178,137.733,0.239585,3,...,4e-06,63.468561,7.4e-05,20.423354,6e-05,1393.0,0.003007,1054.0,0.000393,0.00674
3,ftv70,1.0,0.059158,2415,4.431144,137.7334,0.000201,137.733,0.282669,3,...,4e-06,63.468561,7.5e-05,20.423354,5.9e-05,1393.0,0.00306,1054.0,0.000414,0.006581
4,ftv70,1.0,0.049166,2415,4.352321,137.7334,0.000214,137.733,0.294755,3,...,4e-06,63.468561,0.000114,20.423354,8.8e-05,1393.0,0.006425,1054.0,0.000572,0.009613
