In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import glob

%matplotlib inline

In [47]:
def get_out_file_names(folder:str)->list:
    return glob.glob(f"{folder}/*.out", recursive=True)

def get_csv_file_names(folder:str)->list:
    return glob.glob(f"{folder}/*.csv", recursive=True)

def get_basic_data(filename:str)->tuple:
    fields = ["Best Distance: ", "Process Time: ", "Wall Time: "]
    t = []
    with open(filename, "r") as f:
        lines = [line.strip() for line in f.readlines()]
        for field in fields:
            for line in lines:
                if line.startswith(field):
                    t.append(float(line[len(field):]))
    return tuple(t)

def get_out_df(filenames:list, is_cached:bool)->pd.DataFrame:
    insts = ["inst-0", "inst-13", "inst-5"]
    variants = ["base", "variant1", "variant2"]
    runs = [1, 2, 3, 4, 5]
    df_dict = {
        "cached": [],
        "instance": [],
        "variant": [],
        "run_number": [],
        "best_distance": [],
        "process_time": [],
        "wall_time": [],
    }
    for inst in insts:
        for variant in variants:
            for run in runs:
                filepat = f"{inst}.{variant}.{run}.out"
                for fname in filenames:
                    if filepat in fname:
                        bd, pt, wt = get_basic_data(fname)
                        df_dict['cached'].append(is_cached)
                        df_dict['instance'].append(inst)
                        df_dict['variant'].append(variant)
                        df_dict['run_number'].append(run)
                        df_dict['best_distance'].append(bd)
                        df_dict['process_time'].append(pt)
                        df_dict['wall_time'].append(wt)
    return pd.DataFrame(df_dict)

    
def test():
    f = get_out_file_names('cache')
    df = get_out_df("None", f, True)
    print(df.describe().T)

def get_all_out_files_data_as_dataframe():
    cf = get_out_file_names('cache')
    ncf = get_out_file_names('nocache')
    cdf = get_out_df(cf, True)
    ncdf = get_out_df(ncf, False)
    return cdf, ncdf

def get_description_df(cdf:pd.DataFrame, ncdf:pd.DataFrame):
    df = pd.concat([cdf, ncdf])
    print(df.shape, cdf.shape, ncdf.shape)
    bdf = df[['best_distance']]
    pdf = df[['process_time']]
    bdf = bdf[['cached', 'instance', 'variant', 'best_distance', 'process_time']].\
            groupby(by=['instance', 'cached', 'variant']).describe()
    pdf = pdf[['cached', 'instance', 'variant', 'best_distance', 'process_time']].\
            groupby(by=['instance', 'cached', 'variant']).describe()
    full_df = df[['cached', 'instance', 'variant', 'best_distance', 'process_time']].\
            groupby(by=['instance', 'cached', 'variant']).describe()
    return full_df, bdf, pdf

cdf, ncdf = get_all_out_files_data_as_dataframe()
full_description, best_distance_description, process_time_description = get_description_df(cdf, ncdf)

(90, 7) (45, 7) (45, 7)


KeyError: "['variant', 'process_time', 'instance', 'cached'] not in index"

In [46]:
description_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,best_distance,best_distance,best_distance,best_distance,best_distance,best_distance,best_distance,best_distance,process_time,process_time,process_time,process_time,process_time,process_time,process_time,process_time
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max
instance,cached,variant,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2
inst-0,False,base,5.0,3483211.0,27401.561743,3452940.0,3461627.0,3486302.0,3493055.0,3522131.0,5.0,31.896875,2.652516,29.375,30.828125,30.96875,31.96875,36.34375
inst-0,False,variant1,5.0,3560954.0,41682.079138,3504536.0,3531718.0,3579031.0,3582463.0,3607023.0,5.0,9.81875,0.454013,9.09375,9.671875,10.03125,10.0625,10.234375
inst-0,False,variant2,5.0,3594154.0,19554.124732,3564625.0,3588953.0,3594459.0,3607021.0,3615712.0,5.0,10.4125,0.270543,10.078125,10.203125,10.484375,10.546875,10.75
inst-0,True,base,5.0,3483211.0,27401.561743,3452940.0,3461627.0,3486302.0,3493055.0,3522131.0,5.0,22.584375,0.607228,21.671875,22.5,22.625,22.765625,23.359375
inst-0,True,variant1,5.0,3560954.0,41682.079138,3504536.0,3531718.0,3579031.0,3582463.0,3607023.0,5.0,9.728125,0.956104,8.359375,9.453125,9.8125,10.015625,11.0
inst-0,True,variant2,5.0,3594154.0,19554.124732,3564625.0,3588953.0,3594459.0,3607021.0,3615712.0,5.0,9.996875,0.564925,9.46875,9.65625,9.78125,10.1875,10.890625
inst-13,False,base,5.0,6324916.0,28343.936094,6305152.0,6309133.0,6312294.0,6323959.0,6374044.0,5.0,205.35,11.371273,194.328125,195.21875,202.734375,215.859375,218.609375
inst-13,False,variant1,5.0,6400748.0,27786.048108,6364196.0,6388281.0,6394371.0,6425151.0,6431742.0,5.0,32.2375,3.301156,29.1875,30.109375,30.734375,33.96875,37.1875
inst-13,False,variant2,5.0,6441843.0,49424.881214,6397159.0,6417805.0,6425238.0,6444003.0,6525008.0,5.0,33.90625,1.252244,32.828125,33.296875,33.515625,33.84375,36.046875
inst-13,True,base,5.0,6324916.0,28343.936094,6305152.0,6309133.0,6312294.0,6323959.0,6374044.0,5.0,157.190625,15.452894,140.203125,151.125,154.703125,157.765625,182.15625


In [48]:
cdf = get_all_out_files_data_as_dataframe()

In [38]:
ncdf[['cached', 'instance', 'variant', 'best_distance', 'process_time']].groupby(by=['instance', 'cached', 'variant']).describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,best_distance,best_distance,best_distance,best_distance,best_distance,best_distance,best_distance,best_distance,process_time,process_time,process_time,process_time,process_time,process_time,process_time,process_time
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max
instance,cached,variant,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2
inst-0,False,base,5.0,3483211.0,27401.561743,3452940.0,3461627.0,3486302.0,3493055.0,3522131.0,5.0,31.896875,2.652516,29.375,30.828125,30.96875,31.96875,36.34375
inst-0,False,variant1,5.0,3560954.0,41682.079138,3504536.0,3531718.0,3579031.0,3582463.0,3607023.0,5.0,9.81875,0.454013,9.09375,9.671875,10.03125,10.0625,10.234375
inst-0,False,variant2,5.0,3594154.0,19554.124732,3564625.0,3588953.0,3594459.0,3607021.0,3615712.0,5.0,10.4125,0.270543,10.078125,10.203125,10.484375,10.546875,10.75
inst-13,False,base,5.0,6324916.0,28343.936094,6305152.0,6309133.0,6312294.0,6323959.0,6374044.0,5.0,205.35,11.371273,194.328125,195.21875,202.734375,215.859375,218.609375
inst-13,False,variant1,5.0,6400748.0,27786.048108,6364196.0,6388281.0,6394371.0,6425151.0,6431742.0,5.0,32.2375,3.301156,29.1875,30.109375,30.734375,33.96875,37.1875
inst-13,False,variant2,5.0,6441843.0,49424.881214,6397159.0,6417805.0,6425238.0,6444003.0,6525008.0,5.0,33.90625,1.252244,32.828125,33.296875,33.515625,33.84375,36.046875
inst-5,False,base,5.0,11153410.0,36307.619408,11091910.0,11157080.0,11164260.0,11165580.0,11188210.0,5.0,2089.825,61.155074,2010.734375,2058.875,2091.34375,2113.625,2174.546875
inst-5,False,variant1,5.0,11350440.0,54096.755444,11302910.0,11311950.0,11320490.0,11395320.0,11421530.0,5.0,159.08125,6.889884,148.125,159.578125,160.109375,160.328125,167.265625
inst-5,False,variant2,5.0,11432680.0,74494.355171,11331780.0,11407550.0,11427110.0,11461760.0,11535190.0,5.0,164.76875,10.036672,153.875,155.390625,168.21875,168.546875,177.8125


In [None]:
c