In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import glob

%matplotlib inline

In [64]:
def get_out_file_names(folder:str)->list:
    """Return the paths matching ``<folder>/*.out``.

    ``recursive=True`` is forwarded to ``glob.glob``; it only changes the
    result when ``folder`` itself contains a ``**`` component. The order of
    the returned paths is whatever ``glob`` yields.
    """
    pattern = "{}/*.out".format(folder)
    matches = glob.glob(pattern, recursive=True)
    return matches

def get_csv_file_names(folder:str)->list:
    """Return the paths matching ``<folder>/*.csv``.

    ``recursive=True`` is forwarded to ``glob.glob``; it only changes the
    result when ``folder`` itself contains a ``**`` component. The order of
    the returned paths is whatever ``glob`` yields.
    """
    pattern = "{}/*.csv".format(folder)
    matches = glob.glob(pattern, recursive=True)
    return matches

def get_basic_data(filename:str)->tuple:
    """Extract the three summary metrics from one result file.

    Each line of the file is stripped of surrounding whitespace; the lines that
    begin with ``"Best Distance: "``, ``"Process Time: "`` and ``"Wall Time: "``
    supply the values, and the text after the label is parsed as a float.

    Args:
        filename: Path of the text file to read.

    Returns:
        ``(best_distance, process_time, wall_time)`` as floats, always in that
        order regardless of the order of the lines in the file.

    Raises:
        ValueError: If a field is missing or appears more than once, or if its
            value cannot be parsed as a float.
        OSError: If the file cannot be opened.
    """
    fields = ("Best Distance: ", "Process Time: ", "Wall Time: ")
    with open(filename, "r") as f:
        lines = [line.strip() for line in f]

    values = []
    for field in fields:
        matches = [line[len(field):] for line in lines if line.startswith(field)]
        # Exactly one occurrence is required: anything else would yield a tuple
        # of the wrong length and break callers that unpack three values, with
        # an error message that does not mention the offending file.
        if len(matches) != 1:
            raise ValueError(
                f"{filename}: expected exactly one line starting with "
                f"{field!r}, found {len(matches)}"
            )
        values.append(float(matches[0]))
    return tuple(values)

def get_out_df(filenames:list, is_cached:bool)->pd.DataFrame:
    """Build one row per matching result file.

    For every combination of instance (``inst-0``, ``inst-13``, ``inst-5``),
    variant (``base``, ``variant1``, ``variant2``) and run number (1 to 5), each
    entry of ``filenames`` that contains ``"<instance>.<variant>.<run>.out"`` is
    parsed with ``get_basic_data`` and contributes one row.

    Args:
        filenames: Candidate file paths to scan.
        is_cached: Value stored in the ``cached`` column of every row.

    Returns:
        DataFrame with columns ``cached``, ``instance``, ``variant``,
        ``run_number``, ``best_distance``, ``process_time`` and ``wall_time``.
        Rows follow the instance / variant / run iteration order, not the order
        of ``filenames``.
    """
    instance_names = ("inst-0", "inst-13", "inst-5")
    variant_names = ("base", "variant1", "variant2")
    run_numbers = range(1, 6)
    column_names = (
        "cached",
        "instance",
        "variant",
        "run_number",
        "best_distance",
        "process_time",
        "wall_time",
    )
    columns = {name: [] for name in column_names}

    combos = [
        (inst, variant, run)
        for inst in instance_names
        for variant in variant_names
        for run in run_numbers
    ]
    for inst, variant, run in combos:
        expected = f"{inst}.{variant}.{run}.out"
        for path in filenames:
            # Substring match, so the path may carry any directory prefix.
            if expected not in path:
                continue
            best_distance, process_time, wall_time = get_basic_data(path)
            row = (is_cached, inst, variant, run, best_distance, process_time, wall_time)
            for name, value in zip(column_names, row):
                columns[name].append(value)
    return pd.DataFrame(columns)

    
def test():
    """Smoke check: load the ``.out`` files under ``cache`` and print statistics.

    Prints the transposed ``describe()`` summary of the resulting DataFrame.
    Returns nothing.
    """
    f = get_out_file_names('cache')
    # get_out_df takes exactly (filenames, is_cached); the earlier extra leading
    # "None" argument made this call fail with a TypeError.
    df = get_out_df(f, True)
    print(df.describe().T)

def get_all_out_files_data_as_dataframe():
    """Load the results of the ``cache`` and ``nocache`` folders.

    Returns:
        ``(cached_frame, uncached_frame)``: the DataFrames built by
        ``get_out_df`` from the ``.out`` files of ``cache`` (``cached=True``)
        and ``nocache`` (``cached=False``) respectively.
    """
    # Both folders are listed first, then parsed, cached folder first.
    cached_paths, uncached_paths = [
        get_out_file_names(folder_name) for folder_name in ('cache', 'nocache')
    ]
    cached_frame = get_out_df(cached_paths, True)
    uncached_frame = get_out_df(uncached_paths, False)
    return cached_frame, uncached_frame

def get_description_df(cdf:pd.DataFrame, ncdf:pd.DataFrame):
    """Summarise best distance and process time per instance / cached / variant.

    The two frames are concatenated, their shapes are printed (combined, ``cdf``,
    ``ncdf``), and ``describe()`` is computed per group.

    Args:
        cdf: Frame with at least the columns ``cached``, ``instance``,
            ``variant``, ``best_distance`` and ``process_time``.
        ncdf: Frame with the same columns.

    Returns:
        ``(full, best_distance_only, process_time_only)``: three DataFrames
        indexed by ``instance``, ``cached``, ``variant`` holding the
        ``describe()`` statistics of both metrics, of ``best_distance`` alone
        and of ``process_time`` alone.
    """
    combined = pd.concat([cdf, ncdf])
    print(combined.shape, cdf.shape, ncdf.shape)

    id_columns = ['cached', 'instance', 'variant']
    group_keys = ['instance', 'cached', 'variant']

    def describe_metrics(*metrics):
        # Keep only the identifying columns plus the requested metrics, then
        # compute the per-group descriptive statistics.
        subset = combined[id_columns + list(metrics)]
        return subset.groupby(by=group_keys).describe()

    best_distance_stats = describe_metrics('best_distance')
    process_time_stats = describe_metrics('process_time')
    full_stats = describe_metrics('best_distance', 'process_time')
    return full_stats, best_distance_stats, process_time_stats

# Load the per-run results from the 'cache' and 'nocache' folders, then build the
# grouped describe() summaries: both metrics, best_distance only, process_time only.
cdf, ncdf = get_all_out_files_data_as_dataframe()
full_description, best_distance_description, process_time_description = get_description_df(cdf, ncdf)

(90, 7) (45, 7) (45, 7)


In [67]:
# Display the describe() statistics of best_distance per (instance, cached, variant).
best_distance_description

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,best_distance,best_distance,best_distance,best_distance,best_distance,best_distance,best_distance,best_distance
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,mean,std,min,25%,50%,75%,max
instance,cached,variant,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
inst-0,False,base,5.0,3483211.0,27401.561743,3452940.0,3461627.0,3486302.0,3493055.0,3522131.0
inst-0,False,variant1,5.0,3560954.0,41682.079138,3504536.0,3531718.0,3579031.0,3582463.0,3607023.0
inst-0,False,variant2,5.0,3594154.0,19554.124732,3564625.0,3588953.0,3594459.0,3607021.0,3615712.0
inst-0,True,base,5.0,3483211.0,27401.561743,3452940.0,3461627.0,3486302.0,3493055.0,3522131.0
inst-0,True,variant1,5.0,3560954.0,41682.079138,3504536.0,3531718.0,3579031.0,3582463.0,3607023.0
inst-0,True,variant2,5.0,3594154.0,19554.124732,3564625.0,3588953.0,3594459.0,3607021.0,3615712.0
inst-13,False,base,5.0,6324916.0,28343.936094,6305152.0,6309133.0,6312294.0,6323959.0,6374044.0
inst-13,False,variant1,5.0,6400748.0,27786.048108,6364196.0,6388281.0,6394371.0,6425151.0,6431742.0
inst-13,False,variant2,5.0,6441843.0,49424.881214,6397159.0,6417805.0,6425238.0,6444003.0,6525008.0
inst-13,True,base,5.0,6324916.0,28343.936094,6305152.0,6309133.0,6312294.0,6323959.0,6374044.0


In [61]:
# Reloads both result frames; the cell that defines the helpers makes this same call.
cdf, ncdf = get_all_out_files_data_as_dataframe()

In [62]:
# Display the frame built from the 'cache' folder (rows have cached=True).
cdf

Unnamed: 0,cached,instance,variant,run_number,best_distance,process_time,wall_time
0,True,inst-0,base,1,3452940.0,22.625,22.652222
1,True,inst-0,base,2,3493055.0,21.671875,21.744243
2,True,inst-0,base,3,3486302.0,23.359375,23.40959
3,True,inst-0,base,4,3461627.0,22.765625,22.763834
4,True,inst-0,base,5,3522131.0,22.5,22.509611
5,True,inst-0,variant1,1,3582463.0,8.359375,8.367596
6,True,inst-0,variant1,2,3531718.0,9.8125,9.843245
7,True,inst-0,variant1,3,3504536.0,9.453125,9.450127
8,True,inst-0,variant1,4,3607023.0,10.015625,10.034549
9,True,inst-0,variant1,5,3579031.0,11.0,11.059918


In [None]:
# NOTE(review): unexecuted cell; `c` is not defined anywhere in the visible notebook,
# so running it would raise NameError. Complete or delete this cell.
c