In [46]:
import itertools
import os
import re
import string
import sys

import numpy as np
import pandas as pd
import scipy.stats as stats

In [47]:
def atoi(text):
    """Return ``text`` as an ``int`` when it consists only of digits, else unchanged."""
    if text.isdigit():
        return int(text)
    return text

def natural_keys(text):
    """Build a natural-sort key for ``text``.

    The string is split on runs of digits (the runs themselves are kept), and
    each piece is passed through ``atoi`` so digit runs become integers. Used
    as a sort key this orders "x10" after "x2".
    """
    pieces = re.split(r'(\d+)', text)
    return list(map(atoi, pieces))

In [48]:
def calc_mean(list, z_threshold=0.8):
    """Return the outlier-trimmed mean of timing samples, in seconds.

    Samples are treated as milliseconds. Those whose z-score (population
    standard deviation) lies outside ``[-z_threshold, z_threshold]`` are
    discarded, the remaining ones are averaged, and the result is divided by
    1000 to convert to seconds.

    Parameters
    ----------
    list : sequence of numbers
        Timing samples in milliseconds. The name shadows the builtin but is
        kept because callers pass this argument by keyword.
    z_threshold : float, default 0.8
        Largest absolute z-score a sample may have and still be averaged.

    Returns
    -------
    float
        Trimmed mean in seconds, or NaN when ``list`` is empty.
    """
    samples = np.asarray(list, dtype=float)
    if samples.size == 0:
        return float('nan')

    mean = samples.mean()
    std = samples.std()

    if std == 0:
        # All samples are identical (this includes the single-sample case):
        # z-scores would be 0/0, so there is nothing to trim.
        kept = samples
    else:
        z_scores = (samples - mean) / std
        kept = samples[np.abs(z_scores) <= z_threshold]
        if kept.size == 0:
            # E.g. two distinct samples always score exactly +/-1; fall back
            # to the plain mean rather than averaging an empty selection.
            kept = samples

    return float(kept.mean()) / 1000  # convert milliseconds to seconds

In [49]:
def merge_results(paths):
    """Read every CSV file in ``paths`` and stack them into one DataFrame.

    Rows keep the order of ``paths`` and the result is indexed 0..n-1 across
    all files, so index labels never repeat between inputs.

    Parameters
    ----------
    paths : iterable of str
        Locations of comma-separated files to read.

    Returns
    -------
    pandas.DataFrame
        The concatenation of all files.

    Raises
    ------
    ValueError
        If ``paths`` is empty.
    """
    frames = [pd.read_csv(path, sep=',') for path in paths]
    if not frames:
        raise ValueError("merge_results() needs at least one path to read")
    # ignore_index renumbers the rows consecutively across all inputs.
    return pd.concat(frames, ignore_index=True)

In [78]:
def results(root_path='../archive'):
    """Print the trimmed mean write time for every measured configuration.

    The write-time result files under ``root_path``/read_write are merged, and
    for each combination of baseline, language, taskset, execution, platform,
    access pattern and row count that has at least one measurement, one
    comma-separated line is printed:

        baseline,language,taskset,execution,platform,seq_rand,nrow,time

    where ``time`` is the value returned by ``calc_mean`` for that group.
    Lines are emitted in the nesting order of the lists defined below.

    Parameters
    ----------
    root_path : str, default '../archive'
        Directory that contains the ``read_write`` result folder.
    """
    write_results = [f'{root_path}/read_write/CPP_R13_Experiment1_Write_times.dat',
                     f'{root_path}/read_write/Java_R1_Experiment1_Write_times.dat',
                     f'{root_path}/read_write/Java_R2_Experiment1_Write_times.dat',
                     f'{root_path}/read_write/Java_R3_Experiment1_Write_times.dat']

    baselines = ["DefaultJava", "Json+GzipJava", "BsonJava", "ProtoBufJava", "KryoJava",
                 "ByteBufferJava", "JsonJava", "FlatBuffersJava",
                 "HandcodedCPP", "inPlaceCPP", "BoostCPP", "ProtoBufCPP", "BsonCPP", "FlatBufCPP",
                 "JsonRust", "BincodeRust", "MessagePackRust", "BsonRust", "FlexBufRust"]
    languages = ["Java", "CPP", "Rust"]
    tasksets = [True, False]
    executions = ["CPU", "IO", "Total"]
    platforms = ["Single", "Parallel"]
    seq_rands = ["Sequential", "Random"]
    nrows = [10000000, 9000000, 8000000, 7000000, 6000000,
             5000000, 4000000, 3000000, 2000000, 1000000]

    # Column order matches the tuple order produced by itertools.product below.
    group_columns = ["baseline", "language", "taskset", "execution",
                     "platform", "seq_rand", "nrow"]

    df_write = merge_results(write_results)

    # Group once instead of re-filtering the whole frame for every combination.
    times_by_config = {
        key: group.to_numpy()
        for key, group in df_write.groupby(group_columns, sort=False)["time"]
    }

    # Walk the combinations in a fixed order so the report order does not
    # depend on the row order of the input files.
    for config in itertools.product(baselines, languages, tasksets, executions,
                                    platforms, seq_rands, nrows):
        times = times_by_config.get(config)
        if times is None:
            continue  # nothing was measured for this combination
        avg_time = calc_mean(list=times)
        baseline, language, taskset, execution, platform, seq_rand, nrow = config
        print(f"{baseline},{language},{taskset},{execution},{platform},{seq_rand},{nrow},{avg_time}")



   

In [79]:
# Print the aggregated write-time rows (one comma-separated line per configuration).
results()

DefaultJava,Java,True,CPU,Single,Sequential,10000000,473.76
DefaultJava,Java,True,Total,Single,Sequential,10000000,512.286
DefaultJava,Java,False,CPU,Single,Sequential,10000000,481.995
DefaultJava,Java,False,CPU,Parallel,Sequential,10000000,66.372
DefaultJava,Java,False,Total,Single,Sequential,10000000,549.696
DefaultJava,Java,False,Total,Parallel,Sequential,10000000,110.0755
Json+GzipJava,Java,True,CPU,Single,Sequential,10000000,1805.418
Json+GzipJava,Java,True,Total,Single,Sequential,10000000,1844.7525
Json+GzipJava,Java,False,CPU,Single,Sequential,10000000,1870.594
Json+GzipJava,Java,False,CPU,Parallel,Sequential,10000000,503.732
Json+GzipJava,Java,False,Total,Single,Sequential,10000000,1869.048
Json+GzipJava,Java,False,Total,Parallel,Sequential,10000000,505.187
BsonJava,Java,True,CPU,Single,Sequential,10000000,686.92
BsonJava,Java,True,Total,Single,Sequential,10000000,747.843
BsonJava,Java,False,CPU,Single,Sequential,10000000,538.224
BsonJava,Java,False,CPU,Parallel,Sequential,1000