In [1]:
import os
import pandas as pd
import numpy as np

### Load data ###

In [2]:
cpd = {}

for dataset in ["CPU", "Memory", "Disk"]:

    results_dir = "./%s_results" % dataset
    if dataset not in cpd.keys():
        cpd[dataset] = {} 

    for f in os.listdir("%s" % results_dir):
        # Exclude hidden files (most likely thumbnails)
        if f[:2] != "._":
            print("Loading: %s/%s" % (results_dir, f))
            df = pd.read_csv("%s/%s" % (results_dir, f))
            print(len(df))

            if "aggr" in f:
                # Extract CPD threshold from the filename 
                lthreshold = float(f.split("-")[1])

                df["lthreshold"] = lthreshold
                if "aggr" not in cpd[dataset].keys():
                    cpd[dataset]["aggr"] = pd.DataFrame()
                cpd[dataset]["aggr"] = pd.concat([cpd[dataset]["aggr"], df], sort=False)

            if "indiv" in f:
                if "indiv" not in cpd[dataset].keys():
                    cpd[dataset]["indiv"] = pd.DataFrame()
                cpd[dataset]["indiv"] = pd.concat([cpd[dataset]["indiv"], df], sort=False)

            if "meta" in f:
                cpd[dataset]["meta"] = df

            if "segment-durations" in f:
                cpd[dataset]["segment-durations"] = df

Loading: ./CPU_results/indiv-0.70
283
Loading: ./CPU_results/aggr-1.00
4034
Loading: ./CPU_results/indiv-0.40
147
Loading: ./CPU_results/aggr-0.30
354
Loading: ./CPU_results/aggr-0.90
3951
Loading: ./CPU_results/aggr-0.70
3068
Loading: ./CPU_results/indiv-1.00
410
Loading: ./CPU_results/aggr-0.40
930
Loading: ./CPU_results/meta
11
Loading: ./CPU_results/indiv-0.30
49
Loading: ./CPU_results/indiv-0.90
363
Loading: ./CPU_results/indiv-0.80
325
Loading: ./CPU_results/aggr-0.50
1944
Loading: ./CPU_results/segment-durations
6725
Loading: ./CPU_results/aggr-0.60
2459
Loading: ./CPU_results/aggr-0.80
3565
Loading: ./CPU_results/indiv-0.50
193
Loading: ./CPU_results/indiv-0.60
235
Loading: ./Memory_results/indiv-0.70
1406
Loading: ./Memory_results/aggr-1.00
6241
Loading: ./Memory_results/indiv-0.40
925
Loading: ./Memory_results/aggr-0.30
2656
Loading: ./Memory_results/aggr-0.90
6002
Loading: ./Memory_results/aggr-0.70
5420
Loading: ./Memory_results/indiv-1.00
1784
Loading: ./Memory_results/agg

In [3]:
count_df = pd.DataFrame(columns=["CPU: CP #", "Mem: CP #", "Disk: CP #"])
for lthreshold in cpd["CPU"]["indiv"]["lthreshold"].sort_values().unique():
    cpu_cpd_c = len(cpd["CPU"]["indiv"][(cpd["CPU"]["indiv"]["lthreshold"] == lthreshold)])
    
    mem_cpd_c = len(cpd["Memory"]["indiv"][(cpd["Memory"]["indiv"]["lthreshold"] == lthreshold)])
    
    disk_cpd_c = len(cpd["Disk"]["indiv"][(cpd["Disk"]["indiv"]["lthreshold"] == lthreshold)])
    
    count_df.loc[lthreshold] = (cpu_cpd_c, mem_cpd_c, disk_cpd_c)
    
count_df["Total #"] = count_df.sum(1)
count_df.loc['Total',:] = count_df.sum(0)
display(count_df)

Unnamed: 0,CPU: CP #,Mem: CP #,Disk: CP #,Total #
0.3,49,492,42,583.0
0.4,147,925,76,1148.0
0.5,193,1113,108,1414.0
0.6,235,1263,134,1632.0
0.7,283,1406,160,1849.0
0.8,325,1529,191,2045.0
0.9,363,1630,215,2208.0
1.0,410,1784,245,2439.0
Total,2005,10142,1171,13318.0


### Process timestamp ###

In [5]:
import time
for dataset in ['Memory','CPU','Disk']:
    cpd[dataset]['indiv']['timestamp'] = cpd[dataset]['indiv']['timestamp'].map(lambda x: time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(x)))

In [6]:
cpd['Memory']['indiv']

Unnamed: 0,hw_type,lthreshold,testname,dvfs,socket_num,timestamp,percent_change
0,c220g1,0.7,add,no,0,2018-01-06 02:35:34,-3.772623
1,c220g1,0.7,add,no,0,2018-10-20 20:40:56,9.972447
2,c220g1,0.7,add,no,0,2018-12-01 14:37:48,-10.811279
3,c220g1,0.7,add,no,1,2018-01-06 02:35:34,-3.804811
4,c220g1,0.7,add,no,1,2018-10-20 20:43:05,-1.383751
...,...,...,...,...,...,...,...
1258,xl170,0.6,write_memory_rep_stosq_omp,yes,0,2019-11-01 19:19:39,5.007408
1259,xl170,0.6,write_memory_sse,no,0,2019-11-01 19:19:39,6.229457
1260,xl170,0.6,write_memory_sse,yes,0,2019-11-01 19:19:39,6.192962
1261,xl170,0.6,write_memory_sse_omp,no,0,2019-11-01 19:19:39,6.462294


In [7]:
cpd['CPU']['indiv']

Unnamed: 0,hw_type,lthreshold,testname,total_threads,dvfs,socket_num,timestamp,percent_change
0,c220g1,0.7,BT,1,no,0,2018-11-24 14:38:06,0.214888
1,c220g1,0.7,BT,16,no,0,2019-10-02 02:39:00,1.825371
2,c220g1,0.7,CG,1,no,0,2018-12-02 08:39:40,4.477789
3,c220g1,0.7,CG,16,no,0,2019-08-14 02:38:55,0.962987
4,c220g1,0.7,CG,16,no,1,2019-08-14 02:38:55,0.337155
...,...,...,...,...,...,...,...,...
230,xl170,0.6,SP,20,no,0,2019-11-01 19:19:39,-2.716910
231,xl170,0.6,SP,20,yes,0,2019-11-01 19:19:39,-3.021414
232,xl170,0.6,UA,1,yes,0,2019-04-22 20:18:53,-0.239028
233,xl170,0.6,UA,20,yes,0,2019-04-03 20:18:34,-0.241847


In [8]:
cpd['Disk']['indiv']

Unnamed: 0,hw_type,lthreshold,testname,device,iodepth,timestamp,percent_change
0,c220g1,0.7,randread,/dev/sda4,1,2017-12-24 20:26:31,0.525657
1,c220g1,0.7,randread,/dev/sda4,1,2019-08-14 02:38:55,-0.588755
2,c220g1,0.7,randread,/dev/sda4,4096,2018-08-11 08:31:03,-2.547777
3,c220g1,0.7,randread,/dev/sdb,1,2017-11-15 08:26:32,0.480763
4,c220g1,0.7,randread,/dev/sdb,1,2018-10-20 20:40:56,-0.405271
...,...,...,...,...,...,...,...
129,xl170,0.6,read,/dev/sda4,1,2018-09-02 08:20:42,-0.864026
130,xl170,0.6,read,/dev/sda4,1,2019-04-22 20:19:58,-11.289161
131,xl170,0.6,read,/dev/sda4,1,2019-06-30 02:21:24,-3.503756
132,xl170,0.6,write,/dev/sda4,1,2019-04-22 20:19:58,-12.762087
