## Changepoint collection: detect changepoints in the CCGrid20 paper dataset and save them to files ##

In [28]:
import csv
import pandas as pd
import numpy as np
import string
import matplotlib.pyplot as plt
import datetime as dt
import os

In [27]:
import cpd

In [19]:
# Per-resource measurement tables; each entry starts out as an empty frame.
data = {}

data["CPU"] = pd.DataFrame()
data["Memory"] = pd.DataFrame()
data["Disk"] = pd.DataFrame()

# Load every file found in ./CPU. Each file name is split on "-" into
# exactly two parts, "<hw_type>-<testname>", and both parts are attached
# to the rows read from that file.
source_dir = './CPU'
cpu_frames = []
for f in os.listdir(source_dir):
    f_df = pd.read_csv("%s/%s" % (source_dir, f))
    hw_type, testname = f.split("-")
    f_df["hw_type"] = hw_type
    f_df["testname"] = testname
    cpu_frames.append(f_df)

# Concatenate once after the loop: calling pd.concat on the growing frame
# inside the loop re-copies all previously loaded rows on every iteration.
# pd.concat rejects an empty list, so the empty frame is kept in that case.
if cpu_frames:
    data["CPU"] = pd.concat(cpu_frames)
print (data["CPU"].hw_type.value_counts())

d430      391104
c220g5    378288
m510      212908
c8220     180368
c6320     156456
xl170     151776
m400      150773
c220g1    119880
c6220      97928
r320       60622
c6420      58872
Name: hw_type, dtype: int64


In [20]:
# Load every file found in ./Memory. Each file name is split on "-" into
# exactly two parts, "<hw_type>-<testname>", and both parts are attached
# to the rows read from that file.
source_dir = './Memory'
memory_frames = []
for f in os.listdir(source_dir):
    f_df = pd.read_csv("%s/%s" % (source_dir, f))
    hw_type, testname = f.split("-")
    f_df["hw_type"] = hw_type
    f_df["testname"] = testname
    memory_frames.append(f_df)

# Concatenate once after the loop: calling pd.concat on the growing frame
# inside the loop re-copies all previously loaded rows on every iteration.
# pd.concat rejects an empty list, so data["Memory"] is left as-is then.
if memory_frames:
    data["Memory"] = pd.concat(memory_frames)
print (data["Memory"].hw_type.value_counts())

d430      695296
c220g5    672512
c8220     649088
m510      573088
c6320     443648
c220g1    372416
xl170     269824
m400      200970
c6220     174208
r320      114112
c6420     104832
Name: hw_type, dtype: int64


In [21]:
# Load every file found in ./Disk. Each file name is split on "-" into
# exactly two parts, "<hw_type>-<testname>", and both parts are attached
# to the rows read from that file.
source_dir = './Disk'
disk_frames = []
for f in os.listdir(source_dir):
    f_df = pd.read_csv("%s/%s" % (source_dir, f))
    hw_type, testname = f.split("-")
    f_df["hw_type"] = hw_type
    f_df["testname"] = testname
    disk_frames.append(f_df)

# Concatenate once after the loop: calling pd.concat on the growing frame
# inside the loop re-copies all previously loaded rows on every iteration.
# pd.concat rejects an empty list, so data["Disk"] is left as-is then.
if disk_frames:
    data["Disk"] = pd.concat(disk_frames)
print (data["Disk"].hw_type.value_counts())

m400      114768
d430       86896
c220g5     84048
c8220      82464
m510       72368
c220g1     71616
c6320      56384
xl170      33704
c6420      26208
c6220      21040
r320       14264
Name: hw_type, dtype: int64


In [22]:
# Configure and run cpd.cpd_aggr on the CPU measurements.
metric = "exec_time"
factor_list = ["testname", "total_threads", "dvfs", "socket_num"]
lthreshold_list = [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
df = data["CPU"]

# Create the destination directory if it is not there yet.
dest_dir = "./CPU_results"
if not os.path.exists(dest_dir):
    os.makedirs(dest_dir)

# Empty frames that only carry column names; they are handed to cpd_aggr.
# NOTE(review): presumably cpd_aggr fills these and writes its output under
# dest_dir -- confirm against the cpd module.
meta_columns = ["hw_type", "total_measurements", "configurations",
                "start_timestamp", "end_timestamp"]
key_columns = ["hw_type", "lthreshold"] + factor_list

meta_df = pd.DataFrame(columns=meta_columns)
indiv_cps = pd.DataFrame(columns=key_columns + ["timestamp", "percent_change"])
indiv_durations = pd.DataFrame(columns=key_columns + ["segment_duration"])

cpd.cpd_aggr(df, factor_list, metric, meta_df, lthreshold_list,
             indiv_cps, indiv_durations, dest_dir)


Processing hw_type: c220g1
Processed configs: 576
Processed measurements: 119880

Processing hw_type: c220g5
Processed configs: 1152
Processed measurements: 498168

Processing hw_type: c6220
Processed configs: 1728
Processed measurements: 596096

Processing hw_type: c6320
Processed configs: 2304
Processed measurements: 752552

Processing hw_type: c6420
Processed configs: 2592
Processed measurements: 811424

Processing hw_type: c8220
Processed configs: 3168
Processed measurements: 991792

Processing hw_type: d430
Processed configs: 3744
Processed measurements: 1382896

Processing hw_type: m400
Processed configs: 3872
Processed measurements: 1533669

Processing hw_type: m510
Processed configs: 4160
Processed measurements: 1746577

Processing hw_type: r320
Processed configs: 4432
Processed measurements: 1807199

Processing hw_type: xl170
Processed configs: 4720
Processed measurements: 1958975


In [23]:
# Configure and run cpd.cpd_aggr on the Memory measurements.
metric = "mean"
factor_list = ["testname", "dvfs", "socket_num"]
lthreshold_list = [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
df = data["Memory"]

# Create the destination directory if it is not there yet.
dest_dir = "./Memory_results"
if not os.path.exists(dest_dir):
    os.makedirs(dest_dir)

# Empty frames that only carry column names; they are handed to cpd_aggr.
# NOTE(review): presumably cpd_aggr fills these and writes its output under
# dest_dir -- confirm against the cpd module.
meta_columns = ["hw_type", "total_measurements", "configurations",
                "start_timestamp", "end_timestamp"]
key_columns = ["hw_type", "lthreshold"] + factor_list

meta_df = pd.DataFrame(columns=meta_columns)
indiv_cps = pd.DataFrame(columns=key_columns + ["timestamp", "percent_change"])
indiv_durations = pd.DataFrame(columns=key_columns + ["segment_duration"])

cpd.cpd_aggr(df, factor_list, metric, meta_df, lthreshold_list,
             indiv_cps, indiv_durations, dest_dir)


Processing hw_type: c220g1
Processed configs: 1024
Processed measurements: 372416

Processing hw_type: c220g5
Processed configs: 2048
Processed measurements: 1044928

Processing hw_type: c6220
Processed configs: 3072
Processed measurements: 1219136

Processing hw_type: c6320
Processed configs: 4096
Processed measurements: 1662784

Processing hw_type: c6420
Processed configs: 4608
Processed measurements: 1767616

Processing hw_type: c8220
Processed configs: 5632
Processed measurements: 2416704

Processing hw_type: d430
Processed configs: 6656
Processed measurements: 3112000

Processing hw_type: m400
Processed configs: 6768
Processed measurements: 3312970

Processing hw_type: m510
Processed configs: 7280
Processed measurements: 3886058

Processing hw_type: r320
Processed configs: 7792
Processed measurements: 4000170

Processing hw_type: xl170
Processed configs: 8304
Processed measurements: 4269994


In [25]:
# Configure and run cpd.cpd_aggr on the Disk measurements.
metric = "mean"
factor_list = ["testname", "device", "iodepth"]
lthreshold_list = [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
df = data["Disk"]

# Create the destination directory if it is not there yet.
dest_dir = "./Disk_results"
if not os.path.exists(dest_dir):
    os.makedirs(dest_dir)

# Result dataframes start empty, with only their column names declared;
# this is where the results will be saved.
meta_columns = ["hw_type", "total_measurements", "configurations",
                "start_timestamp", "end_timestamp"]
key_columns = ["hw_type", "lthreshold"] + factor_list

meta_df = pd.DataFrame(columns=meta_columns)
indiv_cps = pd.DataFrame(columns=key_columns + ["timestamp", "percent_change"])
indiv_durations = pd.DataFrame(columns=key_columns + ["segment_duration"])

cpd.cpd_aggr(df, factor_list, metric, meta_df, lthreshold_list,
             indiv_cps, indiv_durations, dest_dir)


Processing hw_type: c220g1
Processed configs: 192
Processed measurements: 71616

Processing hw_type: c220g5
Processed configs: 320
Processed measurements: 155664

Processing hw_type: c6220
Processed configs: 448
Processed measurements: 176704

Processing hw_type: c6320
Processed configs: 576
Processed measurements: 233088

Processing hw_type: c6420
Processed configs: 704
Processed measurements: 259296

Processing hw_type: c8220
Processed configs: 832
Processed measurements: 341760

Processing hw_type: d430
Processed configs: 960
Processed measurements: 428656

Processing hw_type: m400
Processed configs: 1024
Processed measurements: 543424

Processing hw_type: m510
Processed configs: 1088
Processed measurements: 615792

Processing hw_type: r320
Processed configs: 1152
Processed measurements: 630056

Processing hw_type: xl170
Processed configs: 1216
Processed measurements: 663760
