# Extract data from Prometheus measurements
Tests performed

Test 1: 17:53:47 - 17:56:15

Test 10: 17:34:13 - 17:36:39 last 17:38:06

Test 50: 18:57:59 - 18:59:41 last 19:02:05 actual stop 19:10

In [3]:
import pandas as pd

In [1]:
cpu_path = './data/grafana_data_export_cpu.csv'
ram_path = './data/grafana_data_export_ram.csv'
cluster_cpu_path = './data/grafana_data_export_cluster_cpu.csv'
cluster_ram_path = './data/grafana_data_export_cluster_ram.csv'
pod_table_path = './results/pod_table.csv'
master_table_path = './results/master_table.csv'
worker_table_path = './results/worker_table.csv'

In [2]:
index = ['idle', '1', '10', '50']
pod = 'prediction-deployment-785855ff75-8c8rr'
columns = ['mean', 'max', 'min']

tests = {
    'idle': { 'start': '2020-06-14T13:00:00+02:00', 'stop': '2020-06-14T14:00:00+02:00' },
    '1': { 'start': '2020-06-14T17:53:47+02:00', 'stop': '2020-06-14T17:56:15+02:00' },
    '10': { 'start': '2020-06-14T17:34:13+02:00', 'stop': '2020-06-14T17:38:06+02:00' },
    '50': { 'start': '2020-06-14T18:57:59+02:00', 'stop': '2020-06-14T19:10:05+02:00' }
}

In [2]:
def get_metrics(df, tests, pod, columns, index, prefix=None, suffix=None):
    
    df = df[[pod]]
    
    result = pd.DataFrame(columns=columns)
    
    for test in tests:
        start = tests[test]['start']
        stop = tests[test]['stop']
    
        # Limit 
        range_df = df[df.index > start]
        range_df = range_df[range_df.index < stop]

        # Compute avg
        summary = range_df.describe().transpose()[columns]

        result = result.append(summary)
    
    result['devices'] = index
    result.set_index('devices', inplace=True)
    
    if prefix:
        result.columns = [prefix + str(col) for col in result.columns]
    
    if suffix:
        result.columns = [str(col) + suffix for col in result.columns]
    
    return result
    

# Pod table
## Pod CPU

In [104]:
cpu_df = pd.read_csv(cpu_path, sep=';')
cpu_df.columns

Index(['Time', 'chronograf-chronograf-589555d744-htw2l',
       'coredns-d798c9dd-2ctbp', 'dashboard-metrics-scraper-c79c65bb7-q9xmb',
       'grafana-78fb9f87d4-drvjg', 'influxdb-0',
       'kafka-broker-5477466f54-bjmhz', 'kafka-zk-7d49659d74-gqct7',
       'kapacitor-kapacitor-7465969874-hglr5',
       'kubernetes-dashboard-6f89967466-vz68v',
       'local-path-provisioner-58fb86bdfd-9vtpd',
       'metrics-server-6d684c7b5-gljq4',
       'mqtt2kafka-deployment-8fd8bb6f8-677z4',
       'prediction-deployment-785855ff75-8c8rr',
       'prometheus-alertmanager-6f764c9487-qxgjs',
       'prometheus-kube-state-metrics-6756bbbb8-4nn8d',
       'prometheus-node-exporter-rhdq2', 'prometheus-node-exporter-s89ff',
       'prometheus-pushgateway-69c85cc8b-8lz6v',
       'prometheus-server-69b5694445-gbrv5', 'svclb-grafana-lhtwt',
       'svclb-grafana-tc7g8', 'svclb-traefik-576tq', 'svclb-traefik-vlcwc',
       'telegraf-84c4b5cdcb-l6tgw', 'traefik-65bccdc4bd-68bxv'],
      dtype='object')

In [256]:
cpu_table = get_metrics(cpu_df, tests, pod, columns, index, prefix='CPU_')
cpu_table

Unnamed: 0_level_0,CPU_mean,CPU_max,CPU_min
devices,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
idle,0.016,0.017,0.012
1,0.046,0.085,0.018
10,0.272,0.553,0.019
50,0.624,0.804,0.021


## Pod RAM

In [229]:
ram_df = pd.read_csv(ram_path, sep=';', thousands=',')
ram_df

Unnamed: 0,Time,svclb-traefik-576tq,kafka-broker-5477466f54-bjmhz,prometheus-node-exporter-s89ff,prediction-deployment-785855ff75-8c8rr,local-path-provisioner-58fb86bdfd-9vtpd,prometheus-kube-state-metrics-6756bbbb8-4nn8d,prometheus-pushgateway-69c85cc8b-8lz6v,telegraf-84c4b5cdcb-l6tgw,influxdb-0,...,prometheus-node-exporter-rhdq2,kafka-zk-7d49659d74-gqct7,svclb-grafana-tc7g8,traefik-65bccdc4bd-68bxv,dashboard-metrics-scraper-c79c65bb7-q9xmb,svclb-grafana-lhtwt,prometheus-server-69b5694445-gbrv5,coredns-d798c9dd-2ctbp,chronograf-chronograf-589555d744-htw2l,kapacitor-kapacitor-7465969874-hglr5
0,2020-06-14T12:09:00+02:00,,,,,,,,,,...,,,,,,,,,,
1,2020-06-14T12:09:15+02:00,,,,,,,,,,...,,,,,,,,,,
2,2020-06-14T12:09:30+02:00,,,,,,,,,,...,,,,,,,,,,
3,2020-06-14T12:09:45+02:00,,,,,,,,,,...,,,,,,,,,,
4,2020-06-14T12:10:00+02:00,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1997,2020-06-14T20:28:15+02:00,4677632.0,728219648.0,6541312.0,406945792.0,8593408.0,9859072.0,6373376.0,32894976.0,74784768.0,...,10240000.0,52146176.0,1359872.0,14258176.0,19427328.0,1744896.0,205492224.0,8335360.0,29134848.0,33304576.0
1998,2020-06-14T20:28:30+02:00,4677632.0,728219648.0,6541312.0,406945792.0,8593408.0,9859072.0,6373376.0,32894976.0,74784768.0,...,10240000.0,52146176.0,1359872.0,14258176.0,19427328.0,1744896.0,205492224.0,8335360.0,29134848.0,33304576.0
1999,2020-06-14T20:28:45+02:00,4677632.0,728219648.0,6549504.0,406945792.0,8593408.0,9859072.0,6373376.0,32894976.0,74784768.0,...,10240000.0,52146176.0,1359872.0,14258176.0,19427328.0,1744896.0,205492224.0,8335360.0,29134848.0,33304576.0
2000,2020-06-14T20:29:00+02:00,4677632.0,728092672.0,6549504.0,406290432.0,8593408.0,9859072.0,6467584.0,32808960.0,74784768.0,...,10240000.0,52146176.0,1359872.0,14258176.0,19460096.0,1744896.0,212254720.0,8335360.0,29306880.0,33452032.0


In [313]:
ram_table = get_metrics(ram_df, tests, pod, columns, index, prefix='RAM_', suffix=' (MB)')
ram_table = ram_table/10**6 # Convert to MB
ram_table

Unnamed: 0_level_0,RAM_mean (MB),RAM_max (MB),RAM_min (MB)
devices,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
idle,238.074944,239.050752,237.019136
1,400.307086,400.379904,400.183296
10,396.046336,400.347136,393.965568
50,407.596722,411.38176,400.65024


## Pod table

In [323]:
pod_table = pd.concat([cpu_table, ram_table], axis=1).round(3)
pod_table.to_csv(pod_table_path)
pod_table

Unnamed: 0_level_0,CPU_mean,CPU_max,CPU_min,RAM_mean (MB),RAM_max (MB),RAM_min (MB)
devices,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
idle,0.016,0.017,0.012,238.075,239.051,237.019
1,0.046,0.085,0.018,400.307,400.38,400.183
10,0.272,0.553,0.019,396.046,400.347,393.966
50,0.624,0.804,0.021,407.597,411.382,400.65


# Master table
## Master CPU

In [295]:
master_column = 'k3smaster'

In [296]:
master_cpu_df = pd.read_csv(cluster_cpu_path, sep=';', index_col='Time')
master_cpu_df.describe()

Unnamed: 0,k3smaster,k3sworker
count,347.0,347.0
mean,0.294239,0.300571
std,0.022028,0.138697
min,0.143,0.113
25%,0.282,0.2525
50%,0.298,0.27
75%,0.308,0.282
max,0.363,1.046


In [298]:
master_cpu_table = get_metrics(master_cpu_df, tests, master_column, columns, index, prefix='CPU_')
master_cpu_table

Unnamed: 0_level_0,CPU_mean,CPU_max,CPU_min
devices,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
idle,0.271276,0.28,0.259
1,0.2915,0.297,0.286
10,0.2795,0.289,0.27
50,0.264,0.313,0.226


## Master RAM

In [301]:
master_ram_df = pd.read_csv(cluster_ram_path, sep=';', thousands=',', index_col='Time')
master_ram_df

Unnamed: 0_level_0,k3smaster,k3sworker
Time,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-06-14T00:00:00+02:00,,
2020-06-14T00:02:00+02:00,,
2020-06-14T00:04:00+02:00,,
2020-06-14T00:06:00+02:00,,
2020-06-14T00:08:00+02:00,,
...,...,...
2020-06-14T23:52:00+02:00,1.181655e+09,2.423689e+09
2020-06-14T23:54:00+02:00,1.182888e+09,2.423153e+09
2020-06-14T23:56:00+02:00,1.181819e+09,2.441114e+09
2020-06-14T23:58:00+02:00,1.181688e+09,2.447184e+09


In [314]:
master_ram_table = get_metrics(master_ram_df, tests, master_column, columns, index, prefix='RAM_', suffix=' (GB)')
master_ram_table /= 10**9
master_ram_table

Unnamed: 0_level_0,RAM_mean (GB),RAM_max (GB),RAM_min (GB)
devices,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
idle,1.0872,1.095258,1.081295
1,1.150165,1.150214,1.150116
10,1.148592,1.14867,1.148514
50,1.15835,1.166922,1.15644


## Master table

In [324]:
master_table = pd.concat([master_cpu_table, master_ram_table], axis=1).round(3)
master_table.to_csv(master_table_path)
master_table.round(3)

Unnamed: 0_level_0,CPU_mean,CPU_max,CPU_min,RAM_mean (GB),RAM_max (GB),RAM_min (GB)
devices,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
idle,0.271,0.28,0.259,1.087,1.095,1.081
1,0.292,0.297,0.286,1.15,1.15,1.15
10,0.279,0.289,0.27,1.149,1.149,1.149
50,0.264,0.313,0.226,1.158,1.167,1.156


# Worker
## Worker CPU

In [305]:
worker_column = 'k3sworker'
worker_cpu_table = get_metrics(master_cpu_df, tests, worker_column, columns, index, prefix='CPU_')
worker_cpu_table

Unnamed: 0_level_0,CPU_mean,CPU_max,CPU_min
devices,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
idle,0.235276,0.246,0.229
1,0.287,0.321,0.253
10,0.6455,0.774,0.517
50,0.855,1.046,0.279


## Worker RAM

In [315]:
worker_ram_table = get_metrics(master_ram_df, tests, worker_column, columns, index, prefix='RAM_', suffix=' (GB)')
worker_ram_table /= 10**9
worker_ram_table

Unnamed: 0_level_0,RAM_mean (GB),RAM_max (GB),RAM_min (GB)
devices,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
idle,1.647483,1.652396,1.643987
1,2.246337,2.248618,2.244055
10,2.221894,2.225246,2.218541
50,2.309435,2.344145,2.291343


In [321]:
worker_table = pd.concat([worker_cpu_table, worker_ram_table], axis=1).round(3)
worker_table.to_csv(worker_table_path)
worker_table

Unnamed: 0_level_0,CPU_mean,CPU_max,CPU_min,RAM_mean (GB),RAM_max (GB),RAM_min (GB)
devices,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
idle,0.235,0.246,0.229,1.647,1.652,1.644
1,0.287,0.321,0.253,2.246,2.249,2.244
10,0.646,0.774,0.517,2.222,2.225,2.219
50,0.855,1.046,0.279,2.309,2.344,2.291


# Prediction performance

In [3]:
import pandas as pd

In [12]:
prediction_perf_path = './data/chronograf_data_export_prediction.csv'
perf_tests = {
    '1': { 'start': '2020-06-15T14:14:30+02:00', 'stop': '2020-06-15T14:17:37+02:00' },
    '10': { 'start': '2020-06-15T14:18:41+02:00', 'stop': '2020-06-15T14:22:15+02:00' },
    '50': { 'start': '2020-06-15T14:23:02+02:00', 'stop': '2020-06-15T14:36:43+02:00' }
}

columns = ['mean', 'max', 'min']

index = ['1', '10', '50']

performance_table_path = './results/performance_table.csv'

In [5]:
prediction_perf_df = pd.read_csv(prediction_perf_path, sep=',', thousands=',', index_col='time')
prediction_perf_df.dropna(how='all', axis=0).describe()

Unnamed: 0,turbofan_prediction_stats.mean_Sensor0_events_active,turbofan_prediction_stats.mean_Sensor0_events_runtime_avg,turbofan_prediction_stats.mean_Sensor0_events_s
count,207.0,207.0,207.0
mean,0.502415,0.047068,12.371981
std,0.681762,0.026001,19.995188
min,0.0,0.000233,0.0
25%,0.0,0.035271,0.0
50%,0.0,0.060603,4.0
75%,1.0,0.063792,22.5
max,2.0,0.120745,218.0


In [6]:
prediction_perf_df.columns

Index(['turbofan_prediction_stats.mean_Sensor0_events_active',
       'turbofan_prediction_stats.mean_Sensor0_events_runtime_avg',
       'turbofan_prediction_stats.mean_Sensor0_events_s'],
      dtype='object')

In [7]:
prediction_perf_df.columns
latency_column = 'turbofan_prediction_stats.mean_Sensor0_events_runtime_avg'

In [8]:
latency_table = get_metrics(prediction_perf_df, perf_tests, latency_column, columns, index, prefix='latency_', suffix=' (ms)')
latency_table *= 1000

In [9]:
thoughput_column = 'turbofan_prediction_stats.mean_Sensor0_events_s'
throughput_table = get_metrics(prediction_perf_df, perf_tests, thoughput_column, columns, index, prefix='throughput_', suffix=' (events per second)')
throughput_table

Unnamed: 0_level_0,throughput_mean (events per second),throughput_max (events per second),throughput_min (events per second)
devices,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,2.157895,4.0,0.0
10,19.428571,40.0,2.0
50,25.756098,218.0,4.0


In [13]:
performance_table = pd.concat([latency_table, throughput_table], axis=1)
performance_table.round(3).to_csv(performance_table_path)
performance_table

Unnamed: 0_level_0,latency_mean (ms),latency_max (ms),latency_min (ms),throughput_mean (events per second),throughput_max (events per second),throughput_min (events per second)
devices,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,19.513309,47.663625,2.767948,2.157895,4.0,0.0
10,48.007808,120.745347,3.084262,19.428571,40.0,2.0
50,56.457937,115.424437,0.232931,25.756098,218.0,4.0
