# 상위 및 하위 throughput 값 비교

In [1]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_boston
import xgboost
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import explained_variance_score
from lightgbm import LGBMRegressor, plot_importance
import lightgbm as lgb
from lightgbm import LGBMClassifier, LGBMRegressor

In [2]:
# Load the knob configuration samples (CSV) into a DataFrame.
config_df = pd.read_csv(filepath_or_buffer="./data/result_config0.csv", sep=",")

In [3]:
# Load the external metrics (CSV) into a DataFrame and keep a handle on the
# 'Gets_KB/sec' column, which this notebook treats as the throughput.
external_df = pd.read_csv(filepath_or_buffer="./data/external_metrics0.csv", sep=",")
throughput_data = external_df["Gets_KB/sec"]

In [4]:
# Print the largest and smallest throughput values.
print(f"max: {max(throughput_data)}")
print(f"min: {min(throughput_data)}")

max: 66377.65
min: 47520.43


In [5]:
# Inspect the low-throughput rows (the 40000-50000 range).
# index1 marks rows at or above 47520.43 (the minimum printed earlier);
# only index2, the upper bound, is used for the selection below.
index1 = external_df["Gets_KB/sec"].ge(47520.43)
index2 = external_df["Gets_KB/sec"].le(50000)

external_df.loc[index2, "Gets_KB/sec"]

1662    48151.93
1795    47520.43
1918    49023.47
2277    48485.40
2437    47543.22
Name: Gets_KB/sec, dtype: float64

In [6]:
# Collect the row labels of the low-throughput rows (selected by index2).
group1 = external_df[index2].index.tolist()
group1

[1662, 1795, 1918, 2277, 2437]

In [7]:
# Inspect the high-throughput rows (65000 and above).
index3 = external_df["Gets_KB/sec"].ge(65000)

external_df.loc[index3, "Gets_KB/sec"]

31      65011.44
280     65632.04
281     65767.16
282     66377.65
285     65321.22
288     66161.63
2007    65339.15
2010    65185.13
2036    65146.51
2051    65367.86
Name: Gets_KB/sec, dtype: float64

In [17]:
# Collect the row labels of the high-throughput rows (selected by index3,
# i.e. 65000 and above).
group2 = external_df[index3].index.tolist()
group2

[31, 280, 281, 282, 285, 288, 2007, 2010, 2036, 2051]

In [23]:
# Configuration knobs (the names look like Redis settings) mapped to the
# values each one may take.
# NOTE(review): numeric tuples look like (low, high) bounds and string tuples
# like the allowed choices; whether `high` is inclusive is not visible here --
# confirm against the code that consumes this table.
_YES_NO = ('yes', 'no')

knobs_rdb = {
    # save* / rdb* knobs
    "save0_0": (700, 1100),
    "save0_1": (1, 9),
    "save1_0": (100, 500),
    "save1_1": (10, 100),
    "save2_0": (30, 90),
    "save2_1": (8000, 12000),
    "rdbcompression": _YES_NO,
    "rdbchecksum": _YES_NO,
    "rdb-save-incremental-fsync": _YES_NO,
    # active-defrag knobs
    "activedefrag": _YES_NO,
    "active-defrag-threshold-lower": (1, 31),
    "active-defrag-threshold-upper": (70, 101),
    "active-defrag-cycle-min": (1, 31),
    "active-defrag-cycle-max": (70, 91),
    # maxmemory / lazyfree knobs
    "maxmemory": (1000, 2900),
    "maxmemory-policy": (
        "volatile-lru",
        "allkeys-lru",
        "volatile-lfu",
        "allkeys-lfu",
        "volatile-random",
        "allkeys-random",
        "volatile-ttl",
        "noeviction",
    ),
    "maxmemory-samples": (3, 7),
    "lazyfree-lazy-eviction": _YES_NO,
    "lazyfree-lazy-expire": _YES_NO,
    "lazyfree-lazy-server-del": _YES_NO,
    # remaining knobs
    "hash-max-ziplist-entries": (256, 751),
    "hash-max-ziplist-value": (16, 257),
    "activerehashing": _YES_NO,
    "hz": (1, 41),
    "dynamic-hz": _YES_NO,
}

# Knob names and their value tuples, both in the dict's insertion order.
knobs_list = [*knobs_rdb]
knobs_bound = [*knobs_rdb.values()]

# Load the knob configuration samples (CSV) into a DataFrame.
# NOTE(review): this path has no "./data/" prefix, unlike the earlier load of
# result_config0.csv in this notebook -- confirm which location is correct.
config_df = pd.read_csv(filepath_or_buffer="./result_config0.csv", sep=",")

In [24]:
# Keep only the knob columns, in the order given by knobs_list.
config_df_knob = config_df[knobs_list]

### config index
config 번호의 끝자리는 데이터프레임 인덱스에 1을 더한 값이다 (예: 인덱스 282 → config10283).


### 비교 결과
(샘플 생성 중 측정한 값, 다시 돌려서 측정한 값)
#### 40000대
* 1795 : config11796 (47520.43, 61211.65)
* 2437 : config12438 (47543.22, 62077.59)

#### 60000대
* 282 : config10283 (66377.65, 62776.06)
* 288 : config10289 (66161.63, 62168.97)

In [25]:
# Show the knob settings of the low-throughput rows (40000-50000 range).
config_df_knob.loc[group1, :]

Unnamed: 0,save0_0,save0_1,save1_0,save1_1,save2_0,save2_1,rdbcompression,rdbchecksum,rdb-save-incremental-fsync,activedefrag,...,maxmemory-policy,maxmemory-samples,lazyfree-lazy-eviction,lazyfree-lazy-expire,lazyfree-lazy-server-del,hash-max-ziplist-entries,hash-max-ziplist-value,activerehashing,hz,dynamic-hz
1662,742,8,101,55,88,11162,yes,yes,no,yes,...,volatile-lfu,4,yes,yes,no,342,163,no,30,yes
1795,985,6,149,63,37,10506,no,yes,yes,yes,...,volatile-lfu,7,yes,no,yes,604,89,no,12,yes
1918,748,4,219,61,66,10702,yes,no,yes,,...,noeviction,6,yes,yes,no,401,102,no,17,yes
2277,936,1,496,94,49,8855,yes,no,yes,,...,volatile-lru,5,no,yes,yes,675,179,yes,28,yes
2437,751,8,422,69,56,11629,yes,yes,yes,,...,volatile-ttl,5,no,yes,no,732,255,yes,32,no


In [26]:
# Show the knob settings of the high-throughput rows (the group2 labels).
config_df_knob.loc[group2, :]

Unnamed: 0,save0_0,save0_1,save1_0,save1_1,save2_0,save2_1,rdbcompression,rdbchecksum,rdb-save-incremental-fsync,activedefrag,...,maxmemory-policy,maxmemory-samples,lazyfree-lazy-eviction,lazyfree-lazy-expire,lazyfree-lazy-server-del,hash-max-ziplist-entries,hash-max-ziplist-value,activerehashing,hz,dynamic-hz
31,791,5,433,71,84,11880,yes,yes,yes,yes,...,noeviction,6,yes,no,no,567,56,yes,24,yes
280,932,7,279,71,33,9449,no,no,no,yes,...,noeviction,3,no,no,no,518,46,yes,39,yes
281,952,2,464,62,83,10574,yes,yes,yes,yes,...,allkeys-lfu,5,no,yes,no,338,29,no,6,yes
282,1073,2,470,37,57,8334,yes,no,yes,,...,volatile-random,3,yes,no,no,652,128,yes,9,no
285,778,5,326,48,31,8676,no,no,yes,,...,volatile-random,5,no,no,no,739,208,yes,32,no
288,1070,5,183,14,42,11677,no,yes,no,,...,volatile-ttl,7,no,yes,no,617,141,no,32,no
2007,784,7,151,55,76,10844,yes,no,no,,...,volatile-random,7,no,yes,no,558,104,no,34,no
2010,1029,6,301,99,39,8505,yes,no,no,yes,...,allkeys-lru,3,no,no,yes,307,253,yes,39,yes
2036,708,8,469,12,34,10671,no,no,yes,,...,volatile-lru,6,yes,no,no,694,47,yes,22,no
2051,753,7,116,48,47,11530,no,yes,no,,...,volatile-ttl,6,yes,no,no,383,147,no,40,yes
