# model

In [38]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_boston
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import explained_variance_score
from lightgbm import LGBMRegressor, plot_importance
from sklearn.metrics import roc_auc_score

In [39]:
# knob 종류 및 바운드
[305, 722, 5, 12, 9163, 'no', 'yes']
knobs_rdb = {
    
    "hash-max-ziplist-entries":     (256, 751), 
    "hash-max-ziplist-value": (16, 257),
    "activerehashing": ('yes', 'no'),  ##yes
    "hz": (1, 41),
    "dynamic-hz": ('yes', 'no'),

    "save0_0": (700,1100),
    "save0_1": (1,9),
    "save1_0": (100,500),
    "save1_1": (10,100),
    "save2_0": (30,90),
    "save2_1": (8000,12000),

    "rdbcompression": ('yes', 'no'), ##yes
    "rdbchecksum": ('yes', 'no'),    ##yes
    "rdb-save-incremental-fsync": ('yes', 'no'),   ## yes

    "activedefrag": ('yes','no'),  
    "active-defrag-threshold-lower": (1, 31),
    "active-defrag-threshold-upper": (70, 101),
    "active-defrag-cycle-min": (1, 31),
    "active-defrag-cycle-max": (70, 91),
    
    "maxmemory": (1000, 2900),   ##"volatile-lru"
    "maxmemory-policy":     ("volatile-lru", "allkeys-lru", "volatile-lfu", "allkeys-lfu", 
                             "volatile-random","allkeys-random", "volatile-ttl", "noeviction"),
    "maxmemory-samples": (3, 7),
    
    "lazyfree-lazy-eviction": ('yes', 'no'),
    "lazyfree-lazy-expire": ('yes', 'no'),
    "lazyfree-lazy-server-del": ('yes', 'no')
}

knobs_list = list(knobs_rdb.keys())
knobs_bound = list(knobs_rdb.values())

## 데이터 전처리
* 이상치 파악
* test, train 데이터 잘 나뉘어졌는지 확인

In [40]:
# config(knob) data
# config_df = pd.read_csv("./data/result_config0.csv", sep=',')
config_df = pd.read_csv("./data/result_config5.csv", sep=',')

# external(throughput) data
# external_df = pd.read_csv("./data/external_metrics0.csv", sep=',')
external_df = pd.read_csv("./data/external_metrics5.csv", sep=',')

In [41]:
# config(knog), external(throughput) 데이터 하나의 프레임으로 결합
dataset = config_df
dataset['throughput'] = external_df['Gets_KB/sec']

In [42]:
# 결측치 처리 
dataset_nan = dataset.copy()
dataset['activedefrag'].fillna(value='no', inplace=True)  # activedefrag no 채우기
dataset = dataset.fillna(value = 0)  # 전부 0으로 채움

In [43]:
dataset.head(3)

Unnamed: 0,appendonly,appendfsync,auto-aof-rewrite-percentage,auto-aof-rewrite-min-size,no-appendfsync-on-rewrite,aof-rewrite-incremental-fsync,aof-use-rdb-preamble,save0_0,save0_1,save1_0,...,maxmemory-samples,lazyfree-lazy-eviction,lazyfree-lazy-expire,lazyfree-lazy-server-del,hash-max-ziplist-entries,hash-max-ziplist-value,activerehashing,hz,dynamic-hz,throughput
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,868,4,320,...,5,yes,yes,yes,506,144,no,20,yes,45305.88
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,867,6,258,...,4,yes,no,yes,552,141,no,22,yes,44540.19
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,911,6,319,...,5,yes,no,yes,552,185,yes,20,no,45261.71


In [44]:
# maxmemory knob 단위(mb) 처리
dataset['maxmemory']  = dataset['maxmemory'].str.strip('mb')
dataset['maxmemory'] = pd.to_numeric(dataset['maxmemory'])

In [45]:
# Object 변수 categorical 로 변경 (LightGBM에 바로 넣기 위해)
category_knob = []

for knob in dataset:
    if dataset[knob].dtype == 'O':  # Object인 경우 category 형으로 변경
        dataset[knob] = dataset[knob].astype('category')
        if knob in knobs_list:
            category_knob.append(knob)

In [46]:
# 설정한 knob들만 추출
extract_knobs_list = knobs_list.copy()
extract_knobs_list.append('throughput')  # target 값 추가

dataset = dataset[extract_knobs_list]

In [47]:
dataset.head(3)

Unnamed: 0,hash-max-ziplist-entries,hash-max-ziplist-value,activerehashing,hz,dynamic-hz,save0_0,save0_1,save1_0,save1_1,save2_0,...,active-defrag-threshold-upper,active-defrag-cycle-min,active-defrag-cycle-max,maxmemory,maxmemory-policy,maxmemory-samples,lazyfree-lazy-eviction,lazyfree-lazy-expire,lazyfree-lazy-server-del,throughput
0,506,144,no,20,yes,868,4,320,58,58,...,0.0,0.0,0.0,1500,volatile-random,5,yes,yes,yes,45305.88
1,552,141,no,22,yes,867,6,258,52,67,...,0.0,0.0,0.0,1500,allkeys-lfu,4,yes,no,yes,44540.19
2,552,185,yes,20,no,911,6,319,54,65,...,0.0,0.0,0.0,2900,noeviction,5,yes,no,yes,45261.71


In [48]:
# 결측치 처리전 확인
print(dataset_nan.info())

# 결측치 처리후 확인
print(dataset.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1700 entries, 0 to 1699
Data columns (total 33 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   appendonly                     0 non-null      float64
 1   appendfsync                    0 non-null      float64
 2   auto-aof-rewrite-percentage    0 non-null      float64
 3   auto-aof-rewrite-min-size      0 non-null      float64
 4   no-appendfsync-on-rewrite      0 non-null      float64
 5   aof-rewrite-incremental-fsync  0 non-null      float64
 6   aof-use-rdb-preamble           0 non-null      float64
 7   save0_0                        1700 non-null   int64  
 8   save0_1                        1700 non-null   int64  
 9   save1_0                        1700 non-null   int64  
 10  save1_1                        1700 non-null   int64  
 11  save2_0                        1700 non-null   int64  
 12  save2_1                        1700 non-null   i

### 이상치 처리

In [49]:
# -999999와 같은 이상치 확인 -> 없음
dataset.describe()

Unnamed: 0,hash-max-ziplist-entries,hash-max-ziplist-value,hz,save0_0,save0_1,save1_0,save1_1,save2_0,save2_1,active-defrag-threshold-lower,active-defrag-threshold-upper,active-defrag-cycle-min,active-defrag-cycle-max,maxmemory,maxmemory-samples,throughput
count,1700.0,1700.0,1700.0,1700.0,1700.0,1700.0,1700.0,1700.0,1700.0,1700.0,1700.0,1700.0,1700.0,1700.0,1700.0,1700.0
mean,503.073529,136.014118,20.331765,899.707647,5.018824,301.504118,54.734706,60.100588,10003.781176,7.648235,41.854118,7.588824,39.392353,1938.529412,4.999412,44384.915482
std,40.678152,20.991291,3.175918,33.35388,0.747528,33.671656,7.505045,5.084096,337.575574,7.95017,42.548603,7.905846,40.028266,571.372337,0.362289,2836.074574
min,365.0,68.0,9.0,792.0,2.0,200.0,30.0,44.0,8680.0,0.0,0.0,0.0,0.0,1000.0,4.0,26918.97
25%,476.0,122.0,18.0,877.0,5.0,278.0,50.0,57.0,9781.5,0.0,0.0,0.0,0.0,1500.0,5.0,42970.87
50%,503.0,135.0,20.0,899.0,5.0,302.0,55.0,60.0,9999.5,0.0,0.0,0.0,0.0,1900.0,5.0,44181.36
75%,531.0,150.0,22.0,922.0,5.0,325.0,60.0,63.0,10229.0,15.0,85.0,15.0,80.0,2400.0,5.0,45033.1525
max,635.0,204.0,33.0,1016.0,7.0,413.0,78.0,75.0,11169.0,23.0,93.0,23.0,85.0,2900.0,6.0,64115.0


In [50]:
# 인덱스제거하고 돌려보자
print(dataset['throughput'][dataset['throughput'] <= 52500])
outlier_index = dataset['throughput'][dataset['throughput'] <= 52500].index
print('개수: ', len(outlier_index))

0       45305.88
1       44540.19
2       45261.71
3       45108.96
4       45277.43
          ...   
1695    44861.25
1696    45034.29
1697    45003.06
1698    44653.27
1699    44799.15
Name: throughput, Length: 1655, dtype: float64
개수:  1655


In [51]:
# # 이상치 제거
# dataset = dataset.drop(outlier_index)

In [52]:
# 이상치 제거 확인
dataset.shape

(1700, 26)

In [53]:
# 타입 확인 (카테고리)
dataset.dtypes

hash-max-ziplist-entries            int64
hash-max-ziplist-value              int64
activerehashing                  category
hz                                  int64
dynamic-hz                       category
save0_0                             int64
save0_1                             int64
save1_0                             int64
save1_1                             int64
save2_0                             int64
save2_1                             int64
rdbcompression                   category
rdbchecksum                      category
rdb-save-incremental-fsync       category
activedefrag                     category
active-defrag-threshold-lower     float64
active-defrag-threshold-upper     float64
active-defrag-cycle-min           float64
active-defrag-cycle-max           float64
maxmemory                           int64
maxmemory-policy                 category
maxmemory-samples                   int64
lazyfree-lazy-eviction           category
lazyfree-lazy-expire             c

In [54]:
dataset = pd.get_dummies(dataset)

In [55]:
dataset.dtypes

hash-max-ziplist-entries              int64
hash-max-ziplist-value                int64
hz                                    int64
save0_0                               int64
save0_1                               int64
save1_0                               int64
save1_1                               int64
save2_0                               int64
save2_1                               int64
active-defrag-threshold-lower       float64
active-defrag-threshold-upper       float64
active-defrag-cycle-min             float64
active-defrag-cycle-max             float64
maxmemory                             int64
maxmemory-samples                     int64
throughput                          float64
activerehashing_no                    uint8
activerehashing_yes                   uint8
dynamic-hz_no                         uint8
dynamic-hz_yes                        uint8
rdbcompression_no                     uint8
rdbcompression_yes                    uint8
rdbchecksum_no                  

### test, train 데이터 분리

In [56]:
# train 8, test 2
X_train_val, X_test, y_train_val, y_test = train_test_split(dataset.iloc[:,:-1]
                                                    , dataset['throughput'] 
                                                    ,test_size=0.2
                                                   , random_state = 156)

# train 6 test 2
X_train, X_val, y_train, y_val = train_test_split(X_train_val
                                                    , y_train_val 
                                                    ,test_size=0.25
                                                   , random_state = 156)


In [57]:
# X_train 확인
X_train.head()

Unnamed: 0,hash-max-ziplist-entries,hash-max-ziplist-value,hz,save0_0,save0_1,save1_0,save1_1,save2_0,save2_1,active-defrag-threshold-lower,...,maxmemory-policy_noeviction,maxmemory-policy_volatile-lfu,maxmemory-policy_volatile-lru,maxmemory-policy_volatile-random,maxmemory-policy_volatile-ttl,lazyfree-lazy-eviction_no,lazyfree-lazy-eviction_yes,lazyfree-lazy-expire_no,lazyfree-lazy-expire_yes,lazyfree-lazy-server-del_no
1645,496,127,19,884,6,349,75,64,10321,0.0,...,0,0,0,1,0,0,1,1,0,1
522,490,148,20,848,4,346,63,57,9463,15.0,...,0,0,0,0,1,0,1,0,1,1
445,568,147,20,894,6,254,59,62,9841,11.0,...,0,0,0,0,0,1,0,0,1,1
220,580,128,23,906,4,326,48,67,10014,0.0,...,0,0,1,0,0,0,1,0,1,1
1611,480,136,18,883,4,243,71,60,9791,0.0,...,0,0,0,0,1,1,0,1,0,0


In [59]:
# # test, train 에 비슷하게 분류됬는지 확인
# y_train.hist()
# plt.title('train data')
# plt.show()

# y_test.hist()
# plt.title('test data')
# plt.show()

## XGBoost 모델 생성

In [60]:
from xgboost import XGBRegressor

xgb_wrapper = XGBRegressor(n_estimatros=1000, learning_rate = 0.1, max_depth = 3)

# 검증 데이터(validation data)
evals = [(X_val, y_val)]

xgb_wrapper.fit(X_train, y_train, early_stopping_rounds = 100, eval_metric='logloss', eval_set = evals, verbose=True)

Parameters: { "n_estimatros" } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[0]	validation_0-logloss:-1634686.87500
[1]	validation_0-logloss:-1634686.87500
[2]	validation_0-logloss:-1634686.87500
[3]	validation_0-logloss:-1634686.87500
[4]	validation_0-logloss:-1634686.87500
[5]	validation_0-logloss:-1634686.87500
[6]	validation_0-logloss:-1634686.87500
[7]	validation_0-logloss:-1634686.87500
[8]	validation_0-logloss:-1634686.87500
[9]	validation_0-logloss:-1634686.87500
[10]	validation_0-logloss:-1634686.87500
[11]	validation_0-logloss:-1634686.87500
[12]	validation_0-logloss:-1634686.87500
[13]	validation_0-logloss:-1634686.87500
[14]	validation_0-logloss:-1634686.87500
[15]	validation_0-logloss:-1634686.87500
[16]	validation_0-logloss:-1634686.87500
[17]	validation_0-loglo

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
             importance_type='gain', interaction_constraints='',
             learning_rate=0.1, max_delta_step=0, max_depth=3,
             min_child_weight=1, missing=nan, monotone_constraints='()',
             n_estimators=100, n_estimatros=1000, n_jobs=8, num_parallel_tree=1,
             random_state=0, reg_alpha=0, reg_lambda=1, scale_pos_weight=1,
             subsample=1, tree_method='exact', validate_parameters=1,
             verbosity=None)

## 성능 평가

In [61]:
# 성능 측정 함수 RMSLE(캐글에서 요구한 성능 평가 방법), MSE, RMSE
from sklearn.metrics import mean_squared_error, mean_absolute_error
# log 값 변환 시 NaN 등의 이슈로 log()가 아닌 log1p()를 이용해 RMSLE 계산 
def rmsle(y, pred):
    # log1p 변환
    log_y = np.log1p(y)
    log_pred = np.log1p(pred)
    
    # squred 계산
    squared_error = (log_y-log_pred)**2
    
    # RMSE 계산
    rmsle = np.sqrt(np.mean(squared_error))
    
    return rmsle

# 사이킷런의 mean_square_error()를 이용해 RMSE 계산
def rmse(y, pred):
    return np.sqrt(mean_squared_error(y, pred))

# MSE, RMSE, RMSLE를 모두 계산
def evaluate_regr(y, pred):
    rmsle_val = rmsle(y, pred)
    rmse_val = rmse(y, pred)
    
    # MAE는 사이킷런의 mean_absolute_error()로 계산
    mae_val = mean_absolute_error(y, pred)
    
    print('RMSLE: {0:.3f}, RMSE: {1:.3F}, MAE: {2:.3F}'.format(rmsle_val, rmse_val, mae_val))
    

In [62]:
preds = xgb_wrapper.predict(X_test)  # 예측 결과
evaluate_regr(y_test, preds)

RMSLE: 2.303, RMSE: 39978.610, MAE: 39910.671


이상치 제거 전 RMSLE: 0.060, RMSE: 3492.223, MAE: 3032.843

이상치 제거 후 RMSLE: 0.058, RMSE: 3393.720, MAE: 2974.377

In [24]:
X_test.iloc[:1,:]

Unnamed: 0,hash-max-ziplist-entries,hash-max-ziplist-value,activerehashing,hz,dynamic-hz,save0_0,save0_1,save1_0,save1_1,save2_0,...,active-defrag-threshold-lower,active-defrag-threshold-upper,active-defrag-cycle-min,active-defrag-cycle-max,maxmemory,maxmemory-policy,maxmemory-samples,lazyfree-lazy-eviction,lazyfree-lazy-expire,lazyfree-lazy-server-del
1050,500,142,no,33,no,933,5,282,61,61,...,0.0,0.0,0.0,0.0,1200,allkeys-lfu,5,yes,yes,yes


In [25]:
# lgbm_wrapper.predict(X_test.iloc[3:4,:])

In [69]:
# 예측 prediction 시험

df_result_test = {}

for i in knobs_list:
    df_result_test[i] = ''

df_result_test['hash-max-ziplist-entries'] = [512]  #(256, 751)
df_result_test['hash-max-ziplist-value'] = [64]  #(256, 751)
df_result_test['activerehashing'] = ['yes']  #(256, 751)
df_result_test['dynamic-hz'] = ['yes']  #(256, 751)
df_result_test['hz'] = [10]  #(256, 751)
df_result_test['save0_0'] = [900]
df_result_test['save0_1'] = [1]
df_result_test['save1_0'] = [300]
df_result_test['save1_1'] = [10]
df_result_test['save2_0'] = [60]
df_result_test['save2_1'] = [10000]
df_result_test['rdbcompression'] = ['yes']
df_result_test['rdbchecksum'] = ['yes']
df_result_test['rdb-save-incremental-fsync'] = ['yes']
df_result_test['activedefrag'] = ['yes']

df_result_test['active-defrag-threshold-lower'] = [10]
df_result_test['active-defrag-threshold-upper'] = [100]
df_result_test['active-defrag-cycle-min'] = [5]
df_result_test['active-defrag-cycle-max'] = [75]
df_result_test['maxmemory'] = []



df_result_test['lazyfree-lazy-expire'] = ['yes']

df_result_test['save2_0'] = [74]

df_result_test['active-defrag-cycle-max'] = [91]
df_result_test['maxmemory-samples'] = [5]


# for idx, ty in enumerate(list(df_result_test2.dtypes)):
#     if ty == 'O':
#         df_result_test2.iloc[0,idx].astype('category')
    
# for elem in df_result_test:
#     if df_result_test[elem].dtype == 'O':
        
#         df_result_test2[elem] = df_result_test2[elem].astype('category')

        
df_result_test2 = pd.get_dummies(df_result_test)
xgb_wrapper.predict(df_result_test2.iloc[:1,:])[0]

TypeError: unhashable type: 'list'

In [27]:
# df_result_test2

In [28]:
# lgbm_wrapper.predict(df_result_test2)

In [29]:
# # plot_importance()를 이영해 피처 중요도 시각화
# from lightgbm import plot_importance
# import matplotlib.pyplot as plt
# %matplotlib inline

# fig, ax = plt.subplots(figsize=(10, 12))
# plot_importance(lgbm_wrapper, ax= ax)

---

### 하이퍼파라미터 튜닝

In [30]:
# from sklearn.model_selection import GridSearchCV

# # 하이퍼파라미터 튜닝시 수행 속도를 향상시키기 위해 n_estimators를 200으로 감소
# lgbm_wrapper = LGBMRegressor(n_estimators = 200)

# params = {
#     'num_leaves':[32, 64],
#     'max_depth':[128, 160],
#     'min_child_samples':[60, 100],
#     'subsample':[0.8, 1]
# }

# # cv는 3으로 가정 (교차 검증을 자동으로 해주는건가?)
# gridcv = GridSearchCV(lgbm_wrapper, param_grid = params, cv=3)
# gridcv.fit(X_train, y_train, early_stopping_rounds = 30, eval_metric='auc', 
#           eval_set = [(X_train, y_train), (X_test, y_test)])

# print('GridSeaerchCV 최적 파라미터:', gridcv.best_params_)

In [31]:
# # 하이퍼 파라미터 적용 후 다시 생성
# lgbm_wrapper = LGBMRegressor(n_estimators = 1000,
#                             num_leaves = 32,
#                             subsample=0.8,
#                             min_child_samples=60,
#                             max_depth=128)

# evals = [(X_test, y_test)]

# lgbm_wrapper.fit(X_train, y_train, early_stopping_rounds=100,
#                 eval_metric='auc',
#                 eval_set=evals,
#                 verbose=True)

---

# BO

모델은 같은 것을 사용한다.

knob_default 딕셔너리 


1. 목적함수에서 지정한 범위knob 값을 랜덤하게 추출
2. 목적함수에는 init_knob 값이 지정되어있고 나머지 값들은 들어오는 값으로 초기화
목적함수 들어가기 전에 미리 선택을 해야 한다. 범위에 대해서 

cluster0 값만준다.
목적함수에서는 받은 cluster0을 할당한다
할당되지 않은 Knob에 대해서는 knob_init를 참고하여 값을 할당한다. 

결과값을 init_dict에 할당한다. 
목적함세어는 받은 cluster1을 할당한다.
할당되지 않은 Knob에 대해서는 knob_init을 참고한다. 




## 목적함수

In [90]:
noise_level = 0.1

def objective_func(knob_value, noise_level=noise_level):

    knob_dict = {}
    for knob in knobs_list: # 순서에 맞게 선언
        knob_dict[knob] = 'x'
    
#     print('###x 삽입###')
#     print(knob_dict)
    
    # knob_value 값 먼저 할당
    for idx,knob in enumerate(knobs):
        knob_dict[knob] = knob_value[idx]
    
#     print('##먼저할당##')
#     print(knob_dict)
    
    
    # 나머지 값 채우기 
    for knob ,knob_val in knob_dict.items():
        if knob_val == 'x':
            knob_dict[knob] = knobs_update[knob]
            
#     print('##나머지 update 반영 할당##')
#     print(knob_dict)
                    
    if 'activedefrag' in knob_dict:
        if knob_dict['activedefrag'] == 'no':  # activedefrag no인 경우 나머지 관련 knob들에 0할당!
            activ_var = ['active-defrag-threshold-lower', 'active-defrag-threshold-upper', 'active-defrag-cycle-min', 'active-defrag-cycle-max']
            for activ_knob in activ_var:
                knob_dict[activ_knob] = 0     
    
    knob_df = pd.DataFrame(knob_dict, index=[0])  # 데이터 프레임으로 변환
    
    # 카테고리 변수 변경
    for knob in category_knob:
        knob_df[knob] = knob_df[knob].astype('category') 

    return -lgbm_wrapper.predict(knob_df)[0]


## gp_minimize

In [33]:
%matplotlib inline
print(__doc__)

import numpy as np
np.random.seed(237)
import matplotlib.pyplot as plt
from skopt.plots import plot_gaussian_process
from skopt import gp_minimize

Automatically created module for IPython interactive environment


In [80]:
# knob 종류 및 바운드

# cluster_table = {
#     'cluster0':[
#         'save1_1', 
#         'lazyfree-lazy-eviction'
#     ],

#     'clsuter1':
#     [
#         'active-defrag-cycle-min',
#         'active-defrag-cycle-max',
#         'save0_0',
#         'save0_1',
#         'save2_0',
#         'save2_1',
#         'maxmemory-samples',
#         'hash-max-ziplist-entries',
#         'hash-max-ziplist-value',
#         'lazyfree-lazy-expire',
#         'dynamic-hz',
#         'hz'               
#                ],
#     'other':[
#         'activerehashing',
#         'save1_0',
#         'rdbcompression',
#         'rdbchecksum',
#         'rdb-save-incremental-fsync',
#         'activedefrag',
#         'active-defrag-threshold-lower',
#         'active-defrag-threshold-upper',
#         'maxmemory',
#         'maxmemory-policy',
#         'lazyfree-lazy-server-del'
#     ]
# }

cluster_table = {

    'clsuter1':
    [
                'save1_1', 
        'lazyfree-lazy-eviction',

        'active-defrag-cycle-min',
        'active-defrag-cycle-max',
        'save0_0',
        'save0_1',
        'save2_0',
        'save2_1',
        'maxmemory-samples',
        'hash-max-ziplist-entries',
        'hash-max-ziplist-value',
        'lazyfree-lazy-expire',
        'dynamic-hz',
        'hz'               
               ],
#     'other':[
#         'activerehashing',
#         'save1_0',
#         'rdbcompression',
#         'rdbchecksum',
#         'rdb-save-incremental-fsync',
#         'activedefrag',
#         'active-defrag-threshold-lower',
#         'active-defrag-threshold-upper',
#         'maxmemory',
#         'maxmemory-policy',
#         'lazyfree-lazy-server-del'
#     ]
}

cluster_table_T = {

    'clsuter1':
    [
        'save1_1', 
        'lazyfree-lazy-eviction',

        'active-defrag-cycle-min',
        'active-defrag-cycle-max',
        'save0_0',
        'save0_1',
        'save2_0',
        'save2_1',
        'maxmemory-samples',
        'hash-max-ziplist-entries',
        'hash-max-ziplist-value',
        'lazyfree-lazy-expire',
        'dynamic-hz',
        'hz',
        'activerehashing',
        'save1_0',
        'rdbcompression',
        'rdbchecksum',
        'rdb-save-incremental-fsync',
        'activedefrag',
        'active-defrag-threshold-lower',
        'active-defrag-threshold-upper',
        'maxmemory',
        'maxmemory-policy',
        'lazyfree-lazy-server-del'

               ]
}


knobs_update = {

    'hash-max-ziplist-entries': 270,
    'hash-max-ziplist-value': 63,
    'activerehashing': 'no',
    'hz': 40,
    'dynamic-hz': 'yes',
    'save0_0': 829,
    'save0_1':1,
    'save1_0': 355,
    'save1_1':100,
    'save2_0': 31,
    'save2_1':8478,
    'rdbcompression': 'no',
    'rdbchecksum': 'no',
    'rdb-save-incremental-fsync': 'yes',
    'activedefrag': 'yes',
    'active-defrag-threshold-lower': 12,
    'active-defrag-threshold-upper': 86,
    'active-defrag-cycle-min': 10,
    'active-defrag-cycle-max': 80,
    'maxmemory': 2039,
    'maxmemory-policy':'noeviction',
    'maxmemory-samples': 3,
    'lazyfree-lazy-eviction': 'no',
    'lazyfree-lazy-expire': 'yes',
    'lazyfree-lazy-server-del': 'no'

}

# knobs_default = {

#     'hash-max-ziplist-entries': 270,
#     'hash-max-ziplist-value': 63,
#     'activerehashing': 'no',
#     'hz': 40,
#     'dynamic-hz': 'yes',
#     'save0_0': 829,
#     'save0_1':1,
#     'save1_0': 355,
#     'save1_1':100,
#     'save2_0': 31,
#     'save2_1':8478,
#     'rdbcompression': 'no',
#     'rdbchecksum': 'no',
#     'rdb-save-incremental-fsync': 'yes',
#     'activedefrag': 'yes',
#     'active-defrag-threshold-lower': 12,
#     'active-defrag-threshold-upper': 86,
#     'active-defrag-cycle-min': 10,
#     'active-defrag-cycle-max': 80,
#     'maxmemory': 2039,
#     'maxmemory-policy':'noeviction',
#     'maxmemory-samples': 3,
#     'lazyfree-lazy-eviction': 'no',
#     'lazyfree-lazy-expire': 'yes',
#     'lazyfree-lazy-server-del': 'no'

# }

In [81]:
res_result = []

# x0_list = [
#     578,
#     30,
#     'yes',
#     1,
#     'yes',
#     878,
#     2,
#     347,
#     82,
#     58,
#     9335,
#     'yes',
#     'no',
#     'no',
#     'yes',
#     13,
#     71,
#     24,
#     83,
#     2850,
#     'allkeys-random',
#     4,
#     'no',
#     'yes',
#     'no'
# ]  # x0 초기값

# y0_val = -44618.390215749234 # y0 초기값

for i in range(5):
    print('###########{}##########'.format(i))
    
    iter_per_res = 0
    
    for cluster, knobs in cluster_table.items():
        print('#####{}#####'.format(cluster))
        gp_knobs_bound = []

        # 1. 클러스터의 bound 생성
#         print('{} bound 생성'.format(cluster))
        for knob in knobs: 
            gp_knobs_bound.append(knobs_rdb[knob])

#         print('{} gp 진행'.format(cluster))
        # 2. gp_minimize 진행
        res = gp_minimize(

                    objective_func,                  # 목적함수
                    gp_knobs_bound,      # bound
                    random_state=np.random.randint(1,1234),
#                     x0 = x0_list,
#                     y0 = y0_val, 
            
                    acq_func="EI",      
                    n_calls=15,      # 총 반복 횟수
                    n_random_starts=5,    # 초기 랜덤 값
                    noise=0.1**2,       
        )   

        # 결과 knob 값들
#         print('{} gp 완료'.format(cluster))
        gp_result = res.x
        iter_per_res = res.fun
        
#         x0_list = res.x
#         y0_val = res.fun
        
        for idx,knob in enumerate(knobs):
            knobs_update[knob] = gp_result[idx]
        
        print('{}-th'.format(i))
        print(res.func_vals)
        print(res.fun)
        
    print(iter_per_res)
    print()
        


###########0##########
#####clsuter1#####
0-th
[-44618.39021575 -44791.05809369 -44598.18793102 -44597.61136068
 -44372.84884351 -44618.39021575 -44791.05809369 -44791.05809369
 -44791.05809369 -44791.05809369 -44597.61136068 -44618.39021575
 -44618.39021575 -44618.39021575 -44618.39021575]
-44791.058093691274
-44791.058093691274

###########1##########
#####clsuter1#####
1-th
[-44597.61136068 -44618.39021575 -44380.94917837 -44597.61136068
 -44597.61136068 -44597.61136068 -44618.39021575 -44618.39021575
 -44597.61136068 -44618.39021575 -44380.94917837 -44618.39021575
 -44618.39021575 -44791.05809369 -44380.94917837]
-44791.058093691274
-44791.058093691274

###########2##########
#####clsuter1#####
2-th
[-44618.39021575 -44380.94917837 -44597.61136068 -44618.39021575
 -44618.39021575 -44598.18793102 -44598.18793102 -44597.61136068
 -44618.39021575 -44618.39021575 -44618.39021575 -44618.39021575
 -44618.39021575 -44618.39021575 -44618.39021575]
-44618.390215749234
-44618.390215749234

#

In [71]:
knobs_update

{'hash-max-ziplist-entries': 335,
 'hash-max-ziplist-value': 210,
 'activerehashing': 'no',
 'hz': 17,
 'dynamic-hz': 'yes',
 'save0_0': 814,
 'save0_1': 7,
 'save1_0': 355,
 'save1_1': 59,
 'save2_0': 62,
 'save2_1': 11765,
 'rdbcompression': 'no',
 'rdbchecksum': 'no',
 'rdb-save-incremental-fsync': 'yes',
 'activedefrag': 'yes',
 'active-defrag-threshold-lower': 12,
 'active-defrag-threshold-upper': 86,
 'active-defrag-cycle-min': 28,
 'active-defrag-cycle-max': 77,
 'maxmemory': 2039,
 'maxmemory-policy': 'noeviction',
 'maxmemory-samples': 4,
 'lazyfree-lazy-eviction': 'no',
 'lazyfree-lazy-expire': 'no',
 'lazyfree-lazy-server-del': 'no'}

In [89]:
# 전체 데이터 대상
res_result = []

cluster_table_T = {

    'clsuter1':
    [
        'save1_1', 
        'lazyfree-lazy-eviction',

        'active-defrag-cycle-min',
        'active-defrag-cycle-max',
        'save0_0',
        'save0_1',
        'save2_0',
        'save2_1',
        'maxmemory-samples',
        'hash-max-ziplist-entries',
        'hash-max-ziplist-value',
        'lazyfree-lazy-expire',
        'dynamic-hz',
        'hz',
        'activerehashing',
        'save1_0',
        'rdbcompression',
        'rdbchecksum',
        'rdb-save-incremental-fsync',
        'activedefrag',
        'active-defrag-threshold-lower',
        'active-defrag-threshold-upper',
        'maxmemory',
        'maxmemory-policy',
        'lazyfree-lazy-server-del'

               ]
}


knobs_update = {

    'hash-max-ziplist-entries': 270,
    'hash-max-ziplist-value': 63,
    'activerehashing': 'no',
    'hz': 40,
    'dynamic-hz': 'yes',
    'save0_0': 829,
    'save0_1':1,
    'save1_0': 355,
    'save1_1':100,
    'save2_0': 31,
    'save2_1':8478,
    'rdbcompression': 'no',
    'rdbchecksum': 'no',
    'rdb-save-incremental-fsync': 'yes',
    'activedefrag': 'yes',
    'active-defrag-threshold-lower': 12,
    'active-defrag-threshold-upper': 86,
    'active-defrag-cycle-min': 10,
    'active-defrag-cycle-max': 80,
    'maxmemory': 2039,
    'maxmemory-policy':'noeviction',
    'maxmemory-samples': 3,
    'lazyfree-lazy-eviction': 'no',
    'lazyfree-lazy-expire': 'yes',
    'lazyfree-lazy-server-del': 'no'

}



for i in range(1):
    print('###########{}##########'.format(i))
    
    iter_per_res = 0
    
    for cluster, knobs in cluster_table_T.items():
        print('#####{}#####'.format(cluster))
        gp_knobs_bound = []

        # 1. 클러스터의 bound 생성
#         print('{} bound 생성'.format(cluster))
        for knob in knobs: 
            gp_knobs_bound.append(knobs_rdb[knob])

#         print('{} gp 진행'.format(cluster))
        # 2. gp_minimize 진행
        res = gp_minimize(

                    objective_func,                  # 목적함수
                    gp_knobs_bound,      # bound
                    random_state=np.random.randint(1,1234),
#                     x0 = x0_list,
#                     y0 = y0_val, 
            
                    acq_func="EI",      
                    n_calls=15,      # 총 반복 횟수
                    n_random_starts=5,    # 초기 랜덤 값
                    noise=0.1**2,       
        )   

        # 결과 knob 값들
#         print('{} gp 완료'.format(cluster))
        gp_result = res.x
        iter_per_res = res.fun
        
#         x0_list = res.x
#         y0_val = res.fun
        
        for idx,knob in enumerate(knobs):
            knobs_update[knob] = gp_result[idx]
        
        print('{}-th'.format(i))
        print(res.func_vals)
        print(res.fun)
        
    print(iter_per_res)
    print()
        


###########0##########
#####clsuter1#####
0-th
[-44618.39021575 -44618.39021575 -44481.77898713 -44618.39021575
 -44598.18793102 -44791.05809369 -44597.61136068 -44791.05809369
 -44597.61136068 -44618.39021575 -44428.73653257 -44618.39021575
 -44618.39021575 -44339.10816538 -44597.61136068]
-44791.058093691274
-44791.058093691274



In [86]:
knobs_update

{'hash-max-ziplist-entries': 256,
 'hash-max-ziplist-value': 257,
 'activerehashing': 'no',
 'hz': 1,
 'dynamic-hz': 'yes',
 'save0_0': 700,
 'save0_1': 1,
 'save1_0': 500,
 'save1_1': 10,
 'save2_0': 30,
 'save2_1': 12000,
 'rdbcompression': 'yes',
 'rdbchecksum': 'yes',
 'rdb-save-incremental-fsync': 'yes',
 'activedefrag': 'yes',
 'active-defrag-threshold-lower': 31,
 'active-defrag-threshold-upper': 101,
 'active-defrag-cycle-min': 31,
 'active-defrag-cycle-max': 70,
 'maxmemory': 1000,
 'maxmemory-policy': 'volatile-ttl',
 'maxmemory-samples': 3,
 'lazyfree-lazy-eviction': 'no',
 'lazyfree-lazy-expire': 'no',
 'lazyfree-lazy-server-del': 'no'}

In [51]:
li = []
for value in knobs_update.values():
    li.append(value)

In [52]:
li

[578,
 30,
 'yes',
 1,
 'yes',
 878,
 2,
 347,
 82,
 58,
 9335,
 'yes',
 'no',
 'no',
 'yes',
 13,
 71,
 24,
 83,
 2850,
 'allkeys-random',
 4,
 'no',
 'yes',
 'no']

---

In [181]:
# gp 결과 정리 함수
def li_to_str(result_list):
    for idx,knob in enumerate(list(knobs_rdb.keys())):  # knobs_rdb 키마다 접근
        print("{} ".format(knob), end='')
        print('{}'.format(result_list[idx]))

In [74]:
# gp 결과
li_to_str(res.x)

hash-max-ziplist-entries 305
save0_0 722
save0_1 5
save1_1 12
save2_1 9163
rdb-save-incremental-fsync no
lazyfree-lazy-expire yes


## gp 결과 정리

### gp 분류된것
"hash-max-ziplist-entries" : 284
'hash-max-ziplist-value' 22
'activerehashing': 'yes'
'hz' 32
'dynamic-hz' : 'yes'
'save0_0' : 877
'save0_1'2
'save1_0'272
'save1_1' : 12
'save2_0'33
'save2_1' : 11669 
'rdbcompression'no
'rdbchecksum' yes
"rdb-save-incremental-fsync" 'no'
'active-defrag-threshold-lower'3
'active-defrag-threshold-upper': 87
'active-defrag-cycle-min' : 25
'active-defrag-cycle-max' : 80
'maxmemory'1298
maxmemory-policy : 'allkeys-random' 
'maxmemory-samples'6
"lazyfree-lazy-eviction"  'no'
'lazyfree-lazy-expire' yes
'lazyfree-lazy-server-del' yes


### gp 분류된것2
hash-max-ziplist-entries 502
hash-max-ziplist-value 213
activerehashing yes
hz 32
dynamic-hz yes
save0_0 760
save0_1 3
save1_0 426
save1_1 24
save2_0 37
save2_1 8052 
rdbcompression no
rdbchecksum' no
rdb-save-incremental-fsync yes
active-defrag-threshold-lower 4
active-defrag-threshold-upper 72
active-defrag-cycle-min 14
active-defrag-cycle-max 70
maxmemory 1552
maxmemory-policy allkeys-random
maxmemory-samples 6
lazyfree-lazy-eviction yes
lazyfree-lazy-expire yes
lazyfree-lazy-server-del no
[575,
 94,
 'no',
 10,
 'yes',
 1072,
 5,
 465,
 24,
 83,
 9398,
 'yes',
 'yes',
 'no',
 26,
 81,
 20,
 75,
 1539,
 'allkeys-lfu',
 4,
 'yes',
 'no',
 'no']

### gp_T
hash-max-ziplist-entries 575
hash-max-ziplist-value 94
activerehashing no
hz 10
dynamic-hz yes
save0_0 1072
save0_1 5
save1_0 465
save1_1 24
save2_0 83
save2_1 9398
rdbcompression yes
rdbchecksum yes
rdb-save-incremental-fsync no
active-defrag-threshold-lower 26
active-defrag-threshold-upper 81
active-defrag-cycle-min 20
active-defrag-cycle-max 75
maxmemory 1539
maxmemory-policy allkeys-lfu
maxmemory-samples 4
lazyfree-lazy-eviction yes
lazyfree-lazy-expire no
lazyfree-lazy-server-del no

