## Stage Pool 의 구성 & GA frontier 의 구성 분석

In [119]:
import sys
sys.path.insert(0, '../')

import numpy as np
import os
import json

import pandas as pd

import glob

import plotly
import plotly.graph_objects as go
from plotly.subplots import make_subplots
plotly.offline.init_notebook_mode(connected=True)

from utils_kyy.pareto_front import identify_pareto
from utils_kyy.utils_graycode import graydecode

## [0] GA - 파레토 프론티어 찾아놓기

In [None]:
# Load the GA training log and collect the chromosomes that lie on the
# Pareto frontier over all generations.
data_path = '../logs/__New_main_experiment_1_GA_30gen/'

# The log is parsed as JSON despite its .log extension.
with open(os.path.join(data_path, "train_logging.log")) as json_file:
    data = json.load(json_file)

train_log = data['train_log']   # indexed below as train_log[str(generation)][individual]
niter = len(train_log)          # number of generations in the log
npop = len(train_log['0'])      # population size, taken from generation 0

objs_fitness = []
objs_chromo = []
gen_num = []
for i in range(niter):
    gen_i = train_log[str(i)]
    gen_num.extend([i] * npop)
    fitness_i = [gen_i[j][1] for j in range(npop)]  # entry [1]: [-val_acc, flops]
    chromo_i = [gen_i[j][0] for j in range(npop)]   # entry [0]: chromosome
    objs_fitness.append(fitness_i)
    objs_chromo.append(chromo_i)

objs_fitness = np.array(objs_fitness)
epoch = list(range(niter))

objs_fitness[:, :, 0] = -1 * objs_fitness[:, :, 0]  # -val_acc => +val_acc

# Flatten (generation, individual) into one axis; accuracy is already positive
# here because of the sign flip on the line above.
y1 = objs_fitness[:, :, 0].reshape(-1).tolist()  # val_acc
y2 = objs_fitness[:, :, 1].reshape(-1).tolist()  # flops
idxs = list(range(len(y1)))
pareto = [0] * len(y1)

df = pd.DataFrame({'gen': gen_num, 'idx': idxs, 'acc': y1, 'flops': y2})

## Find the Pareto front over every individual in df
# Work on an explicit copy so that the in-place negation below can neither
# write through to a pandas-owned buffer nor fail on a read-only array.
data_30gen_score = df[['acc', 'flops']].values.copy()

# 1) Negate flops so that it becomes a score.
# NOTE(review): presumably identify_pareto treats larger values as better in
# every column - confirm against utils_kyy.pareto_front.
data_30gen_score[:, 1] = -data_30gen_score[:, 1]

# 2) Find the Pareto frontier
pareto_30gen_idx = identify_pareto(data_30gen_score)
pareto_front_30gen = data_30gen_score[pareto_30gen_idx]

# 3) Build the list of chromosomes on the Pareto frontier.
# A flat index maps back to (generation, individual) through the population
# size, e.g. with npop == 20: 33 => 1 * 20 + 13 => objs_chromo[1][13].
# npop is used instead of a literal 20 so that logs with a different
# population size are indexed correctly.
pareto_chromos = []
for idx in pareto_30gen_idx:
    i, j = divmod(int(idx), npop)
    pareto_chromos.append(objs_chromo[i][j])

In [None]:
len(pareto_chromos)  # total number of chromosomes on the Pareto frontier

In [61]:
## Practice: decode a single chromosome taken from the Pareto frontier
chromo_idx = 0
individual = pareto_chromos[chromo_idx]

## Gray decode to decimal
# The chromosome is cut into three equal-length segments and each segment is
# decoded separately.
gray_len = len(individual) // 3
graph_name = []
for i in range(3):
    segment = individual[gray_len * i:gray_len * (i + 1)]

    # list -> string: concatenate the segment's elements
    tmp = ''.join(str(bit) for bit in segment)

    # string -> int -> decoded number
    # NOTE(review): graydecode is handed int(tmp), i.e. the digit string read
    # as a base-10 integer - confirm that is the input it expects.
    graph_name.append(graydecode(int(tmp)))

print(graph_name)

[124, 65, 25]


In [71]:
################################################################
# Decode the stage_1 ~ stage_3 graph numbers of every chromosome
# on the Pareto frontier
################################################################
pareto_frontier_graph_name = []

for individual in pareto_chromos:
    # 1) Gray decode to decimal: one number per third of the chromosome,
    #    e.g. [124, 65, 25]
    gray_len = len(individual) // 3
    graph_name = [
        # join the segment into a digit string, read it as an int, decode it
        graydecode(int(''.join(map(str, individual[gray_len * k:gray_len * (k + 1)]))))
        for k in range(3)
    ]

    # 2) Collect
    pareto_frontier_graph_name.append(graph_name)

In [73]:
# Show the first decoded entry and the number of decoded entries.
pareto_frontier_graph_name[0], len(pareto_frontier_graph_name)

([124, 65, 25], 7)

### [1] Stage Pool 의 구성 분석

### 1.1.1 stage pool 전체 분석 - Node Size

In [64]:
#########################
# Check with stage 1 only
#########################
# 1) Collect the file paths of the stage's graph files
stage_num = 1
stage_path = "../graph_pool/New_main_experiment_1/{}/*.*".format(stage_num)
stage = glob.glob(stage_path)

In [65]:
# 2) Strip the directory part and the file extension,
#    e.g. '.../21_WS_11_029.yaml' => '21_WS_11_029'.
# os.path is used instead of split('/')[-1][:-5] so the result does not depend
# on the path separator or on the extension being exactly five characters, and
# so that re-running this cell does not chop five more characters off names
# that were already stripped.
stage = [os.path.splitext(os.path.basename(path))[0] for path in stage]

In [66]:
# 3) Node-count distribution: the text before the first '_' in each name is
#    converted to int, e.g. '32_...' => 32
node_num_distribution = [int(graph_i.split('_')[0]) for graph_i in stage]

In [101]:
#########################
# Run for stages 1 ~ 3
#########################
stage_nums = [1, 2, 3]
node_dict = {}   #  {1: [32, 24, 22, ...], 2: [...], 3: [...]}

for stage_num in stage_nums:
    # 1) Collect the file paths.
    # NOTE(review): glob.glob returns matches in arbitrary order, and a later
    # cell indexes node_dict[stage] by the decoded graph number - confirm the
    # order here matches the one used when the GA was run.
    stage_path = "../graph_pool/New_main_experiment_1/" + str(stage_num) + "/*.*"

    # 2) Strip the directory part and the extension,
    #    e.g. '.../21_WS_11_029.yaml' => '21_WS_11_029'.
    # os.path is used instead of split('/')[-1][:-5] so the result does not
    # depend on the path separator or on a five-character extension.
    stage = [os.path.splitext(os.path.basename(path))[0]
             for path in glob.glob(stage_path)]

    # 3) Node-count distribution: leading field of each name, '32' => 32
    node_num_distribution = [int(graph_i.split('_')[0]) for graph_i in stage]

    node_dict[stage_num] = node_num_distribution

In [68]:
# Build a pandas DataFrame with one column per stage for histogram plotting
df_node_size = pd.DataFrame({'stage_%d' % k: node_dict[k] for k in (1, 2, 3)})

In [69]:
# Overlaid node-count histograms of the three stage pools.
fig = go.Figure()

# Shared binning for every trace
start = 20
end = 40
size = 1

for k in (1, 2, 3):
    fig.add_trace(go.Histogram(
        x=node_dict[k],
        name='stage #%d' % k,
        xbins=dict(start=start, end=end, size=size),
    ))

# Overlay the histograms
fig.update_layout(barmode='overlay')
# Reduce opacity so that overlapping histograms stay visible
fig.update_traces(opacity=0.75)
fig.show()

### 1.1.2 stage pool 전체 분석 - 그래프 종류

In [111]:
from collections import Counter

In [112]:
#########################
# Run for stages 1 ~ 3
#########################
stage_nums = [1, 2, 3]
graph_dict = {}   #  {1: ['WS', 'ER', ...], 2: [...], 3: [...]}

for stage_num in stage_nums:
    # 1) Collect the file paths
    stage_path = "../graph_pool/New_main_experiment_1/" + str(stage_num) + "/*.*"

    # 2) Strip the directory part and the extension,
    #    e.g. '.../21_WS_11_029.yaml' => '21_WS_11_029'.
    # os.path is used instead of split('/')[-1][:-5] so the result does not
    # depend on the path separator or on a five-character extension.
    stage = [os.path.splitext(os.path.basename(path))[0]
             for path in glob.glob(stage_path)]

    # 3) Graph-type distribution: second '_'-separated field, e.g. 'WS'
    graph_distribution = [graph_i.split('_')[1] for graph_i in stage]

    graph_dict[stage_num] = graph_distribution

In [113]:
# Sanity check on stage 1: print the full tally, then the 'BA' count alone
result = Counter(graph_dict[1])
for shown in (result, result['BA']):
    print(shown)

Counter({'BA': 43, 'ER': 43, 'WS': 42})
43


In [114]:
# Per-stage tally of graph types,
# e.g. {1: Counter({'BA': 43, 'ER': 43, 'WS': 42}), ...}
graph_counter_dict = {}

for i in (1, 2, 3):
    result = graph_counter_dict[i] = Counter(graph_dict[i])

In [92]:
# One pie chart per stage showing the graph-type mix of the stage pool.
for stage_num in [1, 2, 3]:
    labels = ['BA', 'ER', 'WS']
    stage_counts = graph_counter_dict[stage_num]
    values = [stage_counts[label] for label in labels]

    pie = go.Pie(labels=labels, values=values)
    fig = go.Figure(data=[pie])
    fig.show()

### 1.2. Node size; 파레토 프론티어 분석 -- 기존 stage pool의 분포와 GA 에서 찾은 것의 분포 비교

In [102]:
# Node counts of the graphs picked by the Pareto-frontier chromosomes: each
# decoded graph number is used as a position into that stage's node_dict list.
stage_1_node = [node_dict[1][graph_name[0]] for graph_name in pareto_frontier_graph_name]
stage_2_node = [node_dict[2][graph_name[1]] for graph_name in pareto_frontier_graph_name]
stage_3_node = [node_dict[3][graph_name[2]] for graph_name in pareto_frontier_graph_name]

In [103]:
####################
# stage - 1
####################
# Stage-pool node counts overlaid with the node counts found on the Pareto
# frontier.
fig = go.Figure()

# Shared binning for both traces
start = 20
end = 40
size = 1

stage_1_traces = (
    ('stage #1 - Stage Pool', node_dict[1]),
    ('stage #1 - Pareto Frontier', stage_1_node),
)
for trace_name, trace_x in stage_1_traces:
    fig.add_trace(go.Histogram(
        x=trace_x,
        name=trace_name,
        xbins=dict(start=start, end=end, size=size),
    ))

# Overlay both histograms
fig.update_layout(barmode='overlay', title_text="Stage #1")

# Reduce opacity to see both histograms
fig.update_traces(opacity=0.75)
fig.show()

In [76]:
####################
# stage - 2
####################
# Stage-pool node counts overlaid with the node counts found on the Pareto
# frontier.
fig = go.Figure()

# Shared binning for both traces
start = 20
end = 40
size = 1

fig.add_trace(go.Histogram(
    x=node_dict[2],
    name='stage #2 - Stage Pool',
    xbins=dict(
        start=start,
        end=end,
        size=size)
))

fig.add_trace(go.Histogram(
    x=stage_2_node,
    name='stage #2 - Pareto Frontier',
    xbins=dict(
        start=start,
        end=end,
        size=size)
))

# Overlay both histograms; the title follows the stage-1 figure so the three
# per-stage figures can be told apart.
fig.update_layout(
    barmode='overlay',
    title_text="Stage #2")

# Reduce opacity to see both histograms
fig.update_traces(opacity=0.75)
fig.show()

In [77]:
####################
# stage - 3
####################
# Stage-pool node counts overlaid with the node counts found on the Pareto
# frontier.
fig = go.Figure()

# Shared binning for both traces
start = 20
end = 40
size = 1

fig.add_trace(go.Histogram(
    x=node_dict[3],
    name='stage #3 - Stage Pool',
    xbins=dict(
        start=start,
        end=end,
        size=size)
))

fig.add_trace(go.Histogram(
    x=stage_3_node,
    name='stage #3 - Pareto Frontier',
    xbins=dict(
        start=start,
        end=end,
        size=size)
))

# Overlay both histograms; the title follows the stage-1 figure so the three
# per-stage figures can be told apart.
fig.update_layout(
    barmode='overlay',
    title_text="Stage #3")

# Reduce opacity to see both histograms
fig.update_traces(opacity=0.75)
fig.show()

### 1.3. 그래프 종류; 파레토 프론티어 분석 -- 기존 stage pool의 분포와 GA 에서 찾은 것의 분포 비교

### 2. stage 가 3개 연결되어 있으므로, 따로따로 보면 의미가 없지 않나? 같이 보기

In [95]:
#########################
# Run for stages 1 ~ 3
#########################
stage_paths = {}
graph_info = []   # e.g. [[[32, 'WS'], [24, 'WS'], [36, 'BA']], ...]

# 1. Collect the stripped file names of every stage
for stage_num in [1, 2, 3]:
    # 1) Collect the file paths.
    # NOTE(review): glob.glob returns matches in arbitrary order, while step 2
    # below indexes these lists by the decoded graph number - confirm the
    # order here matches the one used when the GA was run.
    stage_path = "../graph_pool/New_main_experiment_1/" + str(stage_num) + "/*.*"

    # 2) Strip the directory part and the extension,
    #    e.g. '.../21_WS_11_029.yaml' => '21_WS_11_029'.
    # os.path is used instead of split('/')[-1][:-5] so the result does not
    # depend on the path separator or on a five-character extension.
    stage = [os.path.splitext(os.path.basename(path))[0]
             for path in glob.glob(stage_path)]

    # 3) Store
    stage_paths[stage_num] = stage

# 2. Build graph_info from the chromosomes on the Pareto frontier
# e.g. graph_i = [124, 32, 5]
for graph_i in pareto_frontier_graph_name:
    chromo_info = []
    for pos, stage_num in enumerate([1, 2, 3]):
        # File name selected for this stage
        stage_name = stage_paths[stage_num][graph_i[pos]]

        # Keep only the node count and the graph type: ['21', 'WS']
        stage_info = stage_name.split('_')[0:2]

        # '21' => 21 (int)
        stage_info[0] = int(stage_info[0])

        chromo_info.append(stage_info)

    graph_info.append(chromo_info)

In [98]:
# Display the [node count, graph type] pair of each stage, one row per
# Pareto-frontier chromosome.
graph_info

[[[32, 'WS'], [24, 'WS'], [36, 'BA']],
 [[32, 'WS'], [23, 'ER'], [27, 'BA']],
 [[32, 'WS'], [33, 'ER'], [40, 'BA']],
 [[32, 'WS'], [29, 'ER'], [27, 'BA']],
 [[32, 'WS'], [33, 'ER'], [40, 'WS']],
 [[21, 'ER'], [24, 'WS'], [31, 'BA']],
 [[35, 'BA'], [33, 'ER'], [20, 'WS']]]

In [107]:
# Collect the graph type per stage: chromo[k] is the [node count, graph type]
# pair of stage k + 1, so element [1] is the type.
stage_1_list = [chromo[0][1] for chromo in graph_info]
stage_2_list = [chromo[1][1] for chromo in graph_info]
stage_3_list = [chromo[2][1] for chromo in graph_info]

In [109]:
# Per-stage tally of the graph types found on the Pareto frontier; same layout
# as the stage-pool tally: {stage: Counter({type: count, ...}), ...}
pareto_graph_dict = {
    1: stage_1_list,
    2: stage_2_list,
    3: stage_3_list,
}
pareto_graph_counter_dict = {}

for i in (1, 2, 3):
    result = pareto_graph_counter_dict[i] = Counter(pareto_graph_dict[i])

In [123]:
#####################################################
# 191223 => plotly 로 색깔 통일하는게 오래 걸려서, 임시로 엑셀로 파이차트 만듦
##########################################################################################################
# labels = ['BA','ER','WS']

# # Create subplots: use 'domain' type for Pie subplot
# fig = make_subplots(rows=4, cols=1, specs=[[{'type':'domain'}, {'type':'domain'}, {'type':'domain'}, {'type':'domain'}]])

# # 1) 기존 stage pool 의 그래프 구성 - 그래프 종류별 (43, 43, 42)개로 동일함.
# # 따라서 하나만 plot함
# fig.add_trace(go.Pie(labels=labels, 
#                      values = [ graph_counter_dict[1]['BA'], graph_counter_dict[1]['ER'], graph_counter_dict[1]['WS'] ],
#                      name="GHG Emissions"),
#               1, 1)

# # 2) Stage #1 - Pareto Frontier
# stage_num = 1
# fig.add_trace(go.Pie(labels=labels,
#                      values=[ pareto_graph_counter_dict[stage_num]['BA'], pareto_graph_counter_dict[stage_num]['ER'], pareto_graph_counter_dict[stage_num]['WS'] ],
#                      name="CO2 Emissions"),
#               2, 1)

# # 3) Stage #2 - Pareto Frontier
# stage_num = 2
# fig.add_trace(go.Pie(labels=labels,
#                      values=[ pareto_graph_counter_dict[stage_num]['BA'], pareto_graph_counter_dict[stage_num]['ER'], pareto_graph_counter_dict[stage_num]['WS'] ],
#                      name="CO2 Emissions"),
#               3, 1)

# # 4) Stage #3 - Pareto Frontier
# stage_num = 3
# fig.add_trace(go.Pie(labels=labels,
#                      values=[ pareto_graph_counter_dict[stage_num]['BA'], pareto_graph_counter_dict[stage_num]['ER'], pareto_graph_counter_dict[stage_num]['WS'] ],
#                      name="CO2 Emissions"),
#               4, 1)

# fig.show()

# For each stage show two pie charts with a shared colour scheme: first the
# graph-type mix of the stage pool, then the mix on the Pareto frontier.
labels = ['BA', 'ER', 'WS']
colors = ['rgb(146, 123, 21)', 'rgb(177, 180, 34)', 'rgb(206, 206, 40)']

for stage_num in [1, 2, 3]:
    print("======================= Stage #{} ==================================".format(stage_num))

    # Stage pool
    pool_counts = graph_counter_dict[stage_num]
    values = [pool_counts[label] for label in labels]
    fig = go.Figure(data=[go.Pie(labels=labels, values=values, marker_colors=colors)])
    fig.show()

    # Pareto frontier
    pareto_counts = pareto_graph_counter_dict[stage_num]
    values_pareto = [pareto_counts[label] for label in labels]
    fig = go.Figure(data=[go.Pie(labels=labels, values=values_pareto, marker_colors=colors)])
    fig.show()

    print('====================================================================')









In [124]:
# Display the per-stage graph-type tally of the Pareto frontier.
pareto_graph_counter_dict

{1: Counter({'BA': 1, 'ER': 1, 'WS': 5}),
 2: Counter({'ER': 5, 'WS': 2}),
 3: Counter({'BA': 5, 'WS': 2})}

In [125]:
# Display the per-stage graph-type tally of the whole stage pool.
graph_counter_dict

{1: Counter({'BA': 43, 'ER': 43, 'WS': 42}),
 2: Counter({'BA': 43, 'ER': 43, 'WS': 42}),
 3: Counter({'BA': 43, 'ER': 43, 'WS': 42})}