# 06_2 => Cifar-100. 40 에폭 학습시켜보기. val_accuracy의 변화 추이 알아보기

[Reference] https://pytorch.org/tutorials/intermediate/ddp_tutorial.html

# Main

In [1]:
import os
import logging
from easydict import EasyDict
import numpy as np
import random

import time
import datetime

from deap import tools

In [2]:
from utils_kyy.utils_graph import make_random_graph
from utils_kyy.create_toolbox_cifar100 import create_toolbox_for_NSGA_RWNN

## 1. generation pool 구성하기 (Small RWNN 대상)

In [3]:
# 실험을 위한 환경 셋팅
run_code = 'test_kyy_CIFAR100_time_check'
stage_pool_path = './graph_pool' + '/' + run_code + '/'
log_path = './logs/' + run_code + '/'

# check & make the directory
if not os.path.exists(stage_pool_path): os.makedirs(stage_pool_path)
if not os.path.isdir(log_path): os.makedirs(log_path)

# write the log
log_file_name = log_path + 'logging.log'
logging.basicConfig(filename=log_file_name, level=logging.INFO)
logging.info('Start to write log.')

In [4]:
# make random graph pool
num_graph = 100
make_random_graph(num_graph, stage_pool_path)

######################################################
# => 최종적으로, num_graph와 stage_pool_path 를 인수로 받아서, 해당 path에 num_graph 수 만큼의 그래프 떨궈주는 함수 만들기
#    일단은 정해진 graph_model은 'WS', K, P 는 인수로 받지 말고 구현
#      =>  이후에 확장하기.
######################################################

Start to make random graph pool...
Finished


## 2. Main NSGA_RWNN

In [5]:
# define 'args_train' for evaluation
args_train = EasyDict({
    'lr_mode': 'cosine',
    'warmup_mode': 'linear',    # default
    'base_lr': 0.1,
    'momentum': 0.9, 
    'weight_decay': 0.00005,
    'print_freq': 100,

    'epochs': 40,
    'batch_size': 256,   # 128 => 256

    'workers': 32,  # 2 => 

    'warmup_epochs': 0,
    'warmup_lr': 0.0,
    'targetlr': 0.0,

})

In [6]:
# create custom_toolbox
# num_graph, args_train, stage_pool_path => to define the 'evaluate' function 
toolbox = create_toolbox_for_NSGA_RWNN(num_graph, args_train, stage_pool_path, log_file_name)

In [7]:
"""
4. Algorithms
 For the purpose of completeness we will develop the complete generational algorithm.
"""

POP_SIZE = 2    # population size
NGEN = 4    # number of Generation
CXPB = 0.5    # crossover probability 
MUTPB = 0.5    # mutation probability


# log에 기록할 stats
stats = tools.Statistics(lambda ind: ind.fitness.values)
stats.register("min", np.min, axis=0)
stats.register("max", np.max, axis=0)

logbook = tools.Logbook()
logbook.header = "gen", "evals", "min", "max", "evals_time", "gen_time"

# population 생성.  (toolbox.population은 creator.Individual n개를 담은 list를 반환. (=> population)
now = datetime.datetime.now()
now_str = now.strftime('%Y-%m-%d %H:%M:%S')
print("Initialion starts ...")
logging.info("Initialion starts at " + now_str)
init_start_time = time.time()

pop = toolbox.population(n=POP_SIZE)

# Evaluate the individuals with an invalid fitness
invalid_ind = [ind for ind in pop if not ind.fitness.valid]
fitnesses = toolbox.map(toolbox.evaluate, invalid_ind)    # .evaluate는 tuple을 반환. 따라서 fitnesses는 튜플을 원소로 가지는 list
for ind, fit in zip(invalid_ind, fitnesses):
    ind.fitness.values = fit   # ind.fitness.values = (val_accuracy, flops) 튜플

# This is just to assign the crowding distance to the individuals
# no actual selection is done
pop = toolbox.select(pop, len(pop))

record = stats.compile(pop)
logbook.record(gen=0, evals=len(invalid_ind), **record)
print(logbook.stream)

now = datetime.datetime.now()
now_str = now.strftime('%Y-%m-%d %H:%M:%S')
print("Initialization is finished at", now_str)
logging.info("Initialion is finished at " + now_str)

init_time = time.time() - init_start_time
logging.info("Initialion time = " + str(init_time) + "s")


print()

# Begin the generational process
for gen in range(1, NGEN):
    now = datetime.datetime.now()
    now_str = now.strftime('%Y-%m-%d %H:%M:%S')
    print("#####", gen, "th generation starts at", now_str)
    logging.info(str(gen) + "th generation starts at" + now_str)
    
    start_gen = time.time()
    # Vary the population
    offspring = tools.selTournamentDCD(pop, len(pop))
    offspring = [toolbox.clone(ind) for ind in offspring]

    for ind1, ind2 in zip(offspring[::2], offspring[1::2]):
        if random.random() <= CXPB:
            toolbox.mate(ind1, ind2)

        toolbox.mutate(ind1, indpb=MUTPB)
        toolbox.mutate(ind2, indpb=MUTPB)
        del ind1.fitness.values, ind2.fitness.values

    # Evaluate the individuals with an invalid fitness
    print("##### Evaluation starts")
    start_time = time.time()
    
    invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
    fitnesses = toolbox.map(toolbox.evaluate, invalid_ind)
    for ind, fit in zip(invalid_ind, fitnesses):
        ind.fitness.values = fit
        
    eval_time_for_one_generation = time.time() - start_time        
    print("##### Evaluation ends (Time : %.3f)" % eval_time_for_one_generation)
    
    # Select the next generation population
    pop = toolbox.select(pop + offspring, POP_SIZE)
    
    gen_time = time.time() - start_gen
    print('##### [gen_time: %.3fs]' % gen_time, gen, 'th generation is finished.')
    
    record = stats.compile(pop)
    logbook.record(gen=gen, evals=len(invalid_ind), **record,
                   evals_time=eval_time_for_one_generation, gen_time=gen_time)
    
    logging.info('Gen [%03d/%03d] -- evals: %03d, evals_time: %.4fs, gen_time: %.4fs' % (gen, NGEN, len(invalid_ind), eval_time_for_one_generation, gen_time))
    print(logbook.stream)

Initialion starts ...
Files already downloaded and verified
Files already downloaded and verified
	 - Epoch: [0][0/196]	Time 14.883 (14.883)	Loss 6.9323 (6.9323)	Prec@1 0.000 (0.000)	Prec@5 0.000 (0.000)
	 - Epoch: [0][100/196]	Time 0.704 (0.825)	Loss 4.0394 (4.4480)	Prec@1 9.375 (4.974)	Prec@5 29.297 (18.669)
##### Validation_time 15.874 Prec@1 10.620 Prec@5 32.060 #####
	 - Epoch: [1][0/196]	Time 7.830 (7.830)	Loss 3.6646 (3.6646)	Prec@1 10.156 (10.156)	Prec@5 38.672 (38.672)
	 - Epoch: [1][100/196]	Time 0.714 (0.759)	Loss 3.5147 (3.6147)	Prec@1 16.797 (13.954)	Prec@5 43.359 (38.598)
##### Validation_time 15.246 Prec@1 19.850 Prec@5 46.960 #####
	 - Epoch: [2][0/196]	Time 6.100 (6.100)	Loss 3.2807 (3.2807)	Prec@1 21.875 (21.875)	Prec@5 49.219 (49.219)
	 - Epoch: [2][100/196]	Time 0.704 (0.748)	Loss 3.0007 (3.1828)	Prec@1 28.516 (21.558)	Prec@5 52.344 (50.928)
##### Validation_time 14.959 Prec@1 23.610 Prec@5 52.330 #####
	 - Epoch: [3][0/196]	Time 6.236 (6.236)	Loss 2.7609 (2.7609)	P

##### Validation_time 15.502 Prec@1 64.330 Prec@5 87.990 #####
	 - Epoch: [29][0/196]	Time 6.088 (6.088)	Loss 0.0039 (0.0039)	Prec@1 100.000 (100.000)	Prec@5 100.000 (100.000)
	 - Epoch: [29][100/196]	Time 0.753 (0.755)	Loss 0.0050 (0.0059)	Prec@1 100.000 (99.965)	Prec@5 100.000 (100.000)
##### Validation_time 15.020 Prec@1 64.590 Prec@5 88.160 #####
	 - Epoch: [30][0/196]	Time 6.219 (6.219)	Loss 0.0129 (0.0129)	Prec@1 99.609 (99.609)	Prec@5 100.000 (100.000)
	 - Epoch: [30][100/196]	Time 0.724 (0.753)	Loss 0.0034 (0.0050)	Prec@1 100.000 (99.981)	Prec@5 100.000 (100.000)
##### Validation_time 15.210 Prec@1 64.730 Prec@5 88.050 #####
	 - Epoch: [31][0/196]	Time 5.848 (5.848)	Loss 0.0033 (0.0033)	Prec@1 100.000 (100.000)	Prec@5 100.000 (100.000)
	 - Epoch: [31][100/196]	Time 0.711 (0.751)	Loss 0.0041 (0.0048)	Prec@1 100.000 (99.981)	Prec@5 100.000 (100.000)
##### Validation_time 15.213 Prec@1 64.760 Prec@5 88.140 #####
	 - Epoch: [32][0/196]	Time 6.155 (6.155)	Loss 0.0027 (0.0027)	Prec@1

	 - Epoch: [17][100/196]	Time 0.523 (0.579)	Loss 0.4238 (0.3936)	Prec@1 86.719 (87.887)	Prec@5 98.828 (98.902)
##### Validation_time 12.717 Prec@1 58.030 Prec@5 85.130 #####
	 - Epoch: [18][0/196]	Time 5.684 (5.684)	Loss 0.2727 (0.2727)	Prec@1 91.406 (91.406)	Prec@5 100.000 (100.000)
	 - Epoch: [18][100/196]	Time 0.534 (0.580)	Loss 0.3259 (0.2937)	Prec@1 90.234 (91.472)	Prec@5 99.609 (99.327)
##### Validation_time 12.959 Prec@1 58.360 Prec@5 84.680 #####
	 - Epoch: [19][0/196]	Time 5.990 (5.990)	Loss 0.2001 (0.2001)	Prec@1 95.703 (95.703)	Prec@5 99.609 (99.609)
	 - Epoch: [19][100/196]	Time 0.520 (0.585)	Loss 0.1709 (0.2254)	Prec@1 97.266 (93.607)	Prec@5 99.609 (99.640)
##### Validation_time 13.205 Prec@1 59.040 Prec@5 85.320 #####
	 - Epoch: [20][0/196]	Time 5.888 (5.888)	Loss 0.1995 (0.1995)	Prec@1 95.312 (95.312)	Prec@5 99.609 (99.609)
	 - Epoch: [20][100/196]	Time 0.537 (0.584)	Loss 0.1872 (0.1785)	Prec@1 95.703 (95.096)	Prec@5 100.000 (99.764)
##### Validation_time 12.673 Prec@1 6

ValueError: selTournamentDCD: individuals length must be a multiple of 4

In [None]:
# Check logbook
logbook

### logbook - plot

In [None]:
type(logbook)

In [None]:
len(logbook)

In [None]:
logbook[0]

In [None]:
logbook[0]['min']

In [None]:
-logbook[0]['min'][0], logbook[0]['min'][1]

In [None]:
min_val_acc = []
min_flops = []

max_val_acc = []
max_flops = []

for i in range(len(logbook)):
    min_val_acc_i, min_flops_i = -logbook[i]['min'][0], logbook[i]['min'][1]
    max_val_acc_i, max_flops_i = -logbook[i]['max'][0], logbook[i]['max'][1]
    
    min_val_acc.append(min_val_acc_i)
    min_flops.append(min_flops_i)
    max_val_acc.append(max_val_acc_i)
    max_flops.append(max_flops_i)    

In [None]:
from matplotlib import pyplot as plt
%matplotlib inline

In [None]:
# NSGA-2가 제대로 동작함을 알 수 있음.
plt.plot(min_val_acc, min_flops)

plt.xlabel('min_val_acc')
plt.ylabel('min_flops')
plt.title('Experiment Result')

plt.show()

In [None]:
# gen time 확인
gen_time_list = []

for i in range(1, len(logbook)):
    # 첫번째 initialize 에는 gen_time이 없음
    gen_time_i = logbook[i]['gen_time']
    gen_time_list.append(gen_time_i)

In [None]:
plt.plot(gen_time_list)

plt.xlabel('generation')
plt.ylabel('gen_time_list')
plt.title('Experiment Result')

plt.show()