# Introduction

Notebook para executar grid search e encontrar os melhores parâmetros de RL com NAS-v3 em diversos datasets do NATS-Bench-201.

# Import Libraries

In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# re-imported before each cell runs and edits to the project sources are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
# Reduce TensorFlow's native log verbosity. The variable is set here, before
# the `import tensorflow` that appears further down in this notebook.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # show only errors

In [3]:
from IPython.display import clear_output

import pandas as pd
import numpy as np

In [4]:
import sys
# Put the directory two levels above this notebook at the front of the import
# path (only once, so re-running the cell does not add duplicates). The
# `validation_src` and `src` imports below rely on it.
if '../..' not in sys.path:
    sys.path.insert(0, '../..')
    
from validation_src.val_nas_executor import NASExecutor 

from src.base.experiment.training.optimizers import Optimizer

In [5]:
## restrict memory growth -------------------
import tensorflow as tf

# Enable on-demand GPU memory allocation for the first visible GPU instead of
# letting TensorFlow reserve the whole device. The notebook is expected to run
# on a GPU machine, so any failure here is treated as fatal.
physical_devices = tf.config.list_physical_devices('GPU')
try:
    gpu_0 = physical_devices[0]
    tf.config.experimental.set_memory_growth(gpu_0, True)
    # Alternative kept for reference: cap the GPU at a fixed memory limit.
    #tf.config.experimental.set_virtual_device_configuration(gpu_0, [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=6500)])
    print(' ==> Restrict GPU memory growth: True')
except (IndexError, ValueError, RuntimeError) as err:
    # IndexError: no GPU was listed. ValueError / RuntimeError: the errors
    # documented for set_memory_growth (invalid device / runtime already
    # initialized). Only these are caught so that KeyboardInterrupt and
    # SystemExit propagate untouched, and the original error is chained so the
    # real cause stays visible in the traceback.
    raise Exception("Invalid device or cannot modify virtual devices once initialized.") from err
## restrict memory growth -------------------

 ==> Restrict GPU memory growth: True


# Run Zoph LSTM Setup

In [6]:
# Experiment grid. Every combination of the four lists below is executed once.
ss_indicators = ['tss'] #'sss'
algos_list = ['rl']  # ,'random','evolution'
datasets_list = ['ImageNet16-120','cifar10','cifar100']
mth_list = [100]  # presumably "max train hours" (the result CSVs carry a max_train_hours column) - TODO confirm
use_neptune = False


for search_space in ss_indicators:
    # Create the output folder up front. Presumably test_nas_algo writes its
    # results CSV to the path it receives (the analysis cells read these same
    # paths), so a missing folder would otherwise only surface after the
    # search has finished.
    results_dir = f'./results/zoph_lstm/{search_space}'
    os.makedirs(results_dir, exist_ok=True)

    for algorithm in algos_list:
        for dataset_name in datasets_list:
            for max_train_hours in mth_list:
                results_path = f'{results_dir}/{algorithm}_{max_train_hours}h_{dataset_name}.csv'
                executor = NASExecutor(algorithm, dataset_name, max_train_hours, search_space, use_neptune)
                executor.test_nas_algo(results_path)
                # Drop the run's log output so the cell stays readable across runs.
                clear_output()

# Util Function

In [23]:
def get_best_arch(orig_df, dataset):
    """Display the best architecture recorded for ``dataset``.

    "Best" means the highest ``val_acc``; when several rows share that maximum,
    the one with the smallest ``id`` is shown.

    Parameters
    ----------
    orig_df : pandas.DataFrame
        Results table. Must contain a ``dataset`` column plus the columns
        ``id``, ``dna``, ``pred_acc``, ``train_accuracy``, ``val_acc`` and
        ``test_acc``.
    dataset : str
        Value of the ``dataset`` column to select (e.g. ``'cifar10'``).

    Returns
    -------
    None
        The single-row table is rendered with ``display``; nothing is
        returned, so calling this on the last line of a cell does not render
        the table a second time.

    Raises
    ------
    KeyError
        If any of the required columns is missing from ``orig_df``.
    """
    cols = ['id', 'dna', 'pred_acc', 'train_accuracy', 'val_acc', 'test_acc']
    candidates = orig_df.loc[orig_df['dataset'] == dataset, cols]

    # Filter straight to the rows holding the maximum validation accuracy; a
    # full sort by val_acc beforehand is unnecessary. An empty selection yields
    # NaN here and therefore an empty table below, rather than an error.
    best_val_acc = candidates['val_acc'].max()
    best = candidates[candidates['val_acc'] == best_val_acc]

    # Tie-break on the smallest id and renumber the index from 0.
    best = best.sort_values(by='id', ignore_index=True)
    display(best.head(1))

# Zoph LSTM Setup Analysis

## SSS - Search Space

In [8]:
# Load the RL results of the SSS search space, one table per dataset.
# NOTE(review): the run cell above currently has only 'tss' enabled, so these
# files presumably come from an earlier run with 'sss' - confirm they exist.
sss_cifar10_df, sss_cifar100_df, sss_imagenet_df = map(
    pd.read_csv,
    (
        './results/zoph_lstm/sss/rl_100h_cifar10.csv',
        './results/zoph_lstm/sss/rl_100h_cifar100.csv',
        './results/zoph_lstm/sss/rl_100h_ImageNet16-120.csv',
    ),
)

In [10]:
# Preview the first rows of the SSS / cifar10 table to check the loaded columns.
sss_cifar10_df.head()

Unnamed: 0,id,dna,cell_spec,val_acc,pred_acc,latency,time_cost,total_time,test_acc,test_loss,...,time_spent_in_hours,time_spent_in_secs,train_accuracy,train_loss,train_per_time,train_all_time,comment,algorithm,dataset,max_train_hours
0,6919,"DNA([7, 7, 7, 7, 7])",64:64:64:64:64,84.956,-1,0.020403,70.035428,291018.409453,93.4,0.253872,...,80,291018.409,99.92,0.007021,10.806808,972.612677,"In this dict, train-loss/accuracy/time is the ...",RL_DNAGenerator(),cifar10,100
1,7120,"DNA([7, 7, 7, 7, 7])",64:64:64:64:64,84.956,-1,0.020403,70.035428,301681.114148,93.4,0.253872,...,83,301681.114,99.92,0.007021,10.806808,972.612677,"In this dict, train-loss/accuracy/time is the ...",RL_DNAGenerator(),cifar10,100
2,7167,"DNA([7, 7, 7, 7, 7])",64:64:64:64:64,84.956,-1,0.020403,70.035428,304204.968086,93.4,0.253872,...,84,304204.968,99.92,0.007021,10.806808,972.612677,"In this dict, train-loss/accuracy/time is the ...",RL_DNAGenerator(),cifar10,100
3,6932,"DNA([7, 7, 7, 7, 2])",64:64:64:64:24,84.592,-1,0.020061,70.235065,291705.616526,93.0,0.269074,...,81,291705.617,99.862,0.010108,10.778216,970.03943,"In this dict, train-loss/accuracy/time is the ...",RL_DNAGenerator(),cifar10,100
4,7151,"DNA([7, 7, 7, 7, 2])",64:64:64:64:24,84.592,-1,0.020061,70.235065,303331.299581,93.0,0.269074,...,84,303331.3,99.862,0.010108,10.778216,970.03943,"In this dict, train-loss/accuracy/time is the ...",RL_DNAGenerator(),cifar10,100


In [24]:
# Show the top-validation-accuracy architecture of each SSS table, in the
# order cifar10, cifar100, ImageNet16-120.
for sss_df, sss_dataset in (
    (sss_cifar10_df, 'cifar10'),
    (sss_cifar100_df, 'cifar100'),
    (sss_imagenet_df, 'ImageNet16-120'),
):
    get_best_arch(sss_df, sss_dataset)

Unnamed: 0,id,dna,pred_acc,train_accuracy,val_acc,test_acc
0,6919,"DNA([7, 7, 7, 7, 7])",-1,99.92,84.956,93.4


Unnamed: 0,id,dna,pred_acc,train_accuracy,val_acc,test_acc
0,813,"DNA([7, 7, 7, 7, 7])",-1,96.982,61.06,70.72


Unnamed: 0,id,dna,pred_acc,train_accuracy,val_acc,test_acc
0,2388,"DNA([6, 5, 5, 6, 5])",-1,52.39354,36.366667,44.5


## TSS - Search Space

In [14]:
# Load the RL results of the TSS search space, one table per dataset.
tss_cifar10_df, tss_cifar100_df, tss_imagenet_df = map(
    pd.read_csv,
    (
        './results/zoph_lstm/tss/rl_100h_cifar10.csv',
        './results/zoph_lstm/tss/rl_100h_cifar100.csv',
        './results/zoph_lstm/tss/rl_100h_ImageNet16-120.csv',
    ),
)

In [25]:
# Show the top-validation-accuracy architecture of each TSS table, in the
# order cifar10, cifar100, ImageNet16-120.
for tss_df, tss_dataset in (
    (tss_cifar10_df, 'cifar10'),
    (tss_cifar100_df, 'cifar100'),
    (tss_imagenet_df, 'ImageNet16-120'),
):
    get_best_arch(tss_df, tss_dataset)

Unnamed: 0,id,dna,pred_acc,train_accuracy,val_acc,test_acc
0,1666,"DNA([2, 4, 3, 1, 4, 2])",-1,99.858,80.712,92.27


Unnamed: 0,id,dna,pred_acc,train_accuracy,val_acc,test_acc
0,1731,"DNA([0, 1, 0, 3, 0, 1])",-1,91.348,54.76,67.02


Unnamed: 0,id,dna,pred_acc,train_accuracy,val_acc,test_acc
0,85,"DNA([2, 2, 2, 1, 0, 3])",-1,55.348055,34.766667,44.066667


# Results Analysis

* Before ==> melhor configuração da grid search para o search space + dataset
* After  ==> configuração após a grid search com abordagem de probabilidade
* Zoph   ==> experimentos realizados com a configuração do paper de Zoph (2017)


**SSS**:

* Cifar-10
  * Before: 85.02% val_acc / ID = 354
  * After:  82.80% val_acc / ID = 638
  * Zoph:   84.95% val_acc / ID = 6919

* Cifar-100
  * Before: 61.06% val_acc / ID = 105 
  * After:  58.64% val_acc / ID = 1494
  * Zoph:   61.06% val_acc / ID = 813

* ImageNet16-120
  * Before: 39.33% val_acc / ID = 54
  * After:  38.13% val_acc / ID = 108
  * Zoph:   36.36% val_acc / ID = 2388
  
**TSS**:

* Cifar-10
  - Before: 84.96% val_acc / ID = 127
  - After:  83.50% val_acc / ID = 5712 
  - Zoph:   80.71% val_acc / ID = 1666

* Cifar-100
  - Before: 61.32% val_acc / ID = 2204
  - After:  58.64% val_acc / ID = 1494
  - Zoph:   54.76% val_acc / ID = 1731

* ImageNet16-120
  - Before: 37.63% val_acc / ID = 20
  - After:  27.90% val_acc / ID = 624
  - Zoph:   34.76% val_acc / ID = 85