# RNN based OPERATOR Sampler  


[Project] Select optimal combination of arithmetic operators to solve a simple equation   
[Base] Enas-Pytorch  
[Date] 2018/08/27, by funmv   
[Path] D:\PYTORCH_CASES\NEW_TRIAL\ENAS-pytorch   

In [1]:
import collections
import os

import torch
import torch.nn.functional as F

import utils
import config
from torch.autograd import Variable
import math

In [2]:
# RNN-based controller
# The network consists of a single LSTMCell and a few Linear layers.
class Controller(torch.nn.Module):
    
    def __init__(self, args):
        torch.nn.Module.__init__(self)
        self.args = args

        self.num_tokens = [len(args.shared_rnn_activations)] #['*','-','*','/']
            
        #### the number of token per timestep and length of timestep ##########
        self.num_tokens = [4,4,4,4,4] 
        self.args.num_blocks = 5
        
        # Tuning parameters: affect convergency
        #self.args.controller_hid = 200
        #self.args.entropy_mode = 'regularizer'
        
        
        self.func_names = args.shared_rnn_activations

        num_total_tokens = sum(self.num_tokens) #130->20

        self.encoder = torch.nn.Embedding(num_total_tokens, #130->20
                                          args.controller_hid) #100
        # ONE of LSTMCell
        self.lstm = torch.nn.LSTMCell(args.controller_hid, args.controller_hid) #100,100

        # Controller RNN consists of ONE LSTMCell with size (100,100).
        # The single LSTMCell handles timestep input repeatly. 
        # At each timestep, output of LSTMCell is used as sampler throughout DENSE(linear) layer. 
        # Most of parameters in NET are concentrated in DENSE layer
        self.decoders = []
        for idx, size in enumerate(self.num_tokens):
            decoder = torch.nn.Linear(args.controller_hid, size)
            self.decoders.append(decoder)

        self._decoders = torch.nn.ModuleList(self.decoders)

        self.reset_parameters() # Initialize all parameters bewteen -0.1 ~ 0.1
        self.static_init_hidden = utils.keydefaultdict(self.init_hidden)

        def _get_default_hidden(key):
            return utils.get_variable(
                torch.zeros(key, self.args.controller_hid), #1,100
                self.args.cuda, #True
                requires_grad=False)

        self.static_inputs = utils.keydefaultdict(_get_default_hidden)

    def reset_parameters(self):
        init_range = 0.1
        for param in self.parameters():
            param.data.uniform_(-init_range, init_range)
        for decoder in self.decoders:
            decoder.bias.data.fill_(0)

    def forward(self,  # pylint:disable=arguments-differ
                inputs,
                hidden,
                block_idx,
                is_embed):
        if not is_embed:   # a constant value for input after second timestep of LSTMCell
            embed = self.encoder(inputs) # nn.Embeding(130,100) changes the constant to (1,100) tensor
        else:
            embed = inputs # (1,100) tensor for first timestep input of LSTMMCell

        hx, cx = self.lstm(embed, hidden) #hx(1,100),cx(1,100),embed(1,100),hidden(2,1,100)
        logits = self.decoders[block_idx](hx)

        logits /= self.args.softmax_temperature #logits(1,4)

        # exploration
        if self.args.mode == 'train':
            logits = (self.args.tanh_c*F.tanh(logits)) #2.5*..

        return logits, (hx, cx)

    def sample(self, batch_size=1, with_details=False, save_dir=None):
        """Samples a set of `args.num_blocks` many computational nodes from the
        controller, where each node is made up of an activation function, and
        each node except the last also includes a previous node.
        """
        if batch_size < 1:
            raise Exception(f'Wrong batch_size: {batch_size} < 1')

        # [B, L, H]
        inputs = self.static_inputs[batch_size] #(1,100)
        hidden = self.static_init_hidden[batch_size] #(2,1,100)

        activations = []
        entropies = []
        log_probs = []
        prev_nodes = []
        
        # NOTE(brendan): The RNN controller alternately outputs an activation,
        # followed by a previous node, for each block except the last one,
        # which only gets an activation function. The last node is the output
        # node, and its previous node is the average of all leaf nodes.
        
        for block_idx in range(self.args.num_blocks): 
            logits, hidden = self.forward(inputs,
                                          hidden,
                                          block_idx,
                                          is_embed=(block_idx == 0))

            probs = F.softmax(logits, dim=-1) #(1,4)
            log_prob = F.log_softmax(logits, dim=-1) #(1,4)
            # TODO(brendan): .mean() for entropy?
            entropy = -(log_prob * probs).sum(1, keepdim=False) #(1,)

            action = probs.multinomial(num_samples=1).data # a constant value
            selected_log_prob = log_prob.gather(  # prob. corresponding to the constant action
                1, utils.get_variable(action, requires_grad=False))

            # TODO(brendan): why the [:, 0] here? Should it be .squeeze(), or
            # .view()? Same below with `action`.
            entropies.append(entropy) # save for return
            log_probs.append(selected_log_prob[:, 0])

            # action selected is used as next timestep input 
            inputs = utils.get_variable(action[:,0], requires_grad=False)
            activations.append(action[:, 0])
            
            
        activations = torch.stack(activations).transpose(0, 1)  

        return torch.cat(log_probs), torch.cat(entropies), activations

    def init_hidden(self, batch_size):
        zeros = torch.zeros(batch_size, self.args.controller_hid)
        return (utils.get_variable(zeros, self.args.cuda, requires_grad=False),
                utils.get_variable(zeros.clone(), self.args.cuda, requires_grad=False))

In [3]:
# Load the run configuration from the project's config module; `unparsed`
# receives whatever arguments get_args() returns as not consumed.
args, unparsed = config.get_args()
print (args)


2018-08-30 16:51:13,792:INFO::Unparsed args: ['-f', 'C:\\Users\\DJKang\\AppData\\Roaming\\jupyter\\runtime\\kernel-02bdfeba-1c94-4475-867e-18131e58c995.json']


Namespace(activation_regularization=False, activation_regularization_amount=2.0, batch_size=64, cnn_hid=64, controller_grad_clip=0, controller_hid=100, controller_lr=0.00035, controller_lr_cosine=False, controller_lr_max=0.05, controller_lr_min=0.001, controller_max_step=2000, controller_optim='adam', cuda=True, data_dir='data', dataset='ptb', derive_num_sample=100, discount=1.0, ema_baseline_decay=0.95, entropy_coeff=0.0001, entropy_mode='reward', load_path='', log_dir='logs', log_level='INFO', log_step=50, max_epoch=150, max_save_num=4, mode='train', network_type='rnn', norm_stabilizer_fixed_point=5.0, norm_stabilizer_regularization=False, norm_stabilizer_regularization_amount=1.0, num_blocks=12, num_gpu=1, ppl_square=False, random_seed=12345, reward_c=80, save_epoch=4, shared_cnn_types=['3x3', '5x5', 'sep 3x3', 'sep 5x5', 'max 3x3', 'max 5x5'], shared_decay=0.96, shared_decay_after=15, shared_dropout=0.4, shared_dropoute=0.1, shared_dropouti=0.65, shared_embed=1000, shared_grad_clip

In [4]:
# Build the controller and move its parameters to the GPU.
# NOTE(review): .cuda() is called unconditionally even though args carries a
# `cuda` flag — confirm a GPU is always available where this notebook runs.
controller = Controller(args)
controller.cuda()

Controller(
  (encoder): Embedding(20, 100)
  (lstm): LSTMCell(100, 100)
  (_decoders): ModuleList(
    (0): Linear(in_features=100, out_features=4, bias=True)
    (1): Linear(in_features=100, out_features=4, bias=True)
    (2): Linear(in_features=100, out_features=4, bias=True)
    (3): Linear(in_features=100, out_features=4, bias=True)
    (4): Linear(in_features=100, out_features=4, bias=True)
  )
)

In [7]:
# Smoke test: draw one operator sequence from the untrained controller.
log_probs, entropies, activations = controller.sample(with_details=True)

In [9]:
print (log_probs)
print (entropies)
print ('activations= {}'.format(activations)) # one row holding the sampled index for each timestep
print (activations.data.tolist()[0])

tensor([-1.3865, -1.3886, -1.3727, -1.3893, -1.3918], device='cuda:0')
tensor([ 1.3863,  1.3863,  1.3863,  1.3863,  1.3863], device='cuda:0')
activations= tensor([[ 3,  0,  1,  2,  3]], device='cuda:0')
[3, 0, 1, 2, 3]


In [10]:
# Optimizer setup: Adam over all controller parameters, using the learning
# rate from the configuration.
controller_optim = torch.optim.Adam(controller.parameters(), lr=args.controller_lr)

In [11]:
# Map each index the controller can sample (0..3) to an arithmetic operator symbol.
opdic = {0:'+', 1:'-', 2:'/', 3:'*'}
print (opdic, type(opdic))
print (opdic[0])


# Define the equation.
# Search for the combination of five sequential operators that best solves the equation:
# the closer the value of the expression is to zero, the higher the reward.
def evaloper(op):
    ''' Score an operator sequence sampled by the controller.

        op: sequence of operator indices (keys of `opdic`); the first five
            entries fill the five gaps between the fixed operands
        return: exp(-|value|) of the resulting expression, so the reward
                approaches 1 as the expression value approaches zero
    '''
    symbols = [opdic[code] for code in op[:5]]
    expression = '-4.0{}0.3{}(-4.0){}(-0.7){}2.0{}(-0.5)'.format(*symbols)
    # The evaluated string is assembled only from the fixed operand literals
    # above and the operator symbols stored in `opdic`.
    value = eval(expression)
    return math.exp(-math.fabs(value))

# Smoke test: score the operator sequence drawn by the earlier sampling call.
print (activations.data.tolist()[0])
print (evaloper(activations.data.tolist()[0]))

{0: '+', 1: '-', 2: '/', 3: '*'} <class 'dict'>
+
[3, 0, 1, 2, 3]
0.004630918733533246


In [12]:
# Train the controller with a policy-gradient (REINFORCE-style) update,
# using an exponential moving average of past rewards as the baseline.
baseline = None
controller.reset_parameters()

for step in range(10000):

    # Draw one operator sequence and score it.
    log_probs, entropies, activations = controller.sample(with_details=True)

    rewards = evaloper(activations.data.tolist()[0])

    # Moving-average baseline; on the first step it equals the reward, so the
    # first advantage (and therefore the first loss) is zero.
    if baseline is None:
        baseline = rewards
    else:
        decay = args.ema_baseline_decay
        baseline = decay * baseline + (1 - decay) * rewards

    adv = rewards - baseline

    # Policy loss. `adv` is a plain Python float, so multiplying by it directly
    # keeps the result on whatever device `log_probs` lives on.
    loss = -log_probs * adv
    # Only the 'regularizer' entropy mode is handled in this loop.
    if controller.args.entropy_mode == 'regularizer':
        loss = loss - args.entropy_coeff * entropies

    loss = loss.sum()  # or loss.mean()
    if step % 500 == 0:
        print (loss, rewards)

    # Update. Every iteration builds a fresh graph in controller.sample(), so
    # the graph does not need to be retained after the backward pass.
    controller_optim.zero_grad()
    loss.backward()
    controller_optim.step()



tensor(0., device='cuda:0') 0.056617985941811166
tensor(0.1517, device='cuda:0') 0.15244485102653757
tensor(-1.3019, device='cuda:0') 0.018076816826495798
tensor(1.00000e-02 *
       9.8138, device='cuda:0') 0.9488543210558013
tensor(1.00000e-02 *
       4.1340, device='cuda:0') 0.898397321348071
tensor(0.6101, device='cuda:0') 0.9999999999999998
tensor(1.00000e-02 *
       1.1036, device='cuda:0') 0.9488543210558013
tensor(1.00000e-03 *
       5.1819, device='cuda:0') 0.9488543210558013
tensor(1.00000e-03 *
       6.8021, device='cuda:0') 0.9488543210558013
tensor(1.00000e-03 *
       3.3121, device='cuda:0') 0.9488543210558013
tensor(-0.4958, device='cuda:0') 0.8105842459701871
tensor(1.00000e-03 *
       2.5662, device='cuda:0') 0.9488543210558013
tensor(-5.3906, device='cuda:0') 0.07243975703425146
tensor(1.00000e-03 *
       9.2837, device='cuda:0') 0.9488543210558013
tensor(1.00000e-03 *
       3.4676, device='cuda:0') 0.9488543210558013
tensor(1.00000e-03 *
       1.1435, device

In [13]:
# Verify the result
# Rebuild the expression from the most recently sampled operator sequence and
# print its value, which should be close to zero after training.
# NOTE(review): `activations` is whatever the last sampling call drew, not
# necessarily the best sequence seen during training.
op = activations.data.tolist()[0]
stri = '-4.0{}0.3{}(-4.0){}(-0.7){}2.0{}(-0.5)'.format(opdic[op[0]],opdic[op[1]],opdic[op[2]],opdic[op[3]],opdic[op[4]])
print (op,stri,'=',eval(stri))

[3, 2, 3, 2, 3] -4.0*0.3/(-4.0)*(-0.7)/2.0*(-0.5) = 0.0525
