# Epsilon metric reproduction

If you wish to reproduce the results presented in our paper from scratch, feel free to use the below code. Since\
the version of torch used by the authors of NAS-Bench-NLP is too old, we run our tests on everything on CPU. \
While the procedure is quite light, it still took us several hours to evaluate the whole benchmark dataset of over\
14k architectures.

We provide the codes to reproduce the results for single run and multiple runs architectures on PennTreebank data.

In [2]:
import os
import json
import data

import numpy as np
import pickle as pkl
import seaborn as sns
from tqdm import trange

import torch.nn as nn

from utils import batchify
from argparse import Namespace
from model import AWDRNNModel
from train import train, evaluate
from utils import get_batch
from multilinear import MultiLinear
from custom_rnn import CustomRNNCell, CustomRNN
from weight_drop import ParameterListWeightDrop
from epsilon_utils import prepare_seed, prepare_recepies

## Single run with Penn Tree Bank

14322 randomly created architectures are trained with a single seed.

Note: this procedure takes about 5 hours on CPU.

In [3]:
# Prepare a fixed batch of data
file_list=os.listdir("train_logs_single_run/")
log_dflt = json.load(open('train_logs_single_run/' + file_list[0], 'r'))
args = Namespace(**log_dflt)
corpus = data.Corpus(args.data)

ntokens = len(corpus.dictionary)
batch_size = 256

train_eval_data = batchify(corpus.train, batch_size, args, "cpu")
x, _ = get_batch(train_eval_data, 0, args, evaluation=True)

In [4]:
# Setting up the weights
weight_l = 1e-5
weight_h = 1e-3

In [7]:
accs = []
nparams = []

score = []

for i in trange(len(file_list)):
    file = file_list[i]
    log = json.load(open('train_logs_single_run/' + file, 'r'))
    args = Namespace(**log)

    # Build the model
    network = AWDRNNModel(args.model,
                          ntokens,
                          args.emsize,
                          args.nhid,
                          args.nlayers,
                          args.dropout,
                          args.dropouth,
                          args.dropouti,
                          args.dropoute,
                          args.wdrop,
                          args.tied,
                          args.recepie,
                          verbose=False)
    print(network.modules)
    preds = []
    for weight in [weight_l, weight_h]:
        # Initialize
        prepare_seed(21)
        def initialize_resnet(m):
            if type(m)==MultiLinear:
                for par in m.weights_raw:
                    nn.init.constant_(par, weight)
            elif type(m)==CustomRNNCell:
                for par in m.parameters():
                    nn.init.constant_(par, weight)
            elif type(m)==nn.modules.linear.Linear:
                nn.init.constant_(m.weight, weight)
            elif type(m)==nn.modules.container.ParameterList:
                for par in m.parameters():
                    nn.init.constant_(par, weight)
            elif type(m)==CustomRNN:
                initialize_resnet(m.cell)
            elif type(m)==ParameterListWeightDrop:
                initialize_resnet(m.module)
            elif type(m)==nn.modules.container.ModuleDict:
                for sub_m in m:
                    initialize_resnet(sub_m)
            elif type(m)==nn.modules.container.ModuleList:
                for sub_m in m:
                    initialize_resnet(sub_m)
            elif type(m)==AWDRNNModel:
                initialize_resnet(m.rnns)

        network.apply(initialize_resnet)
        network.eval()
        hidden = network.init_hidden(batch_size, weight)
        # Take care of embedding is not constant
        nn.init.uniform_(network.encoder.weight, 0, 1)
        _, _, raw_output, _ = network(x, hidden=hidden, return_h=True)
        pred = raw_output[-1][:,:,0].flatten()
        pred = pred.numpy()
        pred_min = np.nanmin(pred)
        pred_max = np.nanmax(pred)
        pred_norm = (pred - pred_min)/(pred_max - pred_min)
        preds.append(pred_norm)

    # Compute the score
    preds = np.array(preds)
    preds[np.where(preds==0)] = np.nan
    mae = np.nanmean(np.abs(preds[0,:]-preds[1,:]))
    mean = np.nanmean(preds)
    score.append(mae/mean)
    
    try:
        accs.append(log['test_losses'][-1])
    except:
        # Some architectures have no reported test perplexity
        accs.append(np.nan)
    nparams.append(args.num_params)
    
# Save your results
save_dir = './results/NLP/PTB/SINGLE/WEIGHT_{}_{}/BS{}/'.format(weight_l, weight_h, batch_size)
os.makedirs(save_dir, exist_ok=True)

save_dic = {}
save_dic["score"] = score
save_dic["accs"] = accs
save_dic["nparams"] = nparams
    
pkl.dump(save_dic, open(save_dir + "Data", "wb"))



<bound method Module.modules of AWDRNNModel(
  (lockdrop): LockedDropout()
  (idrop): Dropout(p=0.4, inplace=False)
  (hdrop): Dropout(p=0.25, inplace=False)
  (drop): Dropout(p=0.1, inplace=False)
  (encoder): Embedding(10000, 400)
  (rnns): ModuleList(
    (0): ParameterListWeightDrop(
      (module): CustomRNN(
        (cell): CustomRNNCell(
          (components): ModuleDict(
            (node_7): MultiLinear(
              input_sizes=[400, 600], output_size=600
              (weights): None
              (weights_raw): ParameterList(
                  (0): Parameter containing: [torch.FloatTensor of size 600x400]
                  (1): Parameter containing: [torch.FloatTensor of size 600x600]
              )
            )
            (node_12): MultiLinear(
              input_sizes=[600, 600], output_size=600
              (weights): None
              (weights_raw): ParameterList(
                  (0): Parameter containing: [torch.FloatTensor of size 600x600]
                 

  0%|                                                                                          | 1/14322 [00:00<2:55:16,  1.36it/s]

<bound method Module.modules of AWDRNNModel(
  (lockdrop): LockedDropout()
  (idrop): Dropout(p=0.4, inplace=False)
  (hdrop): Dropout(p=0.25, inplace=False)
  (drop): Dropout(p=0.1, inplace=False)
  (encoder): Embedding(10000, 400)
  (rnns): ModuleList(
    (0): ParameterListWeightDrop(
      (module): CustomRNN(
        (cell): CustomRNNCell(
          (components): ModuleDict(
            (node_0): MultiLinear(
              input_sizes=[600, 600], output_size=600
              (weights): None
              (weights_raw): ParameterList(
                  (0): Parameter containing: [torch.FloatTensor of size 600x600]
                  (1): Parameter containing: [torch.FloatTensor of size 600x600]
              )
            )
            (node_2): MultiLinear(
              input_sizes=[400, 600, 600], output_size=600
              (weights): None
              (weights_raw): ParameterList(
                  (0): Parameter containing: [torch.FloatTensor of size 600x400]
             

  0%|                                                                                          | 2/14322 [00:02<6:13:01,  1.56s/it]

<bound method Module.modules of AWDRNNModel(
  (lockdrop): LockedDropout()
  (idrop): Dropout(p=0.4, inplace=False)
  (hdrop): Dropout(p=0.25, inplace=False)
  (drop): Dropout(p=0.1, inplace=False)
  (encoder): Embedding(10000, 400)
  (rnns): ModuleList(
    (0): ParameterListWeightDrop(
      (module): CustomRNN(
        (cell): CustomRNNCell(
          (components): ModuleDict(
            (node_0): MultiLinear(
              input_sizes=[400, 600], output_size=600
              (weights): None
              (weights_raw): ParameterList(
                  (0): Parameter containing: [torch.FloatTensor of size 600x400]
                  (1): Parameter containing: [torch.FloatTensor of size 600x600]
              )
            )
            (node_2): MultiLinear(
              input_sizes=[600, 600], output_size=600
              (weights): None
              (weights_raw): ParameterList(
                  (0): Parameter containing: [torch.FloatTensor of size 600x600]
                  

  0%|                                                                                          | 3/14322 [00:04<5:25:39,  1.36s/it]

<bound method Module.modules of AWDRNNModel(
  (lockdrop): LockedDropout()
  (idrop): Dropout(p=0.4, inplace=False)
  (hdrop): Dropout(p=0.25, inplace=False)
  (drop): Dropout(p=0.1, inplace=False)
  (encoder): Embedding(10000, 400)
  (rnns): ModuleList(
    (0): ParameterListWeightDrop(
      (module): CustomRNN(
        (cell): CustomRNNCell(
          (components): ModuleDict(
            (node_0): MultiLinear(
              input_sizes=[600, 400], output_size=600
              (weights): None
              (weights_raw): ParameterList(
                  (0): Parameter containing: [torch.FloatTensor of size 600x600]
                  (1): Parameter containing: [torch.FloatTensor of size 600x400]
              )
            )
            (node_2): MultiLinear(
              input_sizes=[600, 400], output_size=600
              (weights): None
              (weights_raw): ParameterList(
                  (0): Parameter containing: [torch.FloatTensor of size 600x600]
                  

  0%|                                                                                          | 4/14322 [00:05<5:44:51,  1.45s/it]

<bound method Module.modules of AWDRNNModel(
  (lockdrop): LockedDropout()
  (idrop): Dropout(p=0.4, inplace=False)
  (hdrop): Dropout(p=0.25, inplace=False)
  (drop): Dropout(p=0.1, inplace=False)
  (encoder): Embedding(10000, 400)
  (rnns): ModuleList(
    (0): ParameterListWeightDrop(
      (module): CustomRNN(
        (cell): CustomRNNCell(
          (components): ModuleDict(
            (node_2): MultiLinear(
              input_sizes=[600, 400], output_size=600
              (weights): None
              (weights_raw): ParameterList(
                  (0): Parameter containing: [torch.FloatTensor of size 600x600]
                  (1): Parameter containing: [torch.FloatTensor of size 600x400]
              )
            )
            (node_6): MultiLinear(
              input_sizes=[600, 600, 400], output_size=600
              (weights): None
              (weights_raw): ParameterList(
                  (0): Parameter containing: [torch.FloatTensor of size 600x600]
             

  0%|                                                                                          | 5/14322 [00:07<5:55:42,  1.49s/it]

<bound method Module.modules of AWDRNNModel(
  (lockdrop): LockedDropout()
  (idrop): Dropout(p=0.4, inplace=False)
  (hdrop): Dropout(p=0.25, inplace=False)
  (drop): Dropout(p=0.1, inplace=False)
  (encoder): Embedding(10000, 400)
  (rnns): ModuleList(
    (0): ParameterListWeightDrop(
      (module): CustomRNN(
        (cell): CustomRNNCell(
          (components): ModuleDict(
            (node_0): MultiLinear(
              input_sizes=[600, 400], output_size=600
              (weights): None
              (weights_raw): ParameterList(
                  (0): Parameter containing: [torch.FloatTensor of size 600x600]
                  (1): Parameter containing: [torch.FloatTensor of size 600x400]
              )
            )
            (node_4): MultiLinear(
              input_sizes=[400, 600], output_size=600
              (weights): None
              (weights_raw): ParameterList(
                  (0): Parameter containing: [torch.FloatTensor of size 600x400]
                  

  0%|                                                                                          | 6/14322 [00:08<5:40:26,  1.43s/it]

<bound method Module.modules of AWDRNNModel(
  (lockdrop): LockedDropout()
  (idrop): Dropout(p=0.4, inplace=False)
  (hdrop): Dropout(p=0.25, inplace=False)
  (drop): Dropout(p=0.1, inplace=False)
  (encoder): Embedding(10000, 400)
  (rnns): ModuleList(
    (0): ParameterListWeightDrop(
      (module): CustomRNN(
        (cell): CustomRNNCell(
          (components): ModuleDict(
            (node_0): MultiLinear(
              input_sizes=[600, 400], output_size=600
              (weights): None
              (weights_raw): ParameterList(
                  (0): Parameter containing: [torch.FloatTensor of size 600x600]
                  (1): Parameter containing: [torch.FloatTensor of size 600x400]
              )
            )
            (node_3): MultiLinear(
              input_sizes=[600, 600], output_size=600
              (weights): None
              (weights_raw): ParameterList(
                  (0): Parameter containing: [torch.FloatTensor of size 600x600]
                  

  0%|                                                                                          | 7/14322 [00:09<5:21:24,  1.35s/it]

<bound method Module.modules of AWDRNNModel(
  (lockdrop): LockedDropout()
  (idrop): Dropout(p=0.4, inplace=False)
  (hdrop): Dropout(p=0.25, inplace=False)
  (drop): Dropout(p=0.1, inplace=False)
  (encoder): Embedding(10000, 400)
  (rnns): ModuleList(
    (0): ParameterListWeightDrop(
      (module): CustomRNN(
        (cell): CustomRNNCell(
          (components): ModuleDict(
            (node_0): MultiLinear(
              input_sizes=[600, 600], output_size=600
              (weights): None
              (weights_raw): ParameterList(
                  (0): Parameter containing: [torch.FloatTensor of size 600x600]
                  (1): Parameter containing: [torch.FloatTensor of size 600x600]
              )
            )
            (node_3): MultiLinear(
              input_sizes=[400, 600], output_size=600
              (weights): None
              (weights_raw): ParameterList(
                  (0): Parameter containing: [torch.FloatTensor of size 600x400]
                  

  0%|                                                                                          | 8/14322 [00:11<5:25:37,  1.36s/it]

<bound method Module.modules of AWDRNNModel(
  (lockdrop): LockedDropout()
  (idrop): Dropout(p=0.4, inplace=False)
  (hdrop): Dropout(p=0.25, inplace=False)
  (drop): Dropout(p=0.1, inplace=False)
  (encoder): Embedding(10000, 400)
  (rnns): ModuleList(
    (0): ParameterListWeightDrop(
      (module): CustomRNN(
        (cell): CustomRNNCell(
          (components): ModuleDict(
            (node_0): MultiLinear(
              input_sizes=[400, 600], output_size=600
              (weights): None
              (weights_raw): ParameterList(
                  (0): Parameter containing: [torch.FloatTensor of size 600x400]
                  (1): Parameter containing: [torch.FloatTensor of size 600x600]
              )
            )
            (node_3): MultiLinear(
              input_sizes=[600, 400], output_size=600
              (weights): None
              (weights_raw): ParameterList(
                  (0): Parameter containing: [torch.FloatTensor of size 600x600]
                  

  0%|                                                                                          | 9/14322 [00:12<5:25:21,  1.36s/it]

<bound method Module.modules of AWDRNNModel(
  (lockdrop): LockedDropout()
  (idrop): Dropout(p=0.4, inplace=False)
  (hdrop): Dropout(p=0.25, inplace=False)
  (drop): Dropout(p=0.1, inplace=False)
  (encoder): Embedding(10000, 400)
  (rnns): ModuleList(
    (0): ParameterListWeightDrop(
      (module): CustomRNN(
        (cell): CustomRNNCell(
          (components): ModuleDict(
            (node_0): MultiLinear(
              input_sizes=[600, 400], output_size=600
              (weights): None
              (weights_raw): ParameterList(
                  (0): Parameter containing: [torch.FloatTensor of size 600x600]
                  (1): Parameter containing: [torch.FloatTensor of size 600x400]
              )
            )
            (node_4): MultiLinear(
              input_sizes=[400, 600], output_size=600
              (weights): None
              (weights_raw): ParameterList(
                  (0): Parameter containing: [torch.FloatTensor of size 600x400]
                  

  0%|                                                                                         | 10/14322 [00:13<4:59:43,  1.26s/it]

<bound method Module.modules of AWDRNNModel(
  (lockdrop): LockedDropout()
  (idrop): Dropout(p=0.4, inplace=False)
  (hdrop): Dropout(p=0.25, inplace=False)
  (drop): Dropout(p=0.1, inplace=False)
  (encoder): Embedding(10000, 400)
  (rnns): ModuleList(
    (0): ParameterListWeightDrop(
      (module): CustomRNN(
        (cell): CustomRNNCell(
          (components): ModuleDict(
            (node_3): MultiLinear(
              input_sizes=[600, 600, 400], output_size=600
              (weights): None
              (weights_raw): ParameterList(
                  (0): Parameter containing: [torch.FloatTensor of size 600x600]
                  (1): Parameter containing: [torch.FloatTensor of size 600x600]
                  (2): Parameter containing: [torch.FloatTensor of size 600x400]
              )
            )
            (node_5): MultiLinear(
              input_sizes=[600, 400], output_size=600
              (weights): None
              (weights_raw): ParameterList(
             

  0%|                                                                                         | 11/14322 [00:14<4:36:05,  1.16s/it]

<bound method Module.modules of AWDRNNModel(
  (lockdrop): LockedDropout()
  (idrop): Dropout(p=0.4, inplace=False)
  (hdrop): Dropout(p=0.25, inplace=False)
  (drop): Dropout(p=0.1, inplace=False)
  (encoder): Embedding(10000, 400)
  (rnns): ModuleList(
    (0): ParameterListWeightDrop(
      (module): CustomRNN(
        (cell): CustomRNNCell(
          (components): ModuleDict(
            (node_0): MultiLinear(
              input_sizes=[600, 600], output_size=600
              (weights): None
              (weights_raw): ParameterList(
                  (0): Parameter containing: [torch.FloatTensor of size 600x600]
                  (1): Parameter containing: [torch.FloatTensor of size 600x600]
              )
            )
            (h_new_1): MultiLinear(
              input_sizes=[400, 600], output_size=600
              (weights): None
              (weights_raw): ParameterList(
                  (0): Parameter containing: [torch.FloatTensor of size 600x400]
                 

  0%|                                                                                         | 12/14322 [00:15<5:05:12,  1.28s/it]

<bound method Module.modules of AWDRNNModel(
  (lockdrop): LockedDropout()
  (idrop): Dropout(p=0.4, inplace=False)
  (hdrop): Dropout(p=0.25, inplace=False)
  (drop): Dropout(p=0.1, inplace=False)
  (encoder): Embedding(10000, 400)
  (rnns): ModuleList(
    (0): ParameterListWeightDrop(
      (module): CustomRNN(
        (cell): CustomRNNCell(
          (components): ModuleDict(
            (node_0): MultiLinear(
              input_sizes=[400, 600], output_size=600
              (weights): None
              (weights_raw): ParameterList(
                  (0): Parameter containing: [torch.FloatTensor of size 600x400]
                  (1): Parameter containing: [torch.FloatTensor of size 600x600]
              )
            )
            (node_2): MultiLinear(
              input_sizes=[600, 600], output_size=600
              (weights): None
              (weights_raw): ParameterList(
                  (0): Parameter containing: [torch.FloatTensor of size 600x600]
                  




KeyboardInterrupt: 

In [6]:
network.modules

<bound method Module.modules of AWDRNNModel(
  (lockdrop): LockedDropout()
  (idrop): Dropout(p=0.4, inplace=False)
  (hdrop): Dropout(p=0.25, inplace=False)
  (drop): Dropout(p=0.1, inplace=False)
  (encoder): Embedding(10000, 400)
  (rnns): ModuleList(
    (0): ParameterListWeightDrop(
      (module): CustomRNN(
        (cell): CustomRNNCell(
          (components): ModuleDict(
            (node_0): MultiLinear(
              input_sizes=[600, 600], output_size=600
              (weights): ParameterList(
                  (0): Parameter containing: [torch.FloatTensor of size 600x600]
                  (1): Parameter containing: [torch.FloatTensor of size 600x600]
              )
              (weights_raw): ParameterList(
                  (0): Parameter containing: [torch.FloatTensor of size 600x600]
                  (1): Parameter containing: [torch.FloatTensor of size 600x600]
              )
            )
            (h_new_1): MultiLinear(
              input_sizes=[400, 600], 

## Multiple runs with Penn Tree Bank

4114 randomly created architectures trained with 3 random seeds.

Note: this procedure takes about 1.5 hours on CPU.

In [None]:
# Prepare a fixed batch of data
file_list=os.listdir("train_logs_multi_runs/")
log_dflt = json.load(open('train_logs_multi_runs/' + file_list[0], 'r'))
args = Namespace(**log_dflt)
corpus = data.Corpus(args.data)

ntokens = len(corpus.dictionary)
batch_size = 256

train_eval_data = batchify(corpus.train, batch_size, args, "cpu")
x, _ = get_batch(train_eval_data, 0, args, evaluation=True)

In [None]:
recepie_dic = prepare_recepies()
recepies = list(recepie_dic.keys())

In [None]:
accs_mean = []
accs_min = []
accs_max = []
nparams = []

score = []

for i in trange(len(recepie_dic)):
    rec = recepies[i]
    indices = recepie_dic[rec]
    # As for the same recepie the metric performance does not change,
    # we only need to compute it once
    file = file_list[indices[0]]
    log = json.load(open('train_logs_multi_runs/' + file, 'r'))
    args = Namespace(**log)

    # Build the model
    network = AWDRNNModel(args.model,
                          ntokens,
                          args.emsize,
                          args.nhid,
                          args.nlayers,
                          args.dropout,
                          args.dropouth,
                          args.dropouti,
                          args.dropoute,
                          args.wdrop,
                          args.tied,
                          args.recepie,
                          verbose=False)
    preds = []
    for weight in [weight_l, weight_h]:
        # Initialize
        prepare_seed(21)
        def initialize_resnet(m):
            if type(m)==MultiLinear:
                for par in m.weights_raw:
                    nn.init.constant_(par, weight)
            elif type(m)==CustomRNNCell:
                for par in m.parameters():
                    nn.init.constant_(par, weight)
            elif type(m)==nn.modules.linear.Linear:
                nn.init.constant_(m.weight, weight)
            elif type(m)==nn.modules.container.ParameterList:
                for par in m.parameters():
                    nn.init.constant_(par, weight)
            elif type(m)==CustomRNN:
                initialize_resnet(m.cell)
            elif type(m)==ParameterListWeightDrop:
                initialize_resnet(m.module)
            elif type(m)==nn.modules.container.ModuleDict:
                for sub_m in m:
                    initialize_resnet(sub_m)
            elif type(m)==nn.modules.container.ModuleList:
                for sub_m in m:
                    initialize_resnet(sub_m)
            elif type(m)==AWDRNNModel:
                initialize_resnet(m.rnns)

        network.apply(initialize_resnet)
        network.eval()
        hidden = network.init_hidden(batch_size, weight)
        # Take care of embedding is not constant
        nn.init.uniform_(network.encoder.weight, 0, 1)
        _, _, raw_output, _ = network(x, hidden=hidden, return_h=True)
        pred = raw_output[-1][:,:,0].flatten()
        pred = pred.numpy()
        pred_min = np.nanmin(pred)
        pred_max = np.nanmax(pred)
        pred_norm = (pred - pred_min)/(pred_max - pred_min)
        preds.append(pred_norm)

    # Compute the score
    preds = np.array(preds)
    preds[np.where(preds==0)] = np.nan
    mae = np.nanmean(np.abs(preds[0,:]-preds[1,:]))
    mean = np.nanmean(preds)
    score.append(mae/mean)
    
    # Retrive 3 seeds test errors
    acc_run = []
    for ind in indices:
        file = file_list[ind]
        log = json.load(open('train_logs_multi_runs/' + file, 'r'))
        args = Namespace(**log)
        try:
            acc_run.append(log['test_losses'][-1])
        except: 
            acc_run.append(np.nan)
            
    accs_mean.append(np.nanmean(acc_run))
    accs_min.append(np.nanmin(acc_run))
    accs_max.append(np.nanmax(acc_run))
    nparams.append(args.num_params)

# Save your results
save_dir = './results/NLP/PTB/MULTI/WEIGHT_{}_{}/BS{}/'.format(weight_l, weight_h, batch_size)
os.makedirs(save_dir, exist_ok=True)

save_dic = {}
save_dic["score"] = score
save_dic["accs_mean"] = accs_mean
save_dic["accs_min"] = accs_min
save_dic["accs_max"] = accs_max
save_dic["nparams"] = nparams
    
pkl.dump(save_dic, open(save_dir + "Data", "wb"))