# Import of NATS-Bench and ImageNet16

In [None]:
# Install the NATS-Bench API package and xautodl, both imported by later cells.
!pip install nats_bench
!pip install xautodl

# Download the benchmark archive and save it as NATS-tss-v1_0-3ffb9-simple.tar.
!wget 'https://www.dropbox.com/s/pasubh1oghex3g9/?dl=1' -O 'NATS-tss-v1_0-3ffb9-simple.tar'

In [None]:
import tarfile
!wget 'https://www.dropbox.com/s/o2fg17ipz57nru1/?dl=1' -O ImageNet16.tar.gz
file = tarfile.open('ImageNet16.tar.gz')
file.extractall('.')
file.close()
!tar xvf "NATS-tss-v1_0-3ffb9-simple.tar"


# Create the API instance for the topology search space in NATS

In [None]:
from nats_bench import create

# Open the benchmark extracted above for the 'tss' search space, with fast
# mode enabled and verbose output disabled.
api = create(
    "/content/NATS-tss-v1_0-3ffb9-simple",
    "tss",
    fast_mode=True,
    verbose=False,
)

# Imports of packages

In [None]:
import numpy as np, collections
import pandas as pd
import torch
import torch.nn as nn
import torchvision
import torchvision.datasets as dset
import torchvision.transforms as transforms
from xautodl.models import get_cell_based_tiny_net, get_search_spaces, CellStructure, get_search_spaces
from xautodl.utils import get_model_infos, obtain_accuracy
from xautodl.datasets.DownsampledImageNet import ImageNet16
import random
import os
import copy
import math
from scipy import stats
import time
import collections
import os, sys, time, glob, random, argparse
from copy import deepcopy
from collections import defaultdict

# Turn on cuDNN and its benchmark mode for the PyTorch backend.
torch.backends.cudnn.enabled = True
torch.backends.cudnn.benchmark = True


# Settings for trainloader and data augmentation

In [None]:
def get_datasets(name, batch_size=128):
    """Build the shuffled training DataLoader for a supported dataset.

    Args:
        name: "cifar10", "cifar100", or any name starting with "ImageNet16".
        batch_size: number of samples per batch. Defaults to 128, the value
            that used to be hard-coded.

    Returns:
        A ``(trainloader, batch_size)`` tuple.

    Raises:
        TypeError: if ``name`` is not one of the supported datasets.
    """
    # Per-channel statistics are given on the 0-255 scale and divided by 255
    # below before being passed to Normalize.
    if name == "cifar10":
        mean_255, std_255 = [125.3, 123.0, 113.9], [63.0, 62.1, 66.7]
    elif name == "cifar100":
        mean_255, std_255 = [129.3, 124.1, 112.4], [68.2, 65.4, 70.4]
    elif name.startswith("ImageNet16"):
        mean_255, std_255 = [122.68, 116.66, 104.01], [63.22, 61.26, 65.09]
    else:
        raise TypeError("Unknown dataset : {:}".format(name))
    mean = [x / 255 for x in mean_255]
    std = [x / 255 for x in std_255]

    # Every dataset uses the same pipeline: tensor conversion followed by
    # per-channel normalisation (no augmentation is applied).
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ])

    if name == "cifar10":
        trainset = dset.CIFAR10("/content/Cifar10", train=True, transform=transform, download=True)
    elif name == "cifar100":
        trainset = dset.CIFAR100("/content/Cifar100", train=True, transform=transform, download=True)
    else:
        # Only "ImageNet16*" names can reach this branch: anything else was
        # rejected by the check above.
        trainset = ImageNet16("ImageNet16", train=True, transform=transform)

    trainloader = torch.utils.data.DataLoader(
        trainset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=0,
        pin_memory=True,
    )
    return trainloader, batch_size

# Names of the datasets processed by the experiment cells below.
datasets = [
    "cifar10",
    "cifar100",
    "ImageNet16-120",
]

# Calculation of Hamming Distance as in NASWOT paper -> https://arxiv.org/pdf/2006.04647v3.pdf

In [None]:
def hamming_distance(x, y):
  """Count the positions where exactly one of x and y is truthy (non-zero)."""
  mismatch = np.logical_xor(x, y)
  return np.count_nonzero(mismatch)

def counting_forward_hook(module, inp, out):
  """Forward hook that adds a batch agreement matrix to the global Ktemp.

  The module input is flattened to one row per sample and binarised
  (1 where the value is > 0, else 0). Entry (i, j) of the matrix added to
  Ktemp is Na minus the Hamming distance between the codes of samples i and
  j, where Na is the number of flattened features; the diagonal is Na.

  Args:
    module: the module the hook is registered on (unused).
    inp: the module input, either a tensor or a tuple whose first item is one.
    out: the module output (unused).
  """
  global Ktemp
  if isinstance(inp, tuple):
    inp = inp[0]
  inp = inp.view(inp.size(0), -1)
  # Copy the binary codes to host memory once, instead of converting both
  # rows again for every pair of samples inside the loops.
  codes = (inp > 0).float().cpu().numpy()
  n_samples, Na = codes.shape
  res = np.zeros((n_samples, n_samples))
  for i in range(n_samples):
    res[i, i] = Na  # a sample agrees with itself on every feature
    for j in range(i + 1, n_samples):
      res[i, j] = Na - hamming_distance(codes[i], codes[j])
      res[j, i] = res[i, j]  # the matrix is symmetric
  Ktemp = Ktemp + res


# Variation for calculation of Kernel Matrix


In [None]:
def counting_forward_hook(module, inp, out):
  """Forward hook that accumulates a kernel matrix into the global Ktemp.

  The module input is flattened to one row per sample and binarised
  (1 where the value is > 0, else 0). Two matrix products are added to Ktemp
  instead of explicit pairwise Hamming distances: one over the binary codes
  and one over their complements.

  Args:
    module: the module the hook is registered on (unused).
    inp: the module input, either a tensor or a tuple whose first item is one.
    out: the module output (unused).
  """
  global Ktemp
  features = inp[0] if isinstance(inp, tuple) else inp
  flat = features.view(features.size(0), -1)
  active = (flat > 0).float()
  inactive = 1. - active
  # Entry (i, j): number of features that are 1 in both samples i and j.
  both_active = active @ active.t()
  # Entry (i, j): number of features that are 0 in both samples i and j.
  both_inactive = inactive @ inactive.t()
  Ktemp = Ktemp + both_active.cpu().numpy() + both_inactive.cpu().numpy()


def init(m):
    """Re-initialise the weight of a Conv2d or Linear module in place.

    Uses Xavier-normal initialisation; any other module type is left
    untouched. Intended to be passed to ``Module.apply``.
    """
    if not isinstance(m, (torch.nn.Conv2d, torch.nn.Linear)):
        return
    torch.nn.init.xavier_normal_(m.weight)

def hooklogdet(Ktemp, labels=None):
  """Return the natural log of |det(Ktemp)|.

  Only the log-magnitude from ``np.linalg.slogdet`` is returned; the sign is
  discarded and ``labels`` is accepted but not used.
  """
  return np.linalg.slogdet(Ktemp)[1]

# Setting CUDA device


In [None]:
# Use the first CUDA device when CUDA is available; otherwise use the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

#STEP 1 - Score at initialization

In [None]:
for dataset in datasets:
  train_loader, batch_size = get_datasets(dataset)
  # For every architecture index in the benchmark, append one row to
  # out_<dataset>.csv holding:
  #  - the metric score (log-determinant of the kernel matrix),
  #  - the test accuracy the API reports for hp=200,
  #  - the wall-clock time spent building the network, scoring it and
  #    querying the accuracy.
  for i in range(len(api)):
    start = time.time()
    config = api.get_net_config(i, dataset)
    network = get_cell_based_tiny_net(config)
    network.apply(init)

    # Register the forward hook on every ReLU of the network.
    for module in network.modules():
      if isinstance(module, torch.nn.ReLU):
        module.register_forward_hook(counting_forward_hook)

    network = network.to(device)

    # The hook adds NumPy arrays to the global Ktemp, so the accumulator is
    # kept as a host-side NumPy array. A tensor living on `device` would mix
    # a (possibly CUDA) tensor with NumPy arrays inside the hook.
    Ktemp = np.zeros((batch_size, batch_size))
    data_iterator = iter(train_loader)
    x, target = next(data_iterator)
    x, target = x.to(device), target.to(device)
    # Only the activations seen by the hooks are needed, so skip autograd
    # bookkeeping during the forward pass.
    with torch.no_grad():
      network(x)
    # Log of the determinant of the accumulated kernel matrix.
    score = hooklogdet(Ktemp, target)

    del Ktemp, network, data_iterator

    acc = api.get_more_info(int(i), dataset, is_random=False, hp=200)["test-accuracy"]

    # Elapsed time for this architecture, including the accuracy lookup.
    t = time.time() - start
    csv_dict = {'Dataset': dataset, 'Network': i, 'Metric': score, 'Accuracy': acc, 'Time': t}
    result = pd.DataFrame([csv_dict])
    # Append without a header: the file grows by one row per architecture.
    result.to_csv(f'out_{dataset}.csv', mode='a', index=False, header=False)
    del result


#STEP 2

## a) NASWOT search algorithm: pick the model with the highest NASWOT metric, using different sample sizes

In [None]:
iterations = 30
sample_sizes = [50,100,200,400]

for dataset in datasets:
  # The loader depends only on the dataset, so build it once here rather than
  # once per run. Every iter(train_loader) below still starts a new pass over
  # the shuffled data.
  train_loader, batch_size = get_datasets(dataset)

  for sample in sample_sizes:
    for run in range(iterations):
      scores = []
      # Draw `sample` architecture indexes at random; np.random.randint draws
      # independently, so the same index can appear more than once.
      networks = np.random.randint(0,len(api),size=sample)

      start = time.time()

      for i in networks:
        # Build and initialise the candidate network.
        config = api.get_net_config(int(i), dataset)
        network = get_cell_based_tiny_net(config)
        network.apply(init)
        # Register the forward hook on every ReLU of the network.
        for module in network.modules():
          if isinstance(module, torch.nn.ReLU):
            module.register_forward_hook(counting_forward_hook)

        network = network.to(device)
        # The hook adds NumPy arrays to the global Ktemp, so the accumulator
        # is kept as a host-side NumPy array. A tensor living on `device`
        # would mix a (possibly CUDA) tensor with NumPy arrays in the hook.
        Ktemp = np.zeros((batch_size, batch_size))
        data_iterator = iter(train_loader)
        x, target = next(data_iterator)
        x, target = x.to(device), target.to(device)
        # Only the activations seen by the hooks are needed, so skip autograd
        # bookkeeping during the forward pass.
        with torch.no_grad():
          network(x)
        score = hooklogdet(Ktemp, target)

        del Ktemp, network, data_iterator

        scores.append(score)

      # Keep the sampled network with the highest metric score.
      best_net = np.argmax(scores)
      acc = api.get_more_info(int(networks[best_net]),dataset,is_random=False,hp=200)['test-accuracy']
      t = time.time()-start
      # Append one row per run: dataset name, sample size, index of the
      # selected network, its metric score, its test accuracy for hp=200 and
      # the elapsed time of the whole search.
      csv_dict = {'Dataset': dataset, 'Sample_size' : sample,'Network': networks[best_net], 'Metric': scores[best_net], 'Accuracy': acc, 'Time': t}
      result = pd.DataFrame([csv_dict])
      result.to_csv('scores.csv', mode='a', index=False, header=False )

## Compute means and standard deviations for:
### -metric
### -accuracy for 200 epochs
### -time

In [None]:
# Load the per-run search results; the file has no header row, so the column
# names are supplied here.
df = pd.read_csv("/content/scores.csv",names = ['Dataset','size','Network','Metric','Accuracy','Time'], header = None, index_col=['Dataset','Network'])

# Mean and standard deviation of each quantity per (dataset, sample size).
grouped = df.groupby(by = ['Dataset','size'])
metric = grouped['Metric'].agg(['mean','std'])
accuracy = grouped['Accuracy'].agg(['mean','std'])
# Deliberately not named `time`: that would rebind the `time` module and
# break every later time.time() call in the notebook.
time_stats = grouped['Time'].agg(['mean','std'])
data = pd.concat([metric,accuracy,time_stats],axis=1)
# Name the columns before saving; otherwise the CSV header would contain
# three indistinguishable mean/std pairs.
data.columns = ['metric_mean','metric_std','acc_mean','acc_std','time_mean','time_std']
data.to_csv('stats_naswot.csv')

## b) NASWOT search using validation accuracy for picking optimal network after 12 epochs of training, considering different sizes

In [None]:
# Re-import so this cell works even if the name `time` was rebound to
# something else by another cell.
import time

iterations = 30
sample_sizes = [50,100,200,400]

for dataset in datasets:
  for sample in sample_sizes:
    for run in range(iterations):

      # Accumulated time: training times reported by the API plus the
      # wall-clock time of this run.
      t = 0

      start = time.time()

      # Test accuracies reported by the API for hp=12, one per sampled network.
      accuracies = []
      networks = np.random.randint(0,len(api),size=sample)

      for i in networks:
        # One lookup provides both the accuracy and the training time for
        # hp=12, so the API is queried once per network.
        info = api.get_more_info(int(i),dataset,is_random=False,hp=12)
        accuracies.append(info['test-accuracy'])
        t += info['train-all-time']

      # Pick the sampled network with the best hp=12 test accuracy.
      best_net = np.argmax(accuracies)

      # Look up the hp=200 test accuracy of the selected network.
      final_acc = api.get_more_info(int(networks[best_net]),dataset,is_random=False,hp=200)['test-accuracy']
      t += (time.time()-start)
      # Append one row per run: dataset name, sample size, index of the
      # selected network, its hp=12 accuracy, its hp=200 accuracy and the
      # total time, i.e. the API training times for hp=12 plus the
      # wall-clock time spent on the lookups and the selection.
      csv_dict = {'Dataset': dataset, 'size':sample, 'Network': networks[best_net], 'Acc12Epochs': accuracies[best_net], 'Acc200Epochs': final_acc, 'Time': t}
      result = pd.DataFrame([csv_dict])
      result.to_csv('accuracies.csv', mode='a', index=False, header=False )

## Compute means and standard deviations for:
### -accuracy for 12 epochs
### -accuracy for 200 epochs
### -time

In [None]:
# Load the per-run search results; the file has no header row, so the column
# names are supplied here.
df = pd.read_csv("/content/accuracies.csv",names = ['Dataset','size','Network','Accuracy12','Accuracy200','Time'], header = None, index_col=['Dataset','Network'])

# Mean and standard deviation of each quantity per (dataset, sample size).
grouped = df.groupby(by = ['Dataset','size'])
accuracy12 = grouped['Accuracy12'].agg(['mean','std'])
accuracy200 = grouped['Accuracy200'].agg(['mean','std'])
# Deliberately not named `time`: that would rebind the `time` module, so any
# cell calling time.time() afterwards (e.g. on a re-run) would fail.
time_stats = grouped['Time'].agg(['mean','std'])
data = pd.concat([accuracy12,accuracy200,time_stats],axis=1)
# Name the columns before saving; otherwise the CSV header would contain
# three indistinguishable mean/std pairs.
data.columns = ['acc12_mean','acc12_std','acc_mean','acc_std','time_mean','time_std']
data.to_csv('stats_accuracy.csv')