In [89]:
class ComputeLatencyIndex():
    """Score an architecture from its predicted accuracy, arithmetic intensity and latency.

    The score is called the compute-latency index (CLI) and is computed as

        cli = weights[1] * arithmetic_intensity / latency + weights[0] * accuracy

    ref: [paper]

    Args:
        accuracy_predictor: object exposing ``predict_accuracy(samples)``. It is
            called with a one-element list and ``.item()`` is taken on the result.
        ai_predictor: object exposing ``predict_efficiency(sample)``. According to
            the original author's note it returns 1 / arithmetic intensity, which
            is why the value is inverted before use (TODO confirm against the
            predictor implementation).
        latency_predictor: object exposing ``predict_efficiency(sample)`` whose
            result is used as the latency term.
        weights: indexable pair ``[accuracy_weight, intensity_over_latency_weight]``.
            ``None`` (the default) selects ``[0.5, 0.5]``.
    """

    def __init__(self, accuracy_predictor, ai_predictor, latency_predictor, weights=None):
        self.ai = ai_predictor
        self.lat = latency_predictor
        self.acc = accuracy_predictor    # expects a list of samples
        # Build a fresh list per instance: a list literal used as the default
        # argument would be shared by every instance relying on the default, so
        # editing `wts` on one predictor would silently change all the others.
        self.wts = [0.5, 0.5] if weights is None else weights

    def predict_efficiency(self, sample):
        """Return the CLI score of ``sample``.

        The value is returned as computed (no inversion or negation is applied),
        so with positive weights it grows with accuracy and with arithmetic
        intensity, and shrinks as latency grows. A caller that needs a
        minimization objective has to transform the result itself.
        """
        # The AI predictor is said to return the reciprocal of the arithmetic
        # intensity, so invert it to recover the intensity itself.
        arth_int = 1/self.ai.predict_efficiency(sample)
        latency = self.lat.predict_efficiency(sample)
        # The accuracy predictor works on batches: wrap the single sample in a
        # list and unwrap the scalar result.
        acc = self.acc.predict_accuracy([sample]).item()

        cli = self.wts[1]*arth_int/latency + self.wts[0]*acc

        return cli
        
    

In [22]:
# Hand-written architecture configuration used as a single test input for the
# predictors. Keys presumably follow the OFA naming (kernel sizes, expand
# ratios, depths, input resolution) -- TODO confirm against the predictor code.
cfg = {
    'ks': [7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7],
    'e': [6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6],
    'd': [4, 4, 4, 4, 4],
    'r': [224],
}

In [6]:
# accuracy predictor
# Builds the accuracy predictor that is later handed to ComputeLatencyIndex.

# Hard-coded device switch: selects between 'cuda:0' and 'cpu' for the
# predictors constructed in this notebook.
cuda_available = False

from ofa.accuracy_predictor import AccuracyPredictor
# ArthIntTable is not used in this cell; it is consumed by the cell that builds
# the arithmetic-intensity lookup table.
from ofa.flops_table import ArthIntTable


# presumably `pretrained=True` loads published predictor weights -- TODO confirm
accuracy_predictor = AccuracyPredictor(
    pretrained=True,
    device='cuda:0' if cuda_available else 'cpu'
)

print('The accuracy predictor is ready!')
# Print the underlying model so its layer structure is visible in the output.
print(accuracy_predictor.model)


The accuracy predictor is ready!
Sequential(
  (0): Linear(in_features=128, out_features=400, bias=True)
  (1): ReLU()
  (2): Linear(in_features=400, out_features=400, bias=True)
  (3): ReLU()
  (4): Linear(in_features=400, out_features=400, bias=True)
  (5): ReLU()
  (6): Linear(in_features=400, out_features=1, bias=True)
)


In [7]:
from fpga_utils.latency_estimation import LatencyTable  

# Arithmetic-intensity lookup table. Relies on `ArthIntTable` and
# `cuda_available` having been defined by an earlier cell.
arthemetic_intensity_lookup = ArthIntTable(pred_type='arthemetic_intensity', 
                                  device='cuda:0' if cuda_available else 'cpu',batch_size=1, 
                                  )

# Latency estimator built with its default arguments.
latency_estimator = LatencyTable()

print('The  Efficient Arthemetic intensity predictor is ready!')

Building the arthemetic_intensity lookup table (resolution=224)...
Built the arthemetic_intensity lookup table (resolution=224)!
The  Efficient Arthemetic intensity predictor is ready!


In [90]:
# Combine the accuracy, arithmetic-intensity and latency predictors into one
# CLI scorer; `weights` is left at its default.
predictor = ComputeLatencyIndex(accuracy_predictor, arthemetic_intensity_lookup, latency_estimator)

In [91]:
# Score the hand-written `cfg` architecture; the bare expression makes the
# notebook display the resulting CLI value.
predictor.predict_efficiency(cfg)

0.7384103622795521

In [26]:
import torch 
from ofa.imagenet_classification.elastic_nn.networks.ofa_mbv3 import OFAMobileNetV3
from ofa.model_zoo import ofa_net
from ofa.utils import download_url

# Identifier of the checkpoint file and the base URL it is downloaded from.
net_id  = 'ofa_mbv3_d234_e346_k357_w1.2'
url_base = "https://raw.githubusercontent.com/han-cai/files/master/ofa/ofa_nets/"

# Build the supernetwork. The kernel-size, expand-ratio, depth and width
# arguments below mirror the values spelled out in `net_id`
# (d234 / e346 / k357 / w1.2).
ofa_network = OFAMobileNetV3(
            dropout_rate=0,
            width_mult=1.2,
            ks_list=[3, 5, 7],
            expand_ratio_list=[3, 4, 6],
            depth_list=[2, 3, 4],
        )

# Fetch the checkpoint (stored under .torch/ofa_nets) and load its weights on CPU.
pt_path = download_url(url_base + net_id, model_dir=".torch/ofa_nets")
# NOTE(review): torch.load unpickles the downloaded file, which can execute
# arbitrary code if the remote content is not trusted -- consider restricting
# it to tensors only if the installed torch version supports that.
init = torch.load(pt_path, map_location="cpu")["state_dict"]
ofa_network.load_state_dict(init)
print('Supernetwork Ready')

Supernetwork Ready


In [78]:
from ofa.evolution_finder import ArchManager
# Draw 10000 random architectures and score each one. `samples[k]` and
# `eff[k]` stay aligned by position so the best/worst architecture can be
# looked up by index afterwards.
eff, samples = [], []
arch = ArchManager()
for i in range(10000):
    sample = arch.random_sample()
    samples.append(sample)
    eff.append(predictor.predict_efficiency(sample))
    

In [80]:

# Displays (index of the lowest score, highest score).
# NOTE(review): the two values describe different samples -- the index belongs
# to the minimum while the value is the maximum; confirm this mix is intended.
eff.index(min(eff)), max(eff)

(5248, 1.2582569625058737)

In [82]:
# Show the sampled architecture with the highest score (the first one if
# several samples tie for the maximum).
samples[eff.index(max(eff))]

{'wid': None,
 'ks': [7, 7, 7, 5, 5, 7, 3, 3, 7, 7, 5, 7, 3, 7, 3, 5, 7, 3, 5, 3],
 'e': [3, 6, 6, 6, 3, 6, 6, 6, 6, 6, 6, 4, 3, 4, 3, 4, 6, 3, 6, 6],
 'd': [4, 2, 2, 3, 4],
 'r': [224]}

In [77]:
# NOTE(review): `sm` is not defined in any visible cell; this cell only works
# with leftover kernel state and will raise NameError on a fresh
# Restart & Run All -- define it explicitly or remove the cell.
sm

[{'wid': None,
  'ks': [5, 7, 7, 7, 3, 7, 3, 7, 7, 3, 7, 7, 3, 5, 7, 5, 3, 5, 5, 7],
  'e': [3, 4, 3, 4, 3, 4, 4, 4, 6, 4, 3, 3, 3, 3, 4, 4, 3, 6, 6, 6],
  'd': [2, 2, 2, 3, 4],
  'r': [224]}]