In [2]:
%load_ext autoreload
%autoreload 2

In [61]:
import os
import sys
import json
import time
import single_model_profiles as smp
import profiler
import end_to_end_profiles as e2e_profs
import numpy as np
from IPython.display import display


In [10]:
profs = smp.load_single_model_profiles()

In [12]:
profs.keys()

dict_keys(['alexnet', 'res152', 'res18', 'res50', 'inception', 'tf-kernel-svm', 'tf-lang-detect', 'tf-log-reg', 'tf-lstm', 'tf-nmt', 'tf-resnet-feats'])

In [20]:
dag = profiler.get_logical_pipeline("pipeline_three")
with open(os.path.abspath("../results/e2e_profs/systemx/resnet_cascade/slo_500ms/alex_1-r50_1-r152_2-171025_083730.json")) as f:
    sample_run = json.load(f)
scale_factors = profiler.get_node_scale_factors(sample_run, dag.root)

In [62]:
class ModelProfile(object):
    
    def __init__(self, profile, scale_factor):
        self.profile = profile
        self.scale_factor = scale_factor
    
    def estimate_performance(self, num_cpus, gpu_type, batch_size):
        """
        Estimates the model's performance under the specified configuration.
        
        Parameters:
        -----------
        num_cpus : int
            The number of virtual cpus allocated to model
        gpu_type : str
            Which type of GPU this model is using. Can be None, "p100", "k80", "v100".
        batch_size : int
            The batch size for the model
        
        Returns:
        --------
        tuple : (p99_latency, throughput, cost)
            Returns estimated latency, throughput, and cost for this configuration.
            If there is not an exact batch size match, the profiler will perform linear
            interpolation.
            
        Raises:
        -------
        A RuntimeException will be raised if the model has not been profiled under the requested configuration.
        """
        resource_bundle_matches = self.profile[(self.profile.gpu_type == gpu_type)
                                             & (self.profile.num_cpus_per_replica == num_cpus)]
        resource_bundle_matches = resource_bundle_matches.sort_values("mean_batch_size")
        glb = resource_bundle_matches['mean_batch_size'] <= batch_size
        lub = resource_bundle_matches['mean_batch_size'] >= batch_size
        idx_glb = resource_bundle_matches.loc[resource_bundle_matches.index[glb], 'mean_batch_size'].idxmax()
        idx_lub = resource_bundle_matches.loc[resource_bundle_matches.index[lub], 'mean_batch_size'].idxmin()
        relevant_entries = resource_bundle_matches.loc[idx_glb:idx_lub]
        assert np.all(np.diff(relevant_entries["mean_throughput_qps"]) > 0)
        estimated_thruput = np.interp(batch_size,
                                      relevant_entries["mean_batch_size"],
                                      relevant_entries["mean_throughput_qps"])
        
        assert np.all(np.diff(relevant_entries["p99_latency"]) > 0)
        estimated_latency = np.interp(batch_size,
                                      relevant_entries["mean_batch_size"],
                                      relevant_entries["p99_latency"])
        # The cost for all the entries with the same resource bundle is the same,
        # so we just get it from the first entry
        cost = relevant_entries["cost"].iloc[0]
        return (estimated_latency, estimated_thruput, cost)
        

        
        

In [64]:
test_prof = ModelProfile(profs["res50"], scale_factors["res50"])
test_prof.estimate_performance(batch_size=6, gpu_type="p100", num_cpus=2)

AssertionError: 

In [52]:
matches = test_prof.estimate_performance(2, "p100", 16)
matches = matches.sort_values("mean_batch_size")
glb = matches['mean_batch_size'] <= batch_size
lub = matches['mean_batch_size'] >= batch_size
idx_glb = matches.loc[matches.index[glb], 'mean_batch_size'].idxmax()
idx_lub = matches.loc[matches.index[lub], 'mean_batch_size'].idxmin()
display(matches.loc[idx_glb:idx_lub])
matches


# idx_glb, idx_lub


Unnamed: 0,num_cpus_per_replica,mean_throughput_qps,std_throughput_qps,p99_latency,mean_batch_size,cost,fname,cloud,gpu_type
58,2,171.691247,2.764047,0.609447,26.784837,0.795,results-p100-2-32-180201_232825.json,gcp,p100
60,2,172.551915,0.879018,1.125017,44.753849,0.795,results-p100-2-48-180202_000055.json,gcp,p100


Unnamed: 0,num_cpus_per_replica,mean_throughput_qps,std_throughput_qps,p99_latency,mean_batch_size,cost,fname,cloud,gpu_type
51,2,56.892347,0.122212,0.108707,1.0,0.795,results-p100-2-1-180201_023311.json,gcp,p100
52,2,60.85229,0.160357,0.097816,1.0,0.795,results-p100-2-1-180201_054538.json,gcp,p100
55,2,116.630455,0.455269,0.06555,1.95021,0.795,results-p100-2-2-180201_060645.json,gcp,p100
59,2,150.366386,0.566909,0.09233,3.805082,0.795,results-p100-2-4-180201_062346.json,gcp,p100
62,2,105.351405,2.189242,0.524941,8.0,0.795,results-p100-2-8-180201_195554.json,gcp,p100
53,2,173.329049,2.126446,0.239299,11.993266,0.795,results-p100-2-12-180201_201136.json,gcp,p100
54,2,154.347975,2.686674,0.599024,16.0,0.795,results-p100-2-16-180201_215116.json,gcp,p100
56,2,170.635352,2.619203,0.496442,18.970265,0.795,results-p100-2-20-180201_220804.json,gcp,p100
57,2,171.259816,1.353511,0.762376,24.0,0.795,results-p100-2-24-180201_222522.json,gcp,p100
58,2,171.691247,2.764047,0.609447,26.784837,0.795,results-p100-2-32-180201_232825.json,gcp,p100


In [57]:
matches["cost"].iloc[0]

0.79499999999999993

In [None]:
def estimate_performance(logical_pipeline, scale_factors, single_model_profiles