In [1]:
import importlib
import torch
import torch.nn as nn
import itertools
from collections import OrderedDict 
import onnx
import tvm
from onnx import shape_inference
import copy
import onnxruntime as rt
from tvm.contrib import graph_executor
from itertools import combinations
import numpy as np

import os
import graph_generator.op_constraint as opc
import graph_generator.op_projection as opp
import graph_generator.random_graph as rg
import graph_generator.utility
import graph_generator.graph_debug as gd
import utility

In [2]:
# Load the saved model and recorder objects for the cell4_p6_ER_bug case.
# Both paths are relative to the notebook's working directory.
# NOTE(review): torch.load unpickles arbitrary Python objects - only load files from a trusted source.
combined_model = torch.load("graph_generator/cell4_p6_ER_bug/0_test_error_model.pt")
recorder = torch.load("graph_generator/cell4_p6_ER_bug/0_test_error_recorder.pt")

### First, debug every cell individually

In [40]:
# Rebuild every recorded cell on its own and run the cell-level check on it.
# The loop stops at the first cell that reports an error.
cell_num = len(recorder.sub_graph_params)
for index in range(cell_num):
    # Entry [1] of the recorded parameters is unpacked into the Cell constructor.
    test_cell = opp.Cell(1, *recorder.sub_graph_params[index][1])
    test_cell = test_cell.eval()
    cell, error_result = gd.find_bugs_in_cell_level(test_cell, gd.test_under_pytorch, exec="debug")
    if error_result is not None:
        print("Find error cell! Idx: {}".format(index))
        # Raise with context so the traceback itself names the failing cell;
        # RuntimeError is still an Exception, so existing handlers keep working.
        raise RuntimeError(
            "Cell-level check failed for cell {}: {}".format(index, error_result)
        )
print("Cell all good")

### Debug sub-graphs

In [7]:
def rebuild_subgraphs(model, recorder, index_list):
    """Re-create the recorded sub-graphs selected by ``index_list``.

    Args:
        model: Not used by this function; kept so existing calls keep working.
        recorder: Object whose ``sub_graph_params[idx]`` holds the positional
            arguments for ``rg.TestSubgraph``.
        index_list: Non-empty sequence of indices into
            ``recorder.sub_graph_params``, in the order the sub-graphs are
            to be rebuilt.

    Returns:
        A tuple ``(init_input_shape, subgraphs)``. ``init_input_shape`` is the
        entry at position ``-4`` of the recorded parameters of the first
        selected sub-graph (the notebook uses it as the input shape).
        ``subgraphs`` is the list of freshly constructed ``rg.TestSubgraph``
        objects, one per index.

    Raises:
        IndexError: If ``index_list`` is empty.
    """
    if len(index_list) == 0:
        raise IndexError("index_list must contain at least one sub-graph index")

    init_input_shape = recorder.sub_graph_params[index_list[0]][-4]
    subgraphs = []
    for idx in index_list:
        # Deep-copy so the constructor receives objects independent of the
        # ones stored in the recorder.
        params = copy.deepcopy(recorder.sub_graph_params[idx])
        subgraphs.append(rg.TestSubgraph(*params))
    return init_input_shape, subgraphs

In [74]:
# Scratch cell: rebuild the single sub-graph at position `index` from its
# recorded parameters.
index = 3
input_shape = recorder.sub_graph_params[index][-4]
output_shape = combined_model[index].output_shape
cell_graph = opp.get_cell(output_shape)
# Deep copy so the constructor receives objects independent of the recorder's.
params = copy.deepcopy(recorder.sub_graph_params[index])
# Disabled experiment: would replace the second recorded parameter with cell_graph.
#params[1] = cell_graph
rebuild_subgraph1 = rg.TestSubgraph(*params)

In [8]:
# Rebuild sub-graphs 0, 2 and 3 (leaving out 1) and chain them in that order.
init_input_shape, debug_graphs = rebuild_subgraphs(combined_model, recorder, [0, 2, 3])
# debug_graphs is rebound here: from a list of sub-graphs to one Sequential module.
debug_graphs = torch.nn.Sequential(*debug_graphs)

In [9]:
# Run the chained sub-graphs on one random input through gd.test_under_pytorch.
# NOTE(review): no RNG seed is set, so the input differs on every run.
random_input = torch.randn(init_input_shape)
# "normal" and 3 are presumably the exec mode and opt level - confirm against gd.test_under_pytorch.
result = gd.test_under_pytorch(debug_graphs, random_input, "normal", 3)

To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at  /pytorch/aten/src/ATen/native/BinaryOps.cpp:467.)
  return torch.floor_divide(self, other)


In [None]:
#### Ordered (contiguous) combinations of sub-cells

In [6]:
def find_connected_sequence(index_list, length):
    """Return every contiguous window of ``length`` items from ``index_list``.

    Windows are produced left to right, each as a slice of ``index_list``.

    Args:
        index_list: Sequence to take the windows from.
        length: Window size; must not exceed ``len(index_list)``.

    Returns:
        A list holding ``len(index_list) - length + 1`` slices.

    Raises:
        AssertionError: If ``length`` is larger than ``len(index_list)``.
    """
    total = len(index_list)
    assert length <= total
    window_starts = range(total - length + 1)
    return [index_list[start:start + length] for start in window_starts]

def find_minimal_bug_cells(model, recorder, cell_idx_list, opt_level=3):
    """Shrink a failing chain of sub-graphs to a shorter chain that still fails.

    Each contiguous sub-sequence that is one element shorter than
    ``cell_idx_list`` is rebuilt and tested in turn. The search recurses into
    the first one that still reports an error, and stops when no shorter
    sub-sequence fails or when the sequence cannot be shortened further.

    Args:
        model: Forwarded unchanged to ``rebuild_subgraphs``.
        recorder: Forwarded unchanged to ``rebuild_subgraphs``.
        cell_idx_list: Indices of the sub-graphs forming the current chain.
        opt_level: Passed to ``gd.test_under_pytorch`` at every recursion depth.

    Returns:
        The list of sub-graph indices the search ended on.
    """
    print("Begin exploring {} sequence".format(cell_idx_list))
    # A chain of zero or one sub-graphs cannot be shortened. Without this guard
    # the search would ask for windows of length 0 and try to rebuild an empty
    # chain, which fails.
    if len(cell_idx_list) <= 1:
        return cell_idx_list

    sub_cell_length = len(cell_idx_list) - 1
    for seq in find_connected_sequence(cell_idx_list, sub_cell_length):
        seq = sorted(seq)
        init_input_shape, debug_graphs = rebuild_subgraphs(model, recorder, seq)
        debug_graphs = torch.nn.Sequential(*debug_graphs)
        random_input = torch.randn(init_input_shape)
        result = gd.test_under_pytorch(debug_graphs, random_input, "normal", opt_level)
        if result[1] is not None:
            # This shorter chain still fails: keep shrinking it, using the
            # same opt_level as the caller requested.
            print("Find bug for sequence: {}, bug info: {}".format(seq, result[1]))
            return find_minimal_bug_cells(model, recorder, seq, opt_level)
    return cell_idx_list

In [19]:
# Start the search from the full chain: every sub-graph index of the loaded model, in order.
find_minimal_bug_cells(combined_model, recorder, list(range(len(combined_model))))

Begin exploring [0, 1, 2, 3] sequence


For x86 target, depthwise_conv2d with channel multiplier greater than 1 is not optimized
For x86 target, depthwise_conv2d with channel multiplier greater than 1 is not optimized
For x86 target, depthwise_conv2d with channel multiplier greater than 1 is not optimized
For x86 target, depthwise_conv2d with channel multiplier greater than 1 is not optimized
For x86 target, depthwise_conv2d with channel multiplier greater than 1 is not optimized
For x86 target, depthwise_conv2d with channel multiplier greater than 1 is not optimized
For x86 target, depthwise_conv2d with channel multiplier greater than 1 is not optimized
For x86 target, depthwise_conv2d with channel multiplier greater than 1 is not optimized
For x86 target, depthwise_conv2d with channel multiplier greater than 1 is not optimized
For x86 target, depthwise_conv2d with channel multiplier greater than 1 is not optimized


Find bug for sequence: [1, 2, 3], bug info: 
Not equal to tolerance rtol=0.01, atol=0.01

x and y -inf location mismatch:
 x: array([[[[ -74.31442 , -126.72563 , -111.86792 , ...,   -2.6567  ,
            -2.705109,   -3.041387],
         [ -60.686405, -128.05017 ,  -93.24954 , ...,   -2.662258,...
 y: array([[[[ -74.31447 , -126.72558 , -111.86785 , ...,   -2.6567  ,
            -2.705109,   -3.041387],
         [ -60.686382, -128.05008 ,  -93.24952 , ...,   -2.662258,...
Begin exploring [1, 2, 3] sequence


For x86 target, depthwise_conv2d with channel multiplier greater than 1 is not optimized
For x86 target, depthwise_conv2d with channel multiplier greater than 1 is not optimized
For x86 target, depthwise_conv2d with channel multiplier greater than 1 is not optimized
For x86 target, depthwise_conv2d with channel multiplier greater than 1 is not optimized
For x86 target, depthwise_conv2d with channel multiplier greater than 1 is not optimized


[1, 2, 3]

In [10]:
def test_under_pytorch(model, random_input, exec="normal", opt_level=3):
    rtol = 1e-2
    atol = 1e-2
    mod, params = utility.torch2relay(model, random_input)
    lib = utility.build_relay(mod, params=params, opt_level=opt_level)
    with torch.no_grad():
        if isinstance(random_input, list) or isinstance(random_input, tuple):
            baseline_outputs = model(*random_input)
            baseline_input = [i.numpy() for i in random_input]
        else:
            baseline_outputs = model(random_input)
            baseline_input = [random_input.numpy()]
        input_name = ["input{}".format(i) for i in range(len(baseline_input))] # See utility.torch2relay
    return [lib, tvm.cpu(0), baseline_input, [baseline_outputs.numpy()], rtol, atol, exec, input_name]

In [11]:
# Rebuild the [1, 2, 3] chain that the search above ended on.
init_input_shape, debug_graphs = rebuild_subgraphs(combined_model, recorder, [1, 2, 3])
debug_graphs = torch.nn.Sequential(*debug_graphs)
# NOTE(review): no RNG seed is set, so the input differs on every run.
random_input = torch.randn(init_input_shape)
# Uses the notebook-local test_under_pytorch (not gd.test_under_pytorch), which
# returns the argument list instead of running the comparison.
test_list = test_under_pytorch(debug_graphs, random_input, "normal", 3)

For x86 target, depthwise_conv2d with channel multiplier greater than 1 is not optimized
For x86 target, depthwise_conv2d with channel multiplier greater than 1 is not optimized
For x86 target, depthwise_conv2d with channel multiplier greater than 1 is not optimized
For x86 target, depthwise_conv2d with channel multiplier greater than 1 is not optimized
For x86 target, depthwise_conv2d with channel multiplier greater than 1 is not optimized


In [14]:
# Add one extra positional flag for utility.verify_compiled_model.
# test_under_pytorch returns eight values; slicing to those first keeps this
# cell idempotent, so re-running it never appends a second flag.
# NOTE(review): the meaning of the ninth argument is not visible here - confirm in utility.
test_list = test_list[:8] + [True]

In [16]:
# Run the comparison with the prepared positional arguments.
result = utility.verify_compiled_model(*test_list)

In [18]:
# Show the entry stored under the 'tvm' key; the saved output below contains -inf values.
result['tvm']

[array([[[[ -74.31086  , -127.218544 , -111.81207  , ...,   -2.688539 ,
             -2.805067 ,   -3.1428323],
          [ -61.21711  , -129.10709  ,  -92.84157  , ...,   -2.6428006,
             -2.690722 ,   -3.1447124],
          [ -64.556206 ,         -inf, -129.62233  , ...,   -2.791613 ,
             -2.7879748,   -3.1077762],
          ...,
          [ -68.51478  ,         -inf,         -inf, ...,   -2.6383305,
             -2.7485623,   -3.2475314],
          [-102.9821   ,         -inf,         -inf, ...,   -2.752676 ,
             -2.7756639,   -3.1942592],
          [-102.75902  ,         -inf,         -inf, ...,   -2.6680007,
             -2.7510037,   -3.176314 ]],
 
         [[-110.147255 , -130.86389  ,  -56.031063 , ...,   -2.6187077,
             -2.8881493,   -2.9271832],
          [-124.23634  ,         -inf,  -76.73361  , ...,   -2.689084 ,
             -2.8741283,   -2.8716474],
          [ -70.76816  , -104.69658  ,  -24.948404 , ...,   -2.7171142,
             -