In [2]:
import warnings
warnings.filterwarnings("ignore")

import torch
import torch.nn.functional as F
from torchvision import datasets, transforms
from nni.algorithms.compression.pytorch.quantization import LsqQuantizer, QAT_Quantizer

import torch.nn as nn
import onnx
import onnx.numpy_helper
from math import ceil
### markus
import numpy as np
import PIL
import os

import matplotlib.pyplot as plt
import matplotlib
#%matplotlib notebook

In [3]:
#required_input_dim = 3121
#gen_img_input_dim_w = required_input_dim
#gen_img_input_dim_h = 1
#gen_img_input_channels = 3

# Input image size in pixels. The commented-out expressions are an earlier
# variant that rounded each dimension up to a multiple of 4.
#imgSizeW=32
imgSizeW= 82#ceil(imgSizeW/4)*4
#imgSizeH=28
imgSizeH= 1#ceil(imgSizeH/4)*4

# Dimensions shared by the image generator, the model's input layer, the ONNX
# export and the imgSize.config file written further down.
gen_img_input_dim_w = imgSizeW
gen_img_input_dim_h = imgSizeH
gen_img_input_channels = 3
# Test input image handed to the profiling script
test_input_data="../convertdemo/dataset/rand_3.jpg"
# Directories (relative paths, resolved against the notebook's working directory)
quant_image_path = "../quantization_images"
script_path = "../scripts"
log_path = "../logs"
network_path = "../convertdemo/network"
# File names; doProfiling joins them with script_path / log_path
perform_script = "perform_r6.sh"
parse_script = "parse_r1.sh"
perform_log_file = "model_execution.log"
parsed_log_file = "model_execution_parsed.log"
# Base name of the exported ONNX file (<model_name>.onnx)
model_name="mnist"


# Generate images based on some arbitrary input dimension

In [4]:
# Module-level alias of the image directory. The helper functions in this
# notebook take their own `path` argument, which shadows this name inside them.
path = quant_image_path

def generate_random_images(xdim, ydim, channels=3, count=1, path="."):
    """
    Generate random RGB images (one .bmp and one .jpg per image) to use for
    quantization, given a defined dimension.

    Any .jpg/.bmp files already present in `path` are deleted first, so that
    afterwards the directory holds only the freshly generated image set
    (files with other extensions are left alone).

        @xdim     .. width of images in pixels
        @ydim     .. height of images in pixels
        @channels .. number of channels; the images are built in PIL 'RGB'
                     mode, so only 3 is supported (ValueError otherwise)
        @count    .. number of images
        @path     .. directory the images are written to (created if missing)
    """
    # Fail before touching the file system: 'RGB' needs exactly 3 bytes per pixel
    if channels != 3:
        raise ValueError("only 3-channel (RGB) images are supported, got channels=%r" % (channels,))

    # Create the target directory if it does not exist yet
    if not os.path.exists(path):
        os.makedirs(path)
        print("The new directory " + str(path) + " is created!")

    # Delete the previously generated bmp/jpg files from the *target* directory.
    # (The earlier version always cleaned the global quant_image_path, which is
    # wrong as soon as a different `path` is passed in.)
    for name in os.listdir(path):
        if name.endswith((".jpg", ".bmp")):
            os.remove(os.path.join(path, name))

    for c in range(count):
        # randint's upper bound is exclusive, so 256 is required to reach 255
        rnd_img = np.random.randint(low=0, high=256, size=(ydim, xdim, channels), dtype=np.uint8)
        pixels = np.ascontiguousarray(rnd_img, dtype=np.uint8).tobytes()

        pil_image = PIL.Image.frombytes('RGB', (xdim, ydim), pixels)
        stem = os.path.join(path, "rand_" + str(c))
        pil_image.save(stem + ".bmp")
        pil_image.save(stem + ".jpg")



In [5]:

# Seed torch's global RNG. The numpy RNG used by generate_random_images is not
# seeded anywhere in the cells shown here, so the image contents differ per run.
torch.manual_seed(0)
# choose the device
# NOTE(review): `device` is not referenced by any later cell visible here.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## generate 20 random input images based on the provided dimensions
generate_random_images(gen_img_input_dim_w, gen_img_input_dim_h, channels=3, count=20, path=quant_image_path)


# Build the normal model

In [6]:
class SimpleNN(nn.Module):
    """Two fully connected layers (gen_img_input_dim_w -> 50 -> 10) with a log-softmax output."""

    def __init__(self):
        super().__init__()

        # The input layer width follows the configured image width
        self.fc1 = nn.Linear(gen_img_input_dim_w, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        # x is indexed as [N, C, H, W]: keep sample 0, then channel 0 of it.
        # What remains is an [H, W] tensor whose rows are fed to the linear layers.
        rows = x.select(0, 0).select(0, 0)
        hidden = F.relu(self.fc1(rows))
        logits = self.fc2(hidden)
        return F.log_softmax(logits, dim=1)

In [7]:
# Instantiate the network and a plain SGD optimizer (with momentum) over all of its parameters
model = SimpleNN()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

# Train the model on random images

In [8]:
import glob

def load_images(count=1, path=".", extension='*.bmp'):
    """
    Load at most `count` images from `path` as float32 torch tensors.

    Files matching `extension` (a glob pattern) are read in sorted file-name
    order. Every image array is transposed with (2, 0, 1) - i.e. from
    height/width/channel to channel/height/width for a multi-channel image -
    and gets a leading axis of size 1, so each tensor is [1, C, H, W].
    Pixel values are taken over as stored; no normalization is applied.

    Returns a list of tensors; it is empty when nothing matches or count <= 0.
    """
    # glob yields files in arbitrary order; sorting keeps repeated runs identical
    files = sorted(glob.glob(path + "/" + extension))

    imgs = []
    # Slicing enforces the limit up front. The former in-loop check
    # (`i+1 > count` after the append) let one image too many through.
    for f in files[:max(count, 0)]:
        img = np.asarray(PIL.Image.open(f), dtype=np.float32).transpose((2, 0, 1))
        img = img.reshape((1, img.shape[0], img.shape[1], img.shape[2]))
        imgs.append(torch.from_numpy(img))

    return imgs

# Load the generated .bmp images (load_images' default extension) for the training loop
imgs = load_images(count=20,path=quant_image_path)


In [9]:
# Toy training pass: one SGD step per loaded image, always towards the fixed
# class index 2. The inputs are raw pixel values (load_images does not normalize).
model.train()
for img in imgs:
    # A single-entry target: this relies on the model returning exactly one
    # row, which is the case while the image height is 1.
    target = torch.tensor([2])
    optimizer.zero_grad()
    output = model(img)
    #print(output)
    loss = F.nll_loss(output, target)
    print(loss.item())
    #print(target, output)
    loss.backward()
    optimizer.step()

44.232093811035156
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0


# Now Assume the Model is Given
- Use it to Profile

In [10]:
def export_model_to_onnx(model, input_shape=(1,3,28,28), path=model_name+".onnx"):
    """
    Export `model` to an ONNX file at `path` and validate the written file.

    The model is moved to the CPU and exported with a random tensor of
    `input_shape` as example input. The export uses opset 7, stores the
    parameters in the file, names the tensors 'input' / 'output' and declares
    no dynamic axes. Afterwards the file is loaded again, passed through the
    ONNX checker and printed as a human readable graph.
    """
    dummy_input = torch.randn(input_shape)
    model.to('cpu')

    print("------------- Exporting to onnx")
    export_options = dict(
        opset_version=7,
        verbose=True,
        export_params=True,
        input_names=['input'],
        output_names=['output'],
        dynamic_axes=None,
    )
    torch.onnx.export(model, dummy_input, path, **export_options)

    print("------------- Checking exported model")

    # Re-load the file just written, verify that the IR is well formed,
    # then dump the graph in readable form
    exported = onnx.load(path)
    onnx.checker.check_model(exported)
    print( onnx.helper.printable_graph(exported.graph) )



In [11]:
#export_model_to_onnx(model)

#Create an image dimensions configuration file

In [12]:
# Write the input dimensions as "imgSize=<channels>,<height>,<width>" to
# imgSize.config in the network directory. The directory has to exist
# already; open() does not create it.
with open(network_path+"/"+"imgSize.config", 'w') as f:
    f.write('imgSize=%d,%d,%d'%(gen_img_input_channels,gen_img_input_dim_h,gen_img_input_dim_w))

In [13]:
from re import L
import subprocess
from subprocess import DEVNULL, STDOUT
from xmlrpc.client import boolean
import pandas as pd
import numpy as np

def parse_the_results(inp="model_execution.log",
                      out="model_execution_parsed.log",
                      script="parse.sh",
                      loop=1,
                      show=False):
    """
    Run the parsing shell script over a profiling log.

    The script is started through IPython's ``!`` shell escape as
    ``bash <script> <inp> <out> <loop>``, so this function only works inside
    an IPython/Jupyter session.

    Parameters
    ----------
    inp : input file
        log file to be parsed
    out : output file
        the output parsed file (passed on to the script)
    script : parsing script
        shell script to be used for parsing
    loop: number of runs
        the loops the model was run for in main.c
    show: output destination of the script
        when False the script's output stays attached to the notebook cell;
        when True its stdout is redirected to ../logs/jupyter_parse.log.
        NOTE(review): the commented-out subprocess calls silence the output
        for show=False instead - confirm which behaviour is intended.
    Returns
    -------
    None
    """
    print("------------- Parsing the profiling results...")
    if show == False:
        #subprocess.check_call([script, inp, out,loop], stdout=DEVNULL, stderr=STDOUT)
        !bash {script} {inp}  {out} {loop}
        
    else:
        #subprocess.check_call([script, inp, out,loop])
        !bash {script} {inp}  {out} {loop} > ../logs/jupyter_parse.log
    
    print("------------- Parsing the profiling results done!")

def run_profiler(script=script_path+"/"+"perform.sh",loop=1,test_input="test.jpg",lgofile="model_execution_parsed.log",show=False):
    """
    Run the profiling shell script for the model.

    The script is started through IPython's ``!`` shell escape as
    ``bash <script> <loop> <test_input> <lgofile>``, so this function only
    works inside an IPython/Jupyter session.

    Parameters
    ----------
    script : profiling script
        shell script to be used for profiling
    loop: number of runs
        the loops to run the model in the main.c
    test_input: input image with path
        input image of said dimensions
    lgofile: log file for profiling
        output log file for profiling (passed on to the script).
        NOTE(review): the default names the *parsed* log, whereas
        auto_profile passes the un-parsed profiling log - confirm the default.
    show: output destination of the script
        when False the script's output stays attached to the notebook cell;
        when True its stdout is redirected to ../logs/jupyter.log.
        NOTE(review): the commented-out subprocess calls silence the output
        for show=False instead - confirm which behaviour is intended.
    Returns
    -------
    None
    """
    print("------------- Performing the profiling...")
    if show == False:
        #subprocess.check_call([script, loop, test_input,lgofile], stdout=DEVNULL, stderr=STDOUT)
        !bash {script} {loop}  {test_input} {lgofile}
    else:
        #subprocess.check_call([script, loop, test_input,lgofile])
        !bash {script} {loop}  {test_input} {lgofile} > ../logs/jupyter.log
    
    print("------------- Performing the profiling done!")



def auto_profile(model,
                 loop=1,
                 imgChannel=3,
                 imgDimX=28,
                 imgDimY=28,
                 modelwithPath=model_name+".onnx",
                 testingInput="../convertdemo/dataset/mnist2.jpg",
                 performScript = script_path+"/"+"perform.sh",
                 parseScript = script_path+"/"+"parse.sh",
                 performLogFile = log_path+"/"+"model_execution.log",
                 parsedLogFile = log_path+"/"+"model_execution_parsed.log",
                 debug=False):
    """
    Export a torch model to ONNX, profile it with the shell scripts and
    return the parsed timings.

    Parameters
    ----------
    model : pytorch model
        The model to export and profile
    loop : loop
        the number of loops to run the model on khadas in main.c
    imgChannel: input channels
        size of axis 1 of the example input used for the export
    imgDimX: first spatial dimension
        size of axis 2 of the example input; the input shape is
        (1, imgChannel, imgDimX, imgDimY). Note that doProfiling is called
        with the image height here.
    imgDimY: second spatial dimension
        size of axis 3 of the example input. Note that doProfiling is called
        with the image width here.
    modelwithPath: model path
        path the onnx model is written to
    testingInput: input image with path
        input image of said dimensions
    performScript: profiling script
        profiling script that implements whole flow
    parseScript: parse script
        parse script which parses the profiling log
    performLogFile: log file for profiling
        output log file for profiling
    parsedLogFile: log file for parsed profiling
        output log file for parsed profiling; read back with ':' as separator
    debug : show debugging
        forwarded as `show` to run_profiler and parse_the_results
    Returns
    -------
    pandas frame
        contains the execution times (profiled time)
    status
        always True on return; a missing or unreadable parsed log surfaces as
        an exception from pandas instead of a False status
    """
    # Build the example-input shape from the arguments. Previously the
    # notebook globals were used here and imgDimX / imgDimY were ignored.
    export_model_to_onnx(model, input_shape=(1, imgChannel, imgDimX, imgDimY), path=modelwithPath)

    # Equivalent manual invocation:
    #   ../scripts/perform_r6.sh 10 ../convertdemo/dataset/mnist2.jpg ../logs/model_execution.log
    run_profiler(performScript, loop, testingInput, performLogFile, debug)

    parse_the_results(performLogFile, parsedLogFile, parseScript, loop, debug)

    # Read the "<label>:<time>" lines of the parsed log into a two-column frame
    profiledFrames = pd.read_csv(parsedLogFile, sep=':', header=None)

    return profiledFrames, True



In [14]:

def doProfiling(model,
                 loop=1,
                 imgChannel=3,
                 imgDimX=28,
                 imgDimY=28,
                 debug=False):
    """
    Regenerate the random quantization images and profile `model` using the
    script, log and model paths configured at the top of the notebook.

    Parameters
    ----------
    model : pytorch model
        The model to export and profile
    loop : loop
        the number of loops to run the model on khadas in main.c
    imgChannel: input channels
        forwarded to auto_profile
    imgDimX: first spatial dimension
        forwarded to auto_profile
    imgDimY: second spatial dimension
        forwarded to auto_profile
    debug : show debugging
        forwarded to auto_profile
    Returns
    -------
    pandas frame
        contains the execution times (profiled time)
    status
        the status flag returned by auto_profile
    """
    # Resolve script, log and model locations from the notebook configuration
    perform_script_abs = script_path+"/"+perform_script
    parse_script_abs = script_path+"/"+parse_script
    perform_log_file_abs = log_path+"/"+perform_log_file
    parsed_log_file_abs = log_path+"/"+parsed_log_file
    model_with_Path = network_path+"/"+model_name+".onnx"

    torch.manual_seed(0)

    # Regenerate 20 random input images. The image size comes from the
    # notebook-level configuration, not from imgDimX / imgDimY.
    generate_random_images(gen_img_input_dim_w, gen_img_input_dim_h, channels=3, count=20, path=quant_image_path)

    pArray, status = auto_profile(model,
                                  loop=loop,
                                  imgChannel=imgChannel,
                                  imgDimX=imgDimX,
                                  imgDimY=imgDimY,
                                  modelwithPath=model_with_Path,
                                  testingInput=test_input_data,
                                  performScript=perform_script_abs,
                                  parseScript=parse_script_abs,
                                  performLogFile=perform_log_file_abs,
                                  parsedLogFile=parsed_log_file_abs,
                                  debug=debug)

    print("------------- auto_profile done!...")

    return pArray, status

# The loop count is kept as a string; downstream it is only interpolated into
# the shell command lines of the profiling and parsing scripts.
loop_run = '10'

# Note the argument order: the image height is passed as imgDimX and the
# image width as imgDimY.
[ProfileArray,status] = doProfiling(model,
                 loop_run,
                 gen_img_input_channels,
                 gen_img_input_dim_h,
                 gen_img_input_dim_w,
                 debug=True)

# Rows of the parsed log as nested Python lists
accData=ProfileArray.values.tolist()

------------- Exporting to onnx
Exported graph: graph(%input : Float(1, 3, 1, 82, strides=[246, 82, 82, 1], requires_grad=0, device=cpu),
      %fc1.weight : Float(50, 82, strides=[82, 1], requires_grad=1, device=cpu),
      %fc1.bias : Float(50, strides=[1], requires_grad=1, device=cpu),
      %fc2.weight : Float(10, 50, strides=[50, 1], requires_grad=1, device=cpu),
      %fc2.bias : Float(10, strides=[1], requires_grad=1, device=cpu)):
  %onnx::Cast_5 : Long(device=cpu) = onnx::Constant[value={0}, onnx_name="Constant_0"]() # /tmp/ipykernel_26179/2041700439.py:11:0
  %onnx::Gather_13 : Long(requires_grad=0, device=cpu) = onnx::Cast[to=7, onnx_name="Cast_1"](%onnx::Cast_5) # /tmp/ipykernel_26179/2041700439.py:11:0
  %onnx::Gather_6 : Float(3, 1, 82, strides=[82, 82, 1], requires_grad=0, device=cpu) = onnx::Gather[axis=0, onnx_name="Gather_2"](%input, %onnx::Gather_13) # /tmp/ipykernel_26179/2041700439.py:11:0
  %onnx::Cast_7 : Long(device=cpu) = onnx::Constant[value={0}, onnx_name="Co

In [15]:
#subprocess.check_call(['../scripts/sajjad.sh'])
#subprocess.check_call(["../scripts/sajjad.sh"])
#!ls -la
#!echo "Hello"
#!bash ../scripts/perform_r6.sh 10  ../convertdemo/dataset/rand_3.jpg ../logs/model_execution.log > ../logs/jupyter.log
#myscript="../scripts/sajjad.sh"
#!bash {myscript}
 

In [16]:
# Column 0 of the parsed log holds the text before the ':' separator (the step
# labels), column 1 the text after it (the timings, still as strings).
C0 = np.array(ProfileArray[0])
C1 = np.array(ProfileArray[1])
print('---------\n')
print(C0)
print('---------\n')
print(C1)
print('---------\n')
print(status)

---------

['Create Neural Network' 'Verify Graph' 'Run the 1 time' 'Run the 2 time'
 'Run the 3 time' 'Run the 4 time' 'Run the 5 time' 'Run the 6 time'
 'Run the 7 time' 'Run the 8 time' 'Run the 9 time' 'Run the 10 time'
 'Total   ' 'Average ']
---------

[' 128368us' ' 10031us' ' 29282.00us' ' 76.00us' ' 54.00us' ' 54.00us'
 ' 52.00us' ' 52.00us' ' 51.00us' ' 52.00us' ' 51.00us' ' 51.00us'
 ' 29820.00us' ' 2982.00us']
---------

True


In [17]:
import pandas as pd
import numpy as np

import mymodule as  modu
# Smoke test of the local helper module on a small array
# (presumably the arithmetic mean; the recorded output is 5.0)
x = np.array([2,4,6,8])
modu.my_mean(x)

------------- auto_profile done!...
Sajjad Hussain


5.0