In [1]:
import torch
import numpy as np
import time
import argparse
import torch.nn as nn
import coremltools as ct
from coremltools.models.neural_network import quantization_utils

In [2]:
def create_mlp(D,N,NumLayers) :
    """Build a bias-free stack of ``NumLayers`` linear layers.

    No activation is inserted between the layers; the result is a plain
    chain of ``nn.Linear`` modules wrapped in ``nn.Sequential``.

    Args:
        D: Number of input features of the first layer.
        N: Number of output features of every layer.
        NumLayers: How many linear layers to stack.

    Returns:
        torch.nn.Sequential containing ``NumLayers`` ``nn.Linear`` modules
        created with ``bias=False``.
    """
    layers = []
    in_features = D
    for _ in range(NumLayers):
        layers.append(nn.Linear(in_features, N, bias=False))
        # Only the first layer consumes D features; each later layer is fed
        # the N outputs of its predecessor, so the stack also chains when D != N.
        in_features = N
    return torch.nn.Sequential(*layers)

def init_weights(m):
    """Re-initialise the weight of an ``nn.Linear`` module in place.

    Intended to be passed to ``Module.apply``: linear modules get their
    ``weight`` filled by ``xavier_normal_``; modules of any other type are
    left untouched.

    Args:
        m: The module to (possibly) re-initialise.
    """
    if not isinstance(m, nn.Linear):
        return
    nn.init.xavier_normal_(m.weight)

In [3]:
def generate_mlmodels(M, N, batch, Layers):
    """Create a random MLP, convert it to Core ML and save two quantized copies.

    The network is built with ``create_mlp``, its linear weights are
    re-initialised with ``init_weights``, and it is traced with a random
    example input before conversion. Two files are written, with paths
    relative to the current working directory:

    * ``{M}x{N}x{Layers}x{batch}_FP16.mlmodel``    -- weights quantized with ``nbits=16``
    * ``{M}x{N}x{Layers}x{batch}_INT8LUT.mlmodel`` -- weights quantized with
      ``nbits=8`` and ``quantization_mode="kmeans"``

    The unquantized converted model is not saved.

    Args:
        M: Input feature count of the network.
        N: Output feature count of every layer.
        batch: Batch dimension of the fixed input shape ``(batch, M)``.
        Layers: Number of linear layers in the network.
    """
    model = create_mlp(M,N,Layers)
    model.apply(init_weights)
    model.eval()

    # Example input; only its shape matters, it is used to record the trace.
    x = torch.randn(batch,M)
    traced_model = torch.jit.trace(model, x)

    # Pin the neural-network backend explicitly: quantization_utils and the
    # ".mlmodel" file format below only apply to that model type, and the
    # converter's default backend differs between coremltools releases.
    apple_model = ct.convert(
        traced_model,
        inputs=[ct.TensorType(name="input", shape=(batch,M))],
        convert_to="neuralnetwork",
    )

    # Shared filename stem so both variants are named consistently.
    stem = f"{M}x{N}x{Layers}x{batch}"

    apple_model_FP16 = quantization_utils.quantize_weights(apple_model, nbits=16)
    apple_model_FP16.save(stem + "_FP16.mlmodel")

    apple_model_INT8LUT = quantization_utils.quantize_weights(apple_model, nbits=8, quantization_mode="kmeans")
    apple_model_INT8LUT.save(stem + "_INT8LUT.mlmodel")

In [None]:
# Sweep square layer widths 256, 512, ..., 16384 and depths 8, 16, 24, 32,
# exporting the quantized models for every combination at batch size 1.
for i in range(7):
    M = 256*2**(i)
    N = M
    for l in range(1,5):
        layers = 8*l
        try:
            generate_mlmodels(M,N,1,layers)
        except Exception as err:
            # Best effort: report the failing configuration and keep sweeping
            # instead of aborting the whole run on one bad size.
            print("Model with dimension ", M, "x", N, " with", layers, " layers not created:",
                  type(err).__name__, err)

Converting PyTorch Frontend ==> MIL Ops:  94%|▉| 15/16 [00:00<00:00, 3468.28 ops
Running MIL frontend_pytorch pipeline: 100%|█| 5/5 [00:00<00:00, 4414.13 passes/
Running MIL default pipeline: 100%|██████| 56/56 [00:00<00:00, 3479.77 passes/s]
Running MIL backend_neuralnetwork pipeline: 100%|█| 8/8 [00:00<00:00, 25811.10 p
Translating MIL ==> NeuralNetwork Ops: 100%|██| 24/24 [00:00<00:00, 87.67 ops/s]


Quantizing using linear quantization
Quantizing layer input.3 of type innerProduct
Quantizing layer input.5 of type innerProduct
Quantizing layer input.7 of type innerProduct
Quantizing layer input.9 of type innerProduct
Quantizing layer input.11 of type innerProduct
Quantizing layer input.13 of type innerProduct
Quantizing layer input of type innerProduct
Quantizing layer 33 of type innerProduct
Quantizing using kmeans quantization
Optimizing Neural Network before Quantization:
Finished optimizing network. Quantizing neural network..
Quantizing layer input.3 of type innerProduct


  ).fit(wf)


Quantizing layer input.5 of type innerProduct


  ).fit(wf)


Quantizing layer input.7 of type innerProduct


  ).fit(wf)


Quantizing layer input.9 of type innerProduct


  ).fit(wf)


Quantizing layer input.11 of type innerProduct


  ).fit(wf)


Quantizing layer input.13 of type innerProduct


  ).fit(wf)


Quantizing layer input of type innerProduct


  ).fit(wf)


Quantizing layer 33 of type innerProduct


  ).fit(wf)
Converting PyTorch Frontend ==> MIL Ops:  97%|▉| 31/32 [00:00<00:00, 4327.91 ops
Running MIL frontend_pytorch pipeline: 100%|█| 5/5 [00:00<00:00, 13189.64 passes
Running MIL default pipeline: 100%|██████| 56/56 [00:00<00:00, 2071.08 passes/s]
Running MIL backend_neuralnetwork pipeline: 100%|█| 8/8 [00:00<00:00, 17467.17 p
Translating MIL ==> NeuralNetwork Ops: 100%|██| 48/48 [00:00<00:00, 89.26 ops/s]


Quantizing using linear quantization
Quantizing layer input.3 of type innerProduct
Quantizing layer input.5 of type innerProduct
Quantizing layer input.7 of type innerProduct
Quantizing layer input.9 of type innerProduct
Quantizing layer input.11 of type innerProduct
Quantizing layer input.13 of type innerProduct
Quantizing layer input.15 of type innerProduct
Quantizing layer input.17 of type innerProduct
Quantizing layer input.19 of type innerProduct
Quantizing layer input.21 of type innerProduct
Quantizing layer input.23 of type innerProduct
Quantizing layer input.25 of type innerProduct
Quantizing layer input.27 of type innerProduct
Quantizing layer input.29 of type innerProduct
Quantizing layer input of type innerProduct
Quantizing layer 65 of type innerProduct
Quantizing using kmeans quantization
Optimizing Neural Network before Quantization:
Finished optimizing network. Quantizing neural network..
Quantizing layer input.3 of type innerProduct


  ).fit(wf)


Quantizing layer input.5 of type innerProduct


  ).fit(wf)


Quantizing layer input.7 of type innerProduct


  ).fit(wf)


Quantizing layer input.9 of type innerProduct


  ).fit(wf)


Quantizing layer input.11 of type innerProduct


  ).fit(wf)


Quantizing layer input.13 of type innerProduct


  ).fit(wf)


Quantizing layer input.15 of type innerProduct


  ).fit(wf)


Quantizing layer input.17 of type innerProduct


  ).fit(wf)


Quantizing layer input.19 of type innerProduct


  ).fit(wf)


Quantizing layer input.21 of type innerProduct


  ).fit(wf)


Quantizing layer input.23 of type innerProduct


  ).fit(wf)


Quantizing layer input.25 of type innerProduct


  ).fit(wf)


Quantizing layer input.27 of type innerProduct


  ).fit(wf)


Quantizing layer input.29 of type innerProduct


  ).fit(wf)


Quantizing layer input of type innerProduct


  ).fit(wf)


Quantizing layer 65 of type innerProduct


  ).fit(wf)
Converting PyTorch Frontend ==> MIL Ops:  98%|▉| 47/48 [00:00<00:00, 4468.50 ops
Running MIL frontend_pytorch pipeline: 100%|█| 5/5 [00:00<00:00, 9804.36 passes/
Running MIL default pipeline: 100%|██████| 56/56 [00:00<00:00, 1556.98 passes/s]
Running MIL backend_neuralnetwork pipeline: 100%|█| 8/8 [00:00<00:00, 10768.43 p
Translating MIL ==> NeuralNetwork Ops: 100%|██| 72/72 [00:00<00:00, 87.99 ops/s]


Quantizing using linear quantization
Quantizing layer input.3 of type innerProduct
Quantizing layer input.5 of type innerProduct
Quantizing layer input.7 of type innerProduct
Quantizing layer input.9 of type innerProduct
Quantizing layer input.11 of type innerProduct
Quantizing layer input.13 of type innerProduct
Quantizing layer input.15 of type innerProduct
Quantizing layer input.17 of type innerProduct
Quantizing layer input.19 of type innerProduct
Quantizing layer input.21 of type innerProduct
Quantizing layer input.23 of type innerProduct
Quantizing layer input.25 of type innerProduct
Quantizing layer input.27 of type innerProduct
Quantizing layer input.29 of type innerProduct
Quantizing layer input.31 of type innerProduct
Quantizing layer input.33 of type innerProduct
Quantizing layer input.35 of type innerProduct
Quantizing layer input.37 of type innerProduct
Quantizing layer input.39 of type innerProduct
Quantizing layer input.41 of type innerProduct
Quantizing layer input.43 o

  ).fit(wf)


Quantizing layer input.5 of type innerProduct


  ).fit(wf)


Quantizing layer input.7 of type innerProduct


  ).fit(wf)


Quantizing layer input.9 of type innerProduct


  ).fit(wf)


Quantizing layer input.11 of type innerProduct


  ).fit(wf)


Quantizing layer input.13 of type innerProduct


  ).fit(wf)


Quantizing layer input.15 of type innerProduct


  ).fit(wf)


Quantizing layer input.17 of type innerProduct


  ).fit(wf)


Quantizing layer input.19 of type innerProduct


  ).fit(wf)


Quantizing layer input.21 of type innerProduct


  ).fit(wf)


Quantizing layer input.23 of type innerProduct


  ).fit(wf)


Quantizing layer input.25 of type innerProduct


  ).fit(wf)


Quantizing layer input.27 of type innerProduct


  ).fit(wf)


Quantizing layer input.29 of type innerProduct


  ).fit(wf)


Quantizing layer input.31 of type innerProduct


  ).fit(wf)


Quantizing layer input.33 of type innerProduct


  ).fit(wf)


Quantizing layer input.35 of type innerProduct


  ).fit(wf)


Quantizing layer input.37 of type innerProduct


  ).fit(wf)


Quantizing layer input.39 of type innerProduct


  ).fit(wf)


Quantizing layer input.41 of type innerProduct


  ).fit(wf)


Quantizing layer input.43 of type innerProduct


  ).fit(wf)


Quantizing layer input.45 of type innerProduct


  ).fit(wf)


Quantizing layer input of type innerProduct


  ).fit(wf)


Quantizing layer 97 of type innerProduct


  ).fit(wf)
Converting PyTorch Frontend ==> MIL Ops:  98%|▉| 63/64 [00:00<00:00, 4560.21 ops
Running MIL frontend_pytorch pipeline: 100%|█| 5/5 [00:00<00:00, 6659.74 passes/
Running MIL default pipeline: 100%|██████| 56/56 [00:00<00:00, 1324.10 passes/s]
Running MIL backend_neuralnetwork pipeline: 100%|█| 8/8 [00:00<00:00, 8938.31 pa
Translating MIL ==> NeuralNetwork Ops: 100%|██| 96/96 [00:01<00:00, 89.23 ops/s]


Quantizing using linear quantization
Quantizing layer input.3 of type innerProduct
Quantizing layer input.5 of type innerProduct
Quantizing layer input.7 of type innerProduct
Quantizing layer input.9 of type innerProduct
Quantizing layer input.11 of type innerProduct
Quantizing layer input.13 of type innerProduct
Quantizing layer input.15 of type innerProduct
Quantizing layer input.17 of type innerProduct
Quantizing layer input.19 of type innerProduct
Quantizing layer input.21 of type innerProduct
Quantizing layer input.23 of type innerProduct
Quantizing layer input.25 of type innerProduct
Quantizing layer input.27 of type innerProduct
Quantizing layer input.29 of type innerProduct
Quantizing layer input.31 of type innerProduct
Quantizing layer input.33 of type innerProduct
Quantizing layer input.35 of type innerProduct
Quantizing layer input.37 of type innerProduct
Quantizing layer input.39 of type innerProduct
Quantizing layer input.41 of type innerProduct
Quantizing layer input.43 o

  ).fit(wf)


Quantizing layer input.5 of type innerProduct


  ).fit(wf)


Quantizing layer input.7 of type innerProduct


  ).fit(wf)


Quantizing layer input.9 of type innerProduct


  ).fit(wf)


Quantizing layer input.11 of type innerProduct


  ).fit(wf)


Quantizing layer input.13 of type innerProduct


  ).fit(wf)


Quantizing layer input.15 of type innerProduct


  ).fit(wf)


Quantizing layer input.17 of type innerProduct


  ).fit(wf)


Quantizing layer input.19 of type innerProduct


  ).fit(wf)


Quantizing layer input.21 of type innerProduct


  ).fit(wf)


Quantizing layer input.23 of type innerProduct


  ).fit(wf)


Quantizing layer input.25 of type innerProduct


  ).fit(wf)


Quantizing layer input.27 of type innerProduct


  ).fit(wf)


Quantizing layer input.29 of type innerProduct


  ).fit(wf)


Quantizing layer input.31 of type innerProduct


  ).fit(wf)


Quantizing layer input.33 of type innerProduct


  ).fit(wf)


Quantizing layer input.35 of type innerProduct


  ).fit(wf)


Quantizing layer input.37 of type innerProduct


  ).fit(wf)


Quantizing layer input.39 of type innerProduct


  ).fit(wf)


Quantizing layer input.41 of type innerProduct


  ).fit(wf)


Quantizing layer input.43 of type innerProduct


  ).fit(wf)


Quantizing layer input.45 of type innerProduct


  ).fit(wf)


Quantizing layer input.47 of type innerProduct


  ).fit(wf)


Quantizing layer input.49 of type innerProduct


  ).fit(wf)


Quantizing layer input.51 of type innerProduct


  ).fit(wf)


Quantizing layer input.53 of type innerProduct


  ).fit(wf)


Quantizing layer input.55 of type innerProduct


  ).fit(wf)


Quantizing layer input.57 of type innerProduct


  ).fit(wf)


Quantizing layer input.59 of type innerProduct


  ).fit(wf)


Quantizing layer input.61 of type innerProduct


  ).fit(wf)


Quantizing layer input of type innerProduct


  ).fit(wf)


Quantizing layer 129 of type innerProduct


  ).fit(wf)
Converting PyTorch Frontend ==> MIL Ops:  94%|▉| 15/16 [00:00<00:00, 2623.74 ops
Running MIL frontend_pytorch pipeline: 100%|█| 5/5 [00:00<00:00, 16461.16 passes
Running MIL default pipeline: 100%|██████| 56/56 [00:00<00:00, 2815.51 passes/s]
Running MIL backend_neuralnetwork pipeline: 100%|█| 8/8 [00:00<00:00, 27191.60 p
Translating MIL ==> NeuralNetwork Ops: 100%|██| 24/24 [00:01<00:00, 22.19 ops/s]


Quantizing using linear quantization
Quantizing layer input.3 of type innerProduct
Quantizing layer input.5 of type innerProduct
Quantizing layer input.7 of type innerProduct
Quantizing layer input.9 of type innerProduct
Quantizing layer input.11 of type innerProduct
Quantizing layer input.13 of type innerProduct
Quantizing layer input of type innerProduct
Quantizing layer 33 of type innerProduct
Quantizing using kmeans quantization
Optimizing Neural Network before Quantization:
Finished optimizing network. Quantizing neural network..
Quantizing layer input.3 of type innerProduct


  ).fit(wf)


Quantizing layer input.5 of type innerProduct


  ).fit(wf)


Quantizing layer input.7 of type innerProduct


  ).fit(wf)


Quantizing layer input.9 of type innerProduct


  ).fit(wf)


Quantizing layer input.11 of type innerProduct


  ).fit(wf)


Quantizing layer input.13 of type innerProduct


  ).fit(wf)


Quantizing layer input of type innerProduct


  ).fit(wf)


Quantizing layer 33 of type innerProduct


  ).fit(wf)
Converting PyTorch Frontend ==> MIL Ops:  97%|▉| 31/32 [00:00<00:00, 2846.90 ops
Running MIL frontend_pytorch pipeline: 100%|█| 5/5 [00:00<00:00, 14246.96 passes
Running MIL default pipeline: 100%|██████| 56/56 [00:00<00:00, 1693.79 passes/s]
Running MIL backend_neuralnetwork pipeline: 100%|█| 8/8 [00:00<00:00, 18176.83 p
Translating MIL ==> NeuralNetwork Ops: 100%|██| 48/48 [00:02<00:00, 22.33 ops/s]


Quantizing using linear quantization
Quantizing layer input.3 of type innerProduct
Quantizing layer input.5 of type innerProduct
Quantizing layer input.7 of type innerProduct
Quantizing layer input.9 of type innerProduct
Quantizing layer input.11 of type innerProduct
Quantizing layer input.13 of type innerProduct
Quantizing layer input.15 of type innerProduct
Quantizing layer input.17 of type innerProduct
Quantizing layer input.19 of type innerProduct
Quantizing layer input.21 of type innerProduct
Quantizing layer input.23 of type innerProduct
Quantizing layer input.25 of type innerProduct
Quantizing layer input.27 of type innerProduct
Quantizing layer input.29 of type innerProduct
Quantizing layer input of type innerProduct
Quantizing layer 65 of type innerProduct
Quantizing using kmeans quantization
Optimizing Neural Network before Quantization:
Finished optimizing network. Quantizing neural network..
Quantizing layer input.3 of type innerProduct


  ).fit(wf)


Quantizing layer input.5 of type innerProduct


  ).fit(wf)


Quantizing layer input.7 of type innerProduct


  ).fit(wf)


Quantizing layer input.9 of type innerProduct


  ).fit(wf)


Quantizing layer input.11 of type innerProduct


  ).fit(wf)


Quantizing layer input.13 of type innerProduct


  ).fit(wf)


Quantizing layer input.15 of type innerProduct


  ).fit(wf)


Quantizing layer input.17 of type innerProduct


  ).fit(wf)


Quantizing layer input.19 of type innerProduct


  ).fit(wf)


Quantizing layer input.21 of type innerProduct


  ).fit(wf)


Quantizing layer input.23 of type innerProduct


  ).fit(wf)


Quantizing layer input.25 of type innerProduct


  ).fit(wf)


Quantizing layer input.27 of type innerProduct


  ).fit(wf)


Quantizing layer input.29 of type innerProduct


  ).fit(wf)


Quantizing layer input of type innerProduct


  ).fit(wf)


Quantizing layer 65 of type innerProduct


  ).fit(wf)
Converting PyTorch Frontend ==> MIL Ops:  98%|▉| 47/48 [00:00<00:00, 2836.07 ops
Running MIL frontend_pytorch pipeline: 100%|█| 5/5 [00:00<00:00, 8351.86 passes/
Running MIL default pipeline: 100%|██████| 56/56 [00:00<00:00, 1190.20 passes/s]
Running MIL backend_neuralnetwork pipeline: 100%|█| 8/8 [00:00<00:00, 12345.27 p
Translating MIL ==> NeuralNetwork Ops: 100%|██| 72/72 [00:03<00:00, 22.31 ops/s]


Quantizing using linear quantization
Quantizing layer input.3 of type innerProduct
Quantizing layer input.5 of type innerProduct
Quantizing layer input.7 of type innerProduct
Quantizing layer input.9 of type innerProduct
Quantizing layer input.11 of type innerProduct
Quantizing layer input.13 of type innerProduct
Quantizing layer input.15 of type innerProduct
Quantizing layer input.17 of type innerProduct
Quantizing layer input.19 of type innerProduct
Quantizing layer input.21 of type innerProduct
Quantizing layer input.23 of type innerProduct
Quantizing layer input.25 of type innerProduct
Quantizing layer input.27 of type innerProduct
Quantizing layer input.29 of type innerProduct
Quantizing layer input.31 of type innerProduct
Quantizing layer input.33 of type innerProduct
Quantizing layer input.35 of type innerProduct
Quantizing layer input.37 of type innerProduct
Quantizing layer input.39 of type innerProduct
Quantizing layer input.41 of type innerProduct
Quantizing layer input.43 o

  ).fit(wf)


Quantizing layer input.5 of type innerProduct


  ).fit(wf)


Quantizing layer input.7 of type innerProduct


  ).fit(wf)


Quantizing layer input.9 of type innerProduct


  ).fit(wf)


Quantizing layer input.11 of type innerProduct


  ).fit(wf)


Quantizing layer input.13 of type innerProduct


  ).fit(wf)


Quantizing layer input.15 of type innerProduct


  ).fit(wf)


Quantizing layer input.17 of type innerProduct


  ).fit(wf)


Quantizing layer input.19 of type innerProduct


  ).fit(wf)


Quantizing layer input.21 of type innerProduct


  ).fit(wf)


Quantizing layer input.23 of type innerProduct


  ).fit(wf)


Quantizing layer input.25 of type innerProduct


  ).fit(wf)


Quantizing layer input.27 of type innerProduct


  ).fit(wf)


Quantizing layer input.29 of type innerProduct


  ).fit(wf)


Quantizing layer input.31 of type innerProduct


  ).fit(wf)


Quantizing layer input.33 of type innerProduct


  ).fit(wf)


Quantizing layer input.35 of type innerProduct


  ).fit(wf)


Quantizing layer input.37 of type innerProduct


  ).fit(wf)


Quantizing layer input.39 of type innerProduct


  ).fit(wf)


Quantizing layer input.41 of type innerProduct


  ).fit(wf)


Quantizing layer input.43 of type innerProduct


  ).fit(wf)


Quantizing layer input.45 of type innerProduct


  ).fit(wf)


Quantizing layer input of type innerProduct


  ).fit(wf)


Quantizing layer 97 of type innerProduct


  ).fit(wf)
Converting PyTorch Frontend ==> MIL Ops:  98%|▉| 63/64 [00:00<00:00, 2808.75 ops
Running MIL frontend_pytorch pipeline: 100%|█| 5/5 [00:00<00:00, 6784.70 passes/
Running MIL default pipeline: 100%|███████| 56/56 [00:00<00:00, 939.47 passes/s]
Running MIL backend_neuralnetwork pipeline: 100%|█| 8/8 [00:00<00:00, 9152.87 pa
Translating MIL ==> NeuralNetwork Ops: 100%|██| 96/96 [00:04<00:00, 22.21 ops/s]


Quantizing using linear quantization
Quantizing layer input.3 of type innerProduct
Quantizing layer input.5 of type innerProduct
Quantizing layer input.7 of type innerProduct
Quantizing layer input.9 of type innerProduct
Quantizing layer input.11 of type innerProduct
Quantizing layer input.13 of type innerProduct
Quantizing layer input.15 of type innerProduct
Quantizing layer input.17 of type innerProduct
Quantizing layer input.19 of type innerProduct
Quantizing layer input.21 of type innerProduct
Quantizing layer input.23 of type innerProduct
Quantizing layer input.25 of type innerProduct
Quantizing layer input.27 of type innerProduct
Quantizing layer input.29 of type innerProduct
Quantizing layer input.31 of type innerProduct
Quantizing layer input.33 of type innerProduct
Quantizing layer input.35 of type innerProduct
Quantizing layer input.37 of type innerProduct
Quantizing layer input.39 of type innerProduct
Quantizing layer input.41 of type innerProduct
Quantizing layer input.43 o

  ).fit(wf)


Quantizing layer input.5 of type innerProduct


  ).fit(wf)


Quantizing layer input.7 of type innerProduct


  ).fit(wf)


Quantizing layer input.9 of type innerProduct


  ).fit(wf)


Quantizing layer input.11 of type innerProduct


  ).fit(wf)


Quantizing layer input.13 of type innerProduct


  ).fit(wf)


Quantizing layer input.15 of type innerProduct


  ).fit(wf)


Quantizing layer input.17 of type innerProduct


  ).fit(wf)


Quantizing layer input.19 of type innerProduct


  ).fit(wf)


Quantizing layer input.21 of type innerProduct


  ).fit(wf)


Quantizing layer input.23 of type innerProduct


  ).fit(wf)


Quantizing layer input.25 of type innerProduct


  ).fit(wf)


Quantizing layer input.27 of type innerProduct


  ).fit(wf)


Quantizing layer input.29 of type innerProduct


  ).fit(wf)


Quantizing layer input.31 of type innerProduct


  ).fit(wf)


Quantizing layer input.33 of type innerProduct


  ).fit(wf)


Quantizing layer input.35 of type innerProduct


  ).fit(wf)


Quantizing layer input.37 of type innerProduct


  ).fit(wf)


Quantizing layer input.39 of type innerProduct


  ).fit(wf)


Quantizing layer input.41 of type innerProduct


  ).fit(wf)


Quantizing layer input.43 of type innerProduct


  ).fit(wf)


Quantizing layer input.45 of type innerProduct


  ).fit(wf)


Quantizing layer input.47 of type innerProduct


  ).fit(wf)


Quantizing layer input.49 of type innerProduct


  ).fit(wf)


Quantizing layer input.51 of type innerProduct


  ).fit(wf)


Quantizing layer input.53 of type innerProduct


  ).fit(wf)


Quantizing layer input.55 of type innerProduct


  ).fit(wf)


Quantizing layer input.57 of type innerProduct


  ).fit(wf)


Quantizing layer input.59 of type innerProduct


  ).fit(wf)


Quantizing layer input.61 of type innerProduct


  ).fit(wf)


Quantizing layer input of type innerProduct


  ).fit(wf)


Quantizing layer 129 of type innerProduct


  ).fit(wf)
Converting PyTorch Frontend ==> MIL Ops:  94%|▉| 15/16 [00:00<00:00, 1550.04 ops
Running MIL frontend_pytorch pipeline: 100%|█| 5/5 [00:00<00:00, 15196.75 passes
Running MIL default pipeline: 100%|██████| 56/56 [00:00<00:00, 1794.75 passes/s]
Running MIL backend_neuralnetwork pipeline: 100%|█| 8/8 [00:00<00:00, 31506.51 p
Translating MIL ==> NeuralNetwork Ops: 100%|██| 24/24 [00:04<00:00,  5.55 ops/s]


Quantizing using linear quantization
Quantizing layer input.3 of type innerProduct
Quantizing layer input.5 of type innerProduct
Quantizing layer input.7 of type innerProduct
Quantizing layer input.9 of type innerProduct
Quantizing layer input.11 of type innerProduct
Quantizing layer input.13 of type innerProduct
Quantizing layer input of type innerProduct
Quantizing layer 33 of type innerProduct
Quantizing using kmeans quantization
Optimizing Neural Network before Quantization:
Finished optimizing network. Quantizing neural network..
Quantizing layer input.3 of type innerProduct


  ).fit(wf)


Quantizing layer input.5 of type innerProduct


  ).fit(wf)


Quantizing layer input.7 of type innerProduct


  ).fit(wf)


Quantizing layer input.9 of type innerProduct


  ).fit(wf)


Quantizing layer input.11 of type innerProduct


  ).fit(wf)


Quantizing layer input.13 of type innerProduct


  ).fit(wf)


Quantizing layer input of type innerProduct


  ).fit(wf)


Quantizing layer 33 of type innerProduct


  ).fit(wf)
Converting PyTorch Frontend ==> MIL Ops:  97%|▉| 31/32 [00:00<00:00, 1561.17 ops
Running MIL frontend_pytorch pipeline: 100%|█| 5/5 [00:00<00:00, 10968.37 passes
Running MIL default pipeline: 100%|██████| 56/56 [00:00<00:00, 1030.03 passes/s]
Running MIL backend_neuralnetwork pipeline: 100%|█| 8/8 [00:00<00:00, 14438.22 p
Translating MIL ==> NeuralNetwork Ops: 100%|██| 48/48 [00:08<00:00,  5.61 ops/s]


Quantizing using linear quantization
Quantizing layer input.3 of type innerProduct
Quantizing layer input.5 of type innerProduct
Quantizing layer input.7 of type innerProduct
Quantizing layer input.9 of type innerProduct
Quantizing layer input.11 of type innerProduct
Quantizing layer input.13 of type innerProduct
Quantizing layer input.15 of type innerProduct
Quantizing layer input.17 of type innerProduct
Quantizing layer input.19 of type innerProduct
Quantizing layer input.21 of type innerProduct
Quantizing layer input.23 of type innerProduct
Quantizing layer input.25 of type innerProduct
Quantizing layer input.27 of type innerProduct
Quantizing layer input.29 of type innerProduct
Quantizing layer input of type innerProduct
Quantizing layer 65 of type innerProduct
Quantizing using kmeans quantization
Optimizing Neural Network before Quantization:
Finished optimizing network. Quantizing neural network..
Quantizing layer input.3 of type innerProduct


  ).fit(wf)


Quantizing layer input.5 of type innerProduct


  ).fit(wf)


Quantizing layer input.7 of type innerProduct


  ).fit(wf)


Quantizing layer input.9 of type innerProduct


  ).fit(wf)


Quantizing layer input.11 of type innerProduct


  ).fit(wf)


Quantizing layer input.13 of type innerProduct


  ).fit(wf)


Quantizing layer input.15 of type innerProduct


  ).fit(wf)


Quantizing layer input.17 of type innerProduct


  ).fit(wf)


Quantizing layer input.19 of type innerProduct


  ).fit(wf)


Quantizing layer input.21 of type innerProduct


  ).fit(wf)


Quantizing layer input.23 of type innerProduct


  ).fit(wf)


Quantizing layer input.25 of type innerProduct


  ).fit(wf)


Quantizing layer input.27 of type innerProduct


  ).fit(wf)


Quantizing layer input.29 of type innerProduct


  ).fit(wf)


Quantizing layer input of type innerProduct


  ).fit(wf)


Quantizing layer 65 of type innerProduct


  ).fit(wf)
Converting PyTorch Frontend ==> MIL Ops:  98%|▉| 47/48 [00:00<00:00, 1604.70 ops
Running MIL frontend_pytorch pipeline: 100%|█| 5/5 [00:00<00:00, 8415.54 passes/
Running MIL default pipeline: 100%|███████| 56/56 [00:00<00:00, 728.33 passes/s]
Running MIL backend_neuralnetwork pipeline: 100%|█| 8/8 [00:00<00:00, 12777.77 p
Translating MIL ==> NeuralNetwork Ops: 100%|██| 72/72 [00:12<00:00,  5.60 ops/s]


Quantizing using linear quantization
Quantizing layer input.3 of type innerProduct
Quantizing layer input.5 of type innerProduct
Quantizing layer input.7 of type innerProduct
Quantizing layer input.9 of type innerProduct
Quantizing layer input.11 of type innerProduct
Quantizing layer input.13 of type innerProduct
Quantizing layer input.15 of type innerProduct
Quantizing layer input.17 of type innerProduct
Quantizing layer input.19 of type innerProduct
Quantizing layer input.21 of type innerProduct
Quantizing layer input.23 of type innerProduct
Quantizing layer input.25 of type innerProduct
Quantizing layer input.27 of type innerProduct
Quantizing layer input.29 of type innerProduct
Quantizing layer input.31 of type innerProduct
Quantizing layer input.33 of type innerProduct
Quantizing layer input.35 of type innerProduct
Quantizing layer input.37 of type innerProduct
Quantizing layer input.39 of type innerProduct
Quantizing layer input.41 of type innerProduct
Quantizing layer input.43 o

  ).fit(wf)


Quantizing layer input.5 of type innerProduct


  ).fit(wf)


Quantizing layer input.7 of type innerProduct


  ).fit(wf)


Quantizing layer input.9 of type innerProduct


  ).fit(wf)


Quantizing layer input.11 of type innerProduct


  ).fit(wf)


Quantizing layer input.13 of type innerProduct


  ).fit(wf)


Quantizing layer input.15 of type innerProduct


  ).fit(wf)


Quantizing layer input.17 of type innerProduct


  ).fit(wf)


Quantizing layer input.19 of type innerProduct


  ).fit(wf)


Quantizing layer input.21 of type innerProduct


  ).fit(wf)


Quantizing layer input.23 of type innerProduct


  ).fit(wf)


Quantizing layer input.25 of type innerProduct


  ).fit(wf)


Quantizing layer input.27 of type innerProduct


  ).fit(wf)


Quantizing layer input.29 of type innerProduct


  ).fit(wf)


Quantizing layer input.31 of type innerProduct


  ).fit(wf)


Quantizing layer input.33 of type innerProduct


  ).fit(wf)


Quantizing layer input.35 of type innerProduct


  ).fit(wf)


Quantizing layer input.37 of type innerProduct


  ).fit(wf)


Quantizing layer input.39 of type innerProduct


  ).fit(wf)


Quantizing layer input.41 of type innerProduct


  ).fit(wf)


Quantizing layer input.43 of type innerProduct


  ).fit(wf)


Quantizing layer input.45 of type innerProduct


  ).fit(wf)


Quantizing layer input of type innerProduct


  ).fit(wf)


Quantizing layer 97 of type innerProduct


  ).fit(wf)
Converting PyTorch Frontend ==> MIL Ops:  98%|▉| 63/64 [00:00<00:00, 1639.38 ops
Running MIL frontend_pytorch pipeline: 100%|█| 5/5 [00:00<00:00, 7463.17 passes/
Running MIL default pipeline: 100%|███████| 56/56 [00:00<00:00, 542.57 passes/s]
Running MIL backend_neuralnetwork pipeline: 100%|█| 8/8 [00:00<00:00, 9292.28 pa
Translating MIL ==> NeuralNetwork Ops: 100%|██| 96/96 [00:17<00:00,  5.56 ops/s]


Quantizing using linear quantization
Quantizing layer input.3 of type innerProduct
Quantizing layer input.5 of type innerProduct
Quantizing layer input.7 of type innerProduct
Quantizing layer input.9 of type innerProduct
Quantizing layer input.11 of type innerProduct
Quantizing layer input.13 of type innerProduct
Quantizing layer input.15 of type innerProduct
Quantizing layer input.17 of type innerProduct
Quantizing layer input.19 of type innerProduct
Quantizing layer input.21 of type innerProduct
Quantizing layer input.23 of type innerProduct
Quantizing layer input.25 of type innerProduct
Quantizing layer input.27 of type innerProduct
Quantizing layer input.29 of type innerProduct
Quantizing layer input.31 of type innerProduct
Quantizing layer input.33 of type innerProduct
Quantizing layer input.35 of type innerProduct
Quantizing layer input.37 of type innerProduct
Quantizing layer input.39 of type innerProduct
Quantizing layer input.41 of type innerProduct
Quantizing layer input.43 o

  ).fit(wf)


Quantizing layer input.5 of type innerProduct


  ).fit(wf)


Quantizing layer input.7 of type innerProduct


  ).fit(wf)


Quantizing layer input.9 of type innerProduct


  ).fit(wf)


Quantizing layer input.11 of type innerProduct


  ).fit(wf)


Quantizing layer input.13 of type innerProduct


  ).fit(wf)


Quantizing layer input.15 of type innerProduct


  ).fit(wf)


Quantizing layer input.17 of type innerProduct


  ).fit(wf)


Quantizing layer input.19 of type innerProduct


  ).fit(wf)


Quantizing layer input.21 of type innerProduct


  ).fit(wf)


Quantizing layer input.23 of type innerProduct


  ).fit(wf)


Quantizing layer input.25 of type innerProduct


  ).fit(wf)


Quantizing layer input.27 of type innerProduct


  ).fit(wf)


Quantizing layer input.29 of type innerProduct


  ).fit(wf)


Quantizing layer input.31 of type innerProduct


  ).fit(wf)


Quantizing layer input.33 of type innerProduct


  ).fit(wf)


Quantizing layer input.35 of type innerProduct


  ).fit(wf)


Quantizing layer input.37 of type innerProduct


  ).fit(wf)


Quantizing layer input.39 of type innerProduct


  ).fit(wf)


Quantizing layer input.41 of type innerProduct


  ).fit(wf)


Quantizing layer input.43 of type innerProduct


  ).fit(wf)


Quantizing layer input.45 of type innerProduct


  ).fit(wf)


Quantizing layer input.47 of type innerProduct


  ).fit(wf)


Quantizing layer input.49 of type innerProduct


  ).fit(wf)


Quantizing layer input.51 of type innerProduct


  ).fit(wf)


Quantizing layer input.53 of type innerProduct


  ).fit(wf)


Quantizing layer input.55 of type innerProduct


  ).fit(wf)


Quantizing layer input.57 of type innerProduct


  ).fit(wf)


Quantizing layer input.59 of type innerProduct


  ).fit(wf)


Quantizing layer input.61 of type innerProduct


  ).fit(wf)


Quantizing layer input of type innerProduct


  ).fit(wf)


Quantizing layer 129 of type innerProduct


  ).fit(wf)
Converting PyTorch Frontend ==> MIL Ops:  94%|▉| 15/16 [00:00<00:00, 797.58 ops/
Running MIL frontend_pytorch pipeline: 100%|█| 5/5 [00:00<00:00, 9562.94 passes/
Running MIL default pipeline: 100%|███████| 56/56 [00:00<00:00, 812.83 passes/s]
Running MIL backend_neuralnetwork pipeline: 100%|█| 8/8 [00:00<00:00, 29511.37 p
Translating MIL ==> NeuralNetwork Ops: 100%|██| 24/24 [00:17<00:00,  1.40 ops/s]


Quantizing using linear quantization
Quantizing layer input.3 of type innerProduct
Quantizing layer input.5 of type innerProduct
Quantizing layer input.7 of type innerProduct
Quantizing layer input.9 of type innerProduct
Quantizing layer input.11 of type innerProduct
Quantizing layer input.13 of type innerProduct
Quantizing layer input of type innerProduct
Quantizing layer 33 of type innerProduct
Quantizing using kmeans quantization
Optimizing Neural Network before Quantization:
Finished optimizing network. Quantizing neural network..
Quantizing layer input.3 of type innerProduct


  ).fit(wf)


Quantizing layer input.5 of type innerProduct
