In [13]:
import torch
import numpy as np
import time
import argparse
import torch.nn as nn
import coremltools as ct
from coremltools.models.neural_network import quantization_utils
import os

In [14]:
def create_mlp(D, N, NumLayers):
    """Build a bias-free multi-layer perceptron made only of linear layers.

    The first layer maps ``D`` input features to ``N`` outputs and every
    following layer maps ``N`` to ``N``, so the stack composes for any
    combination of ``D`` and ``N``. Building every layer as ``D -> N`` only
    chains correctly when ``D == N``; for that case the result is unchanged.

    Args:
        D: Number of input features of the first layer.
        N: Number of output features of every layer.
        NumLayers: Number of ``nn.Linear`` layers to stack; 0 yields an
            empty ``Sequential``.

    Returns:
        A ``torch.nn.Sequential`` containing ``NumLayers`` ``nn.Linear``
        modules, all created with ``bias=False``.
    """
    layers = []
    in_features = D
    for _ in range(NumLayers):
        layers.append(nn.Linear(in_features, N, bias=False))
        # After the first layer the activations have N features.
        in_features = N
    return torch.nn.Sequential(*layers)

def init_weights(m):
    """Re-initialise the weight of an ``nn.Linear`` module with Xavier-normal values.

    Intended to be passed to ``Module.apply``; modules that are not
    ``nn.Linear`` are left untouched.

    Args:
        m: The module to (possibly) re-initialise in place.
    """
    if not isinstance(m, nn.Linear):
        return
    nn.init.xavier_normal_(m.weight)

class GenerateModelError(Exception):
    """Raised when converting or quantizing a generated model fails."""

In [28]:
def generate_mlmodels(M, N, batch, Layers):
    """Build an MLP, convert it to Core ML and save two quantized variants.

    Two files are written to the current working directory, named
    ``{M}x{N}x{Layers}x{batch}_FP16.mlmodel`` (weights quantized with
    ``nbits=16``) and ``{M}x{N}x{Layers}x{batch}_INT8LUT.mlmodel`` (weights
    quantized with ``nbits=8`` in ``"kmeans"`` mode). If both files already
    exist the function returns immediately without doing any work.

    Args:
        M: Input feature count passed to ``create_mlp`` and used as the
            second dimension of the traced input.
        N: Output feature count passed to ``create_mlp``.
        batch: First dimension of the traced input / Core ML input shape.
        Layers: Number of linear layers passed to ``create_mlp``.

    Raises:
        GenerateModelError: If the Core ML conversion or either weight
            quantization step fails. The underlying exception is attached
            as ``__cause__``.
    """
    stem = "x".join(str(part) for part in (M, N, Layers, batch))
    fp16_path = stem + "_FP16.mlmodel"
    int8_path = stem + "_INT8LUT.mlmodel"
    if os.path.exists(fp16_path) and os.path.exists(int8_path):
        return

    model = create_mlp(M, N, Layers)
    model.apply(init_weights)
    model.eval()

    # Example input used only to trace the model; its values are irrelevant.
    with torch.no_grad():
        x = torch.randn(batch, M)

    traced_model = torch.jit.trace(model, x)

    # Catch ``Exception`` rather than using a bare ``except`` so that
    # KeyboardInterrupt/SystemExit can still stop a long-running conversion.
    try:
        apple_model = ct.convert(
            traced_model,
            inputs=[ct.TensorType(name="input", shape=(batch, M))],
        )
    except Exception as err:
        raise GenerateModelError(
            "Core ML conversion failed for " + stem
        ) from err

    try:
        apple_model_FP16 = quantization_utils.quantize_weights(apple_model, nbits=16)
    except Exception as err:
        raise GenerateModelError(
            "FP16 weight quantization failed for " + stem
        ) from err
    apple_model_FP16.save(fp16_path)

    try:
        apple_model_INT8LUT = quantization_utils.quantize_weights(
            apple_model, nbits=8, quantization_mode="kmeans"
        )
    except Exception as err:
        raise GenerateModelError(
            "8-bit k-means weight quantization failed for " + stem
        ) from err
    apple_model_INT8LUT.save(int8_path)

In [None]:
# Sweep square layer sizes 256, 512, ..., 16384 and, for each size, layer
# counts 1, 8, 16, 24 and 32, generating the model files for batch size 1.
for i in range(7):
    M = 256 << i  # 256 * 2**i
    N = M
    for l in range(5):
        # l == 0 stands for a single layer; otherwise use multiples of 8.
        layers = max(1, 8 * l)
        try:
            generate_mlmodels(M, N, 1, layers)
        except GenerateModelError:
            # Report the failure and continue with the next configuration.
            print("Model with dimension ", M, "x", N, " with", layers, " layers not created")


Converting PyTorch Frontend ==> MIL Ops:  50%|▌| 1/2 [00:00<00:00, 1760.83 ops/s
Running MIL frontend_pytorch pipeline: 100%|█| 5/5 [00:00<00:00, 35787.58 passes
Running MIL default pipeline: 100%|██████| 56/56 [00:00<00:00, 9914.78 passes/s]
Running MIL backend_neuralnetwork pipeline: 100%|█| 8/8 [00:00<00:00, 70789.94 p
Translating MIL ==> NeuralNetwork Ops: 100%|████| 3/3 [00:00<00:00, 86.11 ops/s]


Quantizing using linear quantization
Quantizing layer 5 of type innerProduct
Quantizing using kmeans quantization
Optimizing Neural Network before Quantization:
Finished optimizing network. Quantizing neural network..
Quantizing layer 5 of type innerProduct


  ).fit(wf)
Converting PyTorch Frontend ==> MIL Ops:  50%|▌| 1/2 [00:00<00:00, 1110.78 ops/s
Running MIL frontend_pytorch pipeline: 100%|█| 5/5 [00:00<00:00, 49461.13 passes
Running MIL default pipeline: 100%|██████| 56/56 [00:00<00:00, 9408.79 passes/s]
Running MIL backend_neuralnetwork pipeline: 100%|█| 8/8 [00:00<00:00, 65027.97 p
Translating MIL ==> NeuralNetwork Ops: 100%|████| 3/3 [00:00<00:00, 20.68 ops/s]


Quantizing using linear quantization
Quantizing layer 5 of type innerProduct
Quantizing using kmeans quantization
Optimizing Neural Network before Quantization:
Finished optimizing network. Quantizing neural network..
Quantizing layer 5 of type innerProduct


  ).fit(wf)
Converting PyTorch Frontend ==> MIL Ops:  50%|▌| 1/2 [00:00<00:00, 373.29 ops/s]
Running MIL frontend_pytorch pipeline: 100%|█| 5/5 [00:00<00:00, 37650.84 passes
Running MIL default pipeline: 100%|██████| 56/56 [00:00<00:00, 5852.86 passes/s]
Running MIL backend_neuralnetwork pipeline: 100%|█| 8/8 [00:00<00:00, 28173.33 p
Translating MIL ==> NeuralNetwork Ops: 100%|████| 3/3 [00:00<00:00,  5.57 ops/s]


Quantizing using linear quantization
Quantizing layer 5 of type innerProduct
Quantizing using kmeans quantization
Optimizing Neural Network before Quantization:
Finished optimizing network. Quantizing neural network..
Quantizing layer 5 of type innerProduct


  ).fit(wf)
Converting PyTorch Frontend ==> MIL Ops:  50%|▌| 1/2 [00:00<00:00, 394.46 ops/s]
Running MIL frontend_pytorch pipeline: 100%|█| 5/5 [00:00<00:00, 56223.91 passes
Running MIL default pipeline: 100%|██████| 56/56 [00:00<00:00, 4127.02 passes/s]
Running MIL backend_neuralnetwork pipeline: 100%|█| 8/8 [00:00<00:00, 28581.29 p
Translating MIL ==> NeuralNetwork Ops: 100%|████| 3/3 [00:02<00:00,  1.42 ops/s]


Quantizing using linear quantization
Quantizing layer 5 of type innerProduct
Quantizing using kmeans quantization
Optimizing Neural Network before Quantization:
Finished optimizing network. Quantizing neural network..
Quantizing layer 5 of type innerProduct


  ).fit(wf)
Converting PyTorch Frontend ==> MIL Ops:  50%|▌| 1/2 [00:00<00:00, 207.94 ops/s]
Running MIL frontend_pytorch pipeline: 100%|█| 5/5 [00:00<00:00, 57456.22 passes
Running MIL default pipeline: 100%|██████| 56/56 [00:00<00:00, 2636.30 passes/s]
Running MIL backend_neuralnetwork pipeline: 100%|█| 8/8 [00:00<00:00, 60025.82 p
Translating MIL ==> NeuralNetwork Ops: 100%|████| 3/3 [00:08<00:00,  2.84s/ ops]


Quantizing using linear quantization
Quantizing layer 5 of type innerProduct
Quantizing using kmeans quantization
Optimizing Neural Network before Quantization:
Finished optimizing network. Quantizing neural network..
Quantizing layer 5 of type innerProduct


  ).fit(wf)
Converting PyTorch Frontend ==> MIL Ops:  98%|▉| 63/64 [00:00<00:00, 445.91 ops/
Running MIL frontend_pytorch pipeline: 100%|█| 5/5 [00:00<00:00, 7135.60 passes/
Running MIL default pipeline: 100%|███████| 56/56 [00:00<00:00, 108.32 passes/s]
Running MIL backend_neuralnetwork pipeline: 100%|█| 8/8 [00:00<00:00, 10362.70 p
Translating MIL ==> NeuralNetwork Ops: 100%|██| 96/96 [04:44<00:00,  2.96s/ ops]
