Change the EdgeTapor to a simpler version.

In [8]:
import os
import numpy as np
import torch
from torch.utils.data import Dataset
import cv2
import random
import pickle   
from tqdm import tqdm
import torch.nn as nn
import torch
from thop import profile
from torchinfo import summary

In [9]:
# Path to the trained model weights. It is reset to None when the file is
# missing, which is the condition the weight-loading code checks for before
# falling back to the randomly initialised test checkpoint.
trained_model_weight = 'NanoTapor_files/NanoTapor_model_weights/44_20243715330_tm2_adaptor.pth'

if not os.path.exists(trained_model_weight):
    # This branch only runs when the checkpoint is absent, so report that.
    print('Model weights not found')
    trained_model_weight = None

In [10]:
# Output directory for the artifacts written by this notebook.
saved_path = 'NanoTapor_files/'
# exist_ok=True keeps the cell re-runnable and avoids the check-then-create
# race of a separate os.path.exists() test.
os.makedirs(saved_path, exist_ok=True)

# ---------------------------------------------------------------------------
# Model-size configurations that were tried, with the figures recorded for
# each one (tflite array length, params/FLOPs as printed by the profiling
# code, and RAM / flash / latency reported for the target device).
# Exactly one (k, last_conv_channel) pair must be left uncommented.
#
# Original author's note on last_conv_channel: "should be bigger than 64;
# when exteme_light=True, last_conv_channel doesn't work".
# NOTE(review): `exteme_light` does not appear in this notebook and the
# selected value (32) is below 64 -- confirm whether that note is stale.
# ---------------------------------------------------------------------------

# k=8 # the final edge model size is: unsigned int converted_model_tflite_len = 124288;  params: 115791.00 FLOPs: 715608.00
# last_conv_channel = 48
# # RAM:   [=====     ]  46.3% (used 151828 bytes from 327680 bytes)
# # Flash: [=         ]  13.8% (used 460853 bytes from 3342336 bytes)
# # Inference time delay (millis): 371.000000

####### finally used ################
k=16 # the final edge model size is: unsigned int converted_model_tflite_len = 165048;  params: 156479.00 FLOPs: 647856.00
last_conv_channel = 32
# RAM:   [======    ]  58.8% (used 192596 bytes from 327680 bytes)
# Flash: [==        ]  15.0% (used 501621 bytes from 3342336 bytes)
# Inference time delay (millis): 313.000000

# k=16 # the final edge model size is: unsigned int converted_model_tflite_len = 198680;  params: 189895.00 FLOPs: 735408.00
# last_conv_channel = 40
# RAM:   [=======   ]  69.0% (used 226228 bytes from 327680 bytes)
# Flash: [==        ]  16.0% (used 535253 bytes from 3342336 bytes)
# Inference time delay (millis): 347.000000

# k=12 # the final edge model size is: unsigned int converted_model_tflite_len = 178296;  params: 169551.00 FLOPs: 769284.00
# last_conv_channel = 48
# # RAM:   [======    ]  62.8% (used 205844 bytes from 327680 bytes)
# # Flash: [==        ]  15.4% (used 514869 bytes from 3342336 bytes)
# # Inference time delay (millis): 376.000000

# k=16 # the final edge model size is: unsigned int converted_model_tflite_len = 232312;  params: 223311.00 FLOPs: 822960.00
# last_conv_channel = 48
# # RAM:   [========  ]  79.3% (used 259860 bytes from 327680 bytes)
# # Flash: [==        ]  17.0% (used 568885 bytes from 3342336 bytes)
# # Inference time delay (millis): 381.000000

# k=16 # the final edge model size is: unsigned int converted_model_tflite_len = 299576;  params: 290143.00 FLOPs: 998064.00
# last_conv_channel = 64
# # region `dram0_0_seg' overflowed by 18472 bytes  (does not fit on the device)

# Width of the encoder's feature vector; this parameter controls the size of
# the tapor edge model.
feat_dim = 21*k

# Artifact locations, derived from saved_path so the directory is defined once.
model_save_path = os.path.join(saved_path, "testing_edge_strudent_pytorch.pth")  # pytorch model saved path
onnx_saved_path = os.path.join(saved_path, "testing_edge_tapor_v12.onnx")

In [11]:

class EdgeTaporStudent(nn.Module):
    """Compact CNN that returns a 21*3-value pose together with its feature vector.

    The encoder is three [3x3 conv -> ReLU -> 2x2 max-pool] stages followed by
    Flatten -> Linear -> ReLU; the decoder is a single Linear layer.

    Args:
        feat_dim: Width of the encoder output (the returned feature vector).
        last_conv_channel: Number of output channels of the third conv stage.

    The first Linear layer is sized as ``last_conv_channel*3*4``, i.e. the
    pooled feature map must be 3x4, which is what the 24x32 single-channel
    input used in this notebook produces after three 2x2 poolings.
    """

    def __init__(self, feat_dim =21*48, last_conv_channel=128):
        super().__init__()

        # (in_channels, out_channels) of each conv stage, in forward order.
        stage_channels = [(1, 8), (8, 16), (16, last_conv_channel)]

        # Layers are appended in the exact order they run, so the positional
        # indices inside nn.Sequential (and therefore the state_dict keys
        # encoder.0 / encoder.3 / encoder.6 / encoder.10) stay unchanged.
        encoder_layers = []
        for c_in, c_out in stage_channels:
            encoder_layers += [
                nn.Conv2d(c_in, c_out, kernel_size=3, stride=1, padding=1),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2, stride=2),
            ]
        encoder_layers += [
            nn.Flatten(),
            nn.Linear(last_conv_channel*3*4, feat_dim),
            nn.ReLU(),
        ]

        self.encoder = nn.Sequential(*encoder_layers)
        self.decoder = nn.Sequential(nn.Linear(feat_dim, 21*3))

    def forward(self, thermal_map):
        """Return ``(pose, feat)``: the decoder output and the encoder features."""
        features = self.encoder(thermal_map)
        return self.decoder(features), features

# Smoke test: run the student model once on a random 1x1x24x32 input and
# print the shapes of its two outputs (a = pose, b = feat, in the order
# returned by EdgeTaporStudent.forward).
x = torch.randn(1, 1, 24, 32)
edge_tapor_strudent = EdgeTaporStudent(feat_dim, last_conv_channel)
a, b = edge_tapor_strudent(x)
print(a.shape)
print(b.shape)

# Save the randomly initialised weights; this checkpoint exists only so the
# loading code has a file to read when no trained weights are available.
# NOTE(review): the save happens before profile() is called on the model --
# presumably intentional so that anything the profiler attaches to the
# modules cannot end up in the saved state_dict; confirm before reordering.
torch.save(edge_tapor_strudent.state_dict(), model_save_path)

# Parameter and operation counts from thop for a single forward pass on x.
# NOTE(review): thop's own documentation names the first return value MACs;
# confirm that labelling it "FLOPs" here is intended.
flops, params = profile(edge_tapor_strudent, inputs=(x,), verbose=False)
print("params: {:.2f}".format(params))
print("FLOPs: {:.2f}".format(flops))

# Layer-by-layer table (output shapes and parameter counts) from torchinfo;
# left as the last expression so the notebook displays it.
summary(edge_tapor_strudent, input_size=(1, 1, 24, 32))

torch.Size([1, 63])
torch.Size([1, 336])
params: 156479.00
FLOPs: 647856.00


Layer (type:depth-idx)                   Output Shape              Param #
EdgeTaporStudent                         [1, 63]                   --
├─Sequential: 1-1                        [1, 336]                  --
│    └─Conv2d: 2-1                       [1, 8, 24, 32]            80
│    └─ReLU: 2-2                         [1, 8, 24, 32]            --
│    └─MaxPool2d: 2-3                    [1, 8, 12, 16]            --
│    └─Conv2d: 2-4                       [1, 16, 12, 16]           1,168
│    └─ReLU: 2-5                         [1, 16, 12, 16]           --
│    └─MaxPool2d: 2-6                    [1, 16, 6, 8]             --
│    └─Conv2d: 2-7                       [1, 32, 6, 8]             4,640
│    └─ReLU: 2-8                         [1, 32, 6, 8]             --
│    └─MaxPool2d: 2-9                    [1, 32, 3, 4]             --
│    └─Flatten: 2-10                     [1, 384]                  --
│    └─Linear: 2-11                      [1, 336]                  129,360
│   

In [12]:
# Deployment variant for the edge device: the feature output is dropped and
# forward() returns only the pose.
class EdgeTapor(nn.Module):
    """Edge-deployable CNN that maps a thermal map to a 21*3-value pose.

    The layer stack (and therefore every state_dict key) is the same as in
    EdgeTaporStudent; only the return value of ``forward`` differs.

    Args:
        feat_dim: Width of the encoder output fed to the decoder.
        last_conv_channel: Number of output channels of the third conv stage.

    The first Linear layer is sized as ``last_conv_channel*3*4``, so the
    pooled feature map must be 3x4 (as produced by a 24x32 input).
    """

    @staticmethod
    def _conv_stage(in_channels, out_channels):
        """Return one [3x3 conv, ReLU, 2x2 max-pool] stage as a list of layers."""
        return [
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]

    def __init__(self, feat_dim =21*48,last_conv_channel=128):
        super().__init__()
        # Stages are unpacked left to right, which keeps both the layer
        # creation order and the positional indices inside nn.Sequential.
        self.encoder = nn.Sequential(
            *self._conv_stage(1, 8),
            *self._conv_stage(8, 16),
            *self._conv_stage(16, last_conv_channel),
            nn.Flatten(),
            nn.Linear(last_conv_channel*3*4, feat_dim),
            nn.ReLU(),
        )
        self.decoder = nn.Sequential(nn.Linear(feat_dim, 21*3))

    def forward(self, thermal_map):
        """Return the pose only (decoder applied to the encoder features)."""
        return self.decoder(self.encoder(thermal_map))


# Build the edge model and load weights into it. When no trained checkpoint
# is available, the randomly initialised student checkpoint is loaded
# instead, which is only useful for testing the code path.
edge_tapor = EdgeTapor(feat_dim, last_conv_channel)

# map_location='cpu' lets a checkpoint that was saved from a GPU load on a
# CPU-only machine; the model is used with CPU tensors afterwards.
# NOTE: torch.load unpickles the file, so only load checkpoints you trust.
if trained_model_weight is not None:
    edge_tapor.load_state_dict(torch.load(trained_model_weight, map_location='cpu'))
    print('Load trained model weight')
else:
    edge_tapor.load_state_dict(torch.load(model_save_path, map_location='cpu'))
# Switch to inference mode; left as the last expression so the notebook
# displays the module structure.
edge_tapor.eval()

Load trained model weight


EdgeTapor(
  (encoder): Sequential(
    (0): Conv2d(1, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (9): Flatten(start_dim=1, end_dim=-1)
    (10): Linear(in_features=384, out_features=336, bias=True)
    (11): ReLU()
  )
  (decoder): Sequential(
    (0): Linear(in_features=336, out_features=63, bias=True)
  )
)

In [13]:
# Example input handed to the exporter; same 1x1x24x32 shape that the model
# is exercised with elsewhere in this notebook.
torch_input = torch.randn(1, 1, 24, 32)

# Names given to the graph's input and output tensors in the ONNX file.
input_names = ["thermal_map"]
output_names = ["pose"]

# Export the edge model to ONNX (opset 12), printing the exported graph.
torch.onnx.export(
    edge_tapor,
    torch_input,
    onnx_saved_path,
    verbose=True,
    input_names=input_names,
    output_names=output_names,
    opset_version=12,
)

# related materials online:
# https://github.com/onnx/tutorials/blob/main/tutorials/VersionConversion.md
# https://onnxruntime.ai/docs/reference/compatibility.html


# Check the generated ONNX model
##################################

import onnx
import numpy as np
import onnxruntime as ort

# Structural validation of the exported file.
model = onnx.load(onnx_saved_path)
onnx.checker.check_model(model)
# printable_graph() only returns a string; print it so it is actually shown.
print(onnx.helper.printable_graph(model.graph))

# Run the exported model with ONNX Runtime. The provider is named explicitly
# because ONNX Runtime builds that ship additional execution providers
# require the list to be given.
ort_session = ort.InferenceSession(onnx_saved_path, providers=["CPUExecutionProvider"])
channels =1
height = 24
width = 32
batch_size = 1
onnx_input = np.random.randn(batch_size, channels, height, width).astype(np.float32)
outputs = ort_session.run(
    None,
    {'thermal_map': onnx_input}
)

# Numerical parity check: the ONNX output must match what the PyTorch model
# produces for the same input, otherwise the export is not trustworthy.
with torch.no_grad():
    torch_pose = edge_tapor(torch.from_numpy(onnx_input))
np.testing.assert_allclose(outputs[0], torch_pose.numpy(), rtol=1e-3, atol=1e-5)


Exported graph: graph(%thermal_map : Float(1, 1, 24, 32, strides=[768, 768, 32, 1], requires_grad=0, device=cpu),
      %encoder.0.weight : Float(8, 1, 3, 3, strides=[9, 9, 3, 1], requires_grad=1, device=cpu),
      %encoder.0.bias : Float(8, strides=[1], requires_grad=1, device=cpu),
      %encoder.3.weight : Float(16, 8, 3, 3, strides=[72, 9, 3, 1], requires_grad=1, device=cpu),
      %encoder.3.bias : Float(16, strides=[1], requires_grad=1, device=cpu),
      %encoder.6.weight : Float(32, 16, 3, 3, strides=[144, 9, 3, 1], requires_grad=1, device=cpu),
      %encoder.6.bias : Float(32, strides=[1], requires_grad=1, device=cpu),
      %encoder.10.weight : Float(336, 384, strides=[384, 1], requires_grad=1, device=cpu),
      %encoder.10.bias : Float(336, strides=[1], requires_grad=1, device=cpu),
      %decoder.0.weight : Float(63, 336, strides=[336, 1], requires_grad=1, device=cpu),
      %decoder.0.bias : Float(63, strides=[1], requires_grad=1, device=cpu)):
  %/encoder/encoder.0/Con