In [1]:
# Environment setup: pin the Python packages, then install the Edge TPU compiler via apt.
# h5py is force-reinstalled at 2.10.0 (the log below shows it replacing h5py 3.1.0);
# presumably this is needed for TF 1.15 to read/write .h5 model files -- TODO confirm.
!pip install 'h5py==2.10.0' --force-reinstall
# TensorFlow is pinned to 1.15, the TF1 line whose converter API is used later in this notebook.
!pip install tensorflow-gpu==1.15

# Register the signing key for Google's apt package repository.
!curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add -

# Add the Coral Edge TPU apt repository as a package source.
!echo "deb https://packages.cloud.google.com/apt coral-edgetpu-stable main" | sudo tee /etc/apt/sources.list.d/coral-edgetpu.list

# Refresh the package index so the newly added repository is visible.
!sudo apt-get update

# Install the `edgetpu_compiler` command-line tool invoked by testEdgeTPUCompilation.
!sudo apt-get install edgetpu-compiler


Collecting h5py==2.10.0
  Downloading h5py-2.10.0-cp37-cp37m-manylinux1_x86_64.whl (2.9 MB)
[K     |████████████████████████████████| 2.9 MB 7.7 MB/s 
[?25hCollecting numpy>=1.7
  Downloading numpy-1.21.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (15.7 MB)
[K     |████████████████████████████████| 15.7 MB 75 kB/s 
[?25hCollecting six
  Downloading six-1.16.0-py2.py3-none-any.whl (11 kB)
Installing collected packages: six, numpy, h5py
  Attempting uninstall: six
    Found existing installation: six 1.15.0
    Uninstalling six-1.15.0:
      Successfully uninstalled six-1.15.0
  Attempting uninstall: numpy
    Found existing installation: numpy 1.19.5
    Uninstalling numpy-1.19.5:
      Successfully uninstalled numpy-1.19.5
  Attempting uninstall: h5py
    Found existing installation: h5py 3.1.0
    Uninstalling h5py-3.1.0:
      Successfully uninstalled h5py-3.1.0
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are ins

Collecting tensorflow-gpu==1.15
  Downloading tensorflow_gpu-1.15.0-cp37-cp37m-manylinux2010_x86_64.whl (411.5 MB)
[K     |████████████████████████████████| 411.5 MB 8.0 kB/s 
Collecting keras-applications>=1.0.8
  Downloading Keras_Applications-1.0.8-py3-none-any.whl (50 kB)
[K     |████████████████████████████████| 50 kB 7.4 MB/s 
Collecting gast==0.2.2
  Downloading gast-0.2.2.tar.gz (10 kB)
Collecting tensorboard<1.16.0,>=1.15.0
  Downloading tensorboard-1.15.0-py3-none-any.whl (3.8 MB)
[K     |████████████████████████████████| 3.8 MB 39.7 MB/s 
[?25hCollecting tensorflow-estimator==1.15.1
  Downloading tensorflow_estimator-1.15.1-py2.py3-none-any.whl (503 kB)
[K     |████████████████████████████████| 503 kB 43.9 MB/s 
Building wheels for collected packages: gast
  Building wheel for gast (setup.py) ... [?25l[?25hdone
  Created wheel for gast: filename=gast-0.2.2-py3-none-any.whl size=7553 sha256=2e35864943aac6c2d6ccbf8c1b5ab6d22319f11a60dc910f460fae7e0be00775
  Stored in di

In [15]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import DenseNet121, DenseNet169, DenseNet201, \
  InceptionResNetV2, InceptionV3, MobileNet, MobileNetV2, NASNetLarge, NASNetMobile,\
  ResNet101, ResNet101V2, ResNet152, ResNet152V2, ResNet50, ResNet50V2, \
  VGG16, VGG19, Xception 
import os
from functools import partial

In [14]:
def representative_data_gen(input_shape, num_samples=100, seed=None):
  """Yield random calibration batches for TFLite post-training quantization.

  Each yielded item is a one-element list holding a float32 array of shape
  ``(1,) + input_shape`` with values drawn uniformly from [0, 1), which is the
  form assigned to ``converter.representative_dataset`` in this notebook.

  Args:
    input_shape: Per-sample shape without the batch dimension. Any sequence
      of ints is accepted (tuple, list, ...).
    num_samples: Number of batches to yield. Defaults to 100.
    seed: Optional integer seed. When given, a private ``RandomState`` is used
      so the generated data is reproducible; when ``None`` the global NumPy
      random state is used.

  Yields:
    ``[array]`` where ``array`` has dtype float32 and a leading batch
    dimension of 1.
  """
  # Coerce to a tuple so list-like shapes can be concatenated with (1,).
  sample_shape = (1,) + tuple(input_shape)
  # `np.random.random_sample` is the function behind `np.random.random`, so
  # the unseeded path draws from the same global stream as before.
  rng = np.random if seed is None else np.random.RandomState(seed)
  for _ in range(num_samples):
    yield [rng.random_sample(sample_shape).astype(np.float32)]

def testEdgeTPUCompilation(modelGenerationFunction, fn="test"):
  """Convert a Keras model to quantized TFLite and compile it for the Edge TPU.

  Steps performed:
    1. Build the model by calling ``modelGenerationFunction()`` and save it
       as ``{fn}.h5``.
    2. Convert the saved file to a TFLite model with uint8 input/output,
       calibrated on random data, and write it to ``{fn}.tflite``.
    3. Run ``edgetpu_compiler`` on the TFLite file, store its console output
       in ``{fn}_edgetpu_compilation.log`` and print it.
    4. Print the operation log ``{fn}_edgetpu.log`` written by the compiler.

  Args:
    modelGenerationFunction: Zero-argument callable returning a Keras model
      (for example one of the ``tensorflow.keras.applications`` constructors).
    fn: File stem used for every artifact. If ``None``, the model's ``name``
      attribute is used instead.

  Raises:
    RuntimeError: If ``edgetpu_compiler`` exits with a non-zero status.
    FileNotFoundError: If the ``edgetpu_compiler`` executable is not
      installed, or if the compiler did not write its operation log.
  """
  import subprocess  # function-scope import: not part of the notebook's import cell

  # Build the model and save it as an .h5 file for the converter to load.
  m = modelGenerationFunction()

  if fn is None:
    fn = m.name

  print(f"Model name: {fn}")

  # NOTE(review): assumes the first layer reports a list of input shapes whose
  # first entry starts with the batch dimension -- confirm for custom models.
  input_shape = m.layers[0].input_shape[0][1:]
  print("Input shape: ", input_shape)
  m.save(fn+".h5")

  # The converter calls the dataset function with no arguments, so bind the shape.
  rdg = partial(representative_data_gen, input_shape=input_shape)

  # Convert to TFLite with post-training quantization and uint8 I/O.
  converter = tf.lite.TFLiteConverter.from_keras_model_file(fn+".h5")
  converter.optimizations = [tf.lite.Optimize.DEFAULT]
  converter.representative_dataset = rdg
  converter.inference_input_type = tf.uint8
  converter.inference_output_type = tf.uint8
  tflite_model = converter.convert()

  # Save the converted model.
  tflite_path = fn + ".tflite"
  with open(tflite_path, 'wb') as f:
    f.write(tflite_model)

  # Remove any operation log left by an earlier run so a failed compilation
  # can never be reported using stale results.
  op_log_path = f"{fn}_edgetpu.log"
  if os.path.exists(op_log_path):
    os.remove(op_log_path)

  # Compile for the Edge TPU. An argument list (no shell) keeps unusual
  # characters in the file name from being interpreted by a shell, and stderr
  # is merged into stdout so error messages land in the saved log.
  result = subprocess.run(
      ["edgetpu_compiler", tflite_path],
      stdout=subprocess.PIPE,
      stderr=subprocess.STDOUT,
      universal_newlines=True,
  )
  edge_log = result.stdout

  with open(f"{fn}_edgetpu_compilation.log", "w") as fp:
    fp.write(edge_log)
  print(edge_log)

  if result.returncode != 0:
    raise RuntimeError(
        f"edgetpu_compiler failed for {tflite_path} "
        f"(exit status {result.returncode}); see {fn}_edgetpu_compilation.log"
    )

  # Show the per-operation log produced by the compiler.
  with open(op_log_path) as fp:
    log = fp.read()
  print(log)

# Time the full convert-and-compile pipeline for MobileNetV2; fn=None names the output files after the model's own `name`.
%time testEdgeTPUCompilation(MobileNetV2, None)

Model name: mobilenetv2_1.00_224
Input shape
INFO:tensorflow:Froze 262 variables.
INFO:tensorflow:Converted 262 variables to const ops.
Edge TPU Compiler version 16.0.384591198
Started a compilation timeout timer of 180 seconds.

Model compiled successfully in 1297 ms.

Input model: mobilenetv2_1.00_224.tflite
Input size: 3.80MiB
Output model: mobilenetv2_1.00_224_edgetpu.tflite
Output size: 4.06MiB
On-chip memory used for caching model parameters: 3.88MiB
On-chip memory remaining for caching model parameters: 3.81MiB
Off-chip memory used for streaming uncached model parameters: 0.00B
Number of Edge TPU subgraphs: 1
Total number of operations: 72
Operation log: mobilenetv2_1.00_224_edgetpu.log
See the operation log file for individual operation details.
Compilation child process completed within timeout period.
Compilation succeeded! 

Edge TPU Compiler version 16.0.384591198
Input: mobilenetv2_1.00_224.tflite
Output: mobilenetv2_1.00_224_edgetpu.tflite

Operator                       

In [16]:
# Time the pipeline for MobileNet (built with its default constructor arguments); files are named after the model's `name`.
%time testEdgeTPUCompilation(MobileNet, None)

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.6/mobilenet_1_0_224_tf.h5
Model name: mobilenet_1.00_224
Input shape
INFO:tensorflow:Froze 137 variables.
INFO:tensorflow:Converted 137 variables to const ops.
Edge TPU Compiler version 16.0.384591198
Started a compilation timeout timer of 180 seconds.

Model compiled successfully in 1170 ms.

Input model: mobilenet_1.00_224.tflite
Input size: 4.35MiB
Output model: mobilenet_1.00_224_edgetpu.tflite
Output size: 4.46MiB
On-chip memory used for caching model parameters: 4.30MiB
On-chip memory remaining for caching model parameters: 3.40MiB
Off-chip memory used for streaming uncached model parameters: 0.00B
Number of Edge TPU subgraphs: 1
Total number of operations: 39
Operation log: mobilenet_1.00_224_edgetpu.log
See the operation log file for individual operation details.
Compilation child process completed within timeout period.
Compilation succeeded! 

Edge TPU Compiler version 16.0.384591198
I

In [17]:
# Time the pipeline for DenseNet121; fn=None means the model's `name` is used as the file stem.
# In the recorded run below, part of the parameters is streamed from off-chip memory.
%time testEdgeTPUCompilation(DenseNet121, None)

Model name: densenet121
Input shape
INFO:tensorflow:Froze 606 variables.
INFO:tensorflow:Converted 606 variables to const ops.
Edge TPU Compiler version 16.0.384591198
Started a compilation timeout timer of 180 seconds.

Model compiled successfully in 4392 ms.

Input model: densenet121.tflite
Input size: 7.95MiB
Output model: densenet121_edgetpu.tflite
Output size: 9.34MiB
On-chip memory used for caching model parameters: 7.04MiB
On-chip memory remaining for caching model parameters: 0.00B
Off-chip memory used for streaming uncached model parameters: 2.98MiB
Number of Edge TPU subgraphs: 1
Total number of operations: 384
Operation log: densenet121_edgetpu.log
See the operation log file for individual operation details.
Compilation child process completed within timeout period.
Compilation succeeded! 

Edge TPU Compiler version 16.0.384591198
Input: densenet121.tflite
Output: densenet121_edgetpu.tflite

Operator                       Count      Status

MAX_POOL_2D                    1  

In [18]:
# Time the pipeline for DenseNet169; fn=None means the model's `name` is used as the file stem.
%time testEdgeTPUCompilation(DenseNet169, None)

Downloading data from https://github.com/keras-team/keras-applications/releases/download/densenet/densenet169_weights_tf_dim_ordering_tf_kernels.h5
Model name: densenet169
Input shape
INFO:tensorflow:Froze 846 variables.
INFO:tensorflow:Converted 846 variables to const ops.
Edge TPU Compiler version 16.0.384591198
Started a compilation timeout timer of 180 seconds.

Model compiled successfully in 7514 ms.

Input model: densenet169.tflite
Input size: 13.97MiB
Output model: densenet169_edgetpu.tflite
Output size: 16.51MiB
On-chip memory used for caching model parameters: 7.04MiB
On-chip memory remaining for caching model parameters: 1.50KiB
Off-chip memory used for streaming uncached model parameters: 8.59MiB
Number of Edge TPU subgraphs: 1
Total number of operations: 533
Operation log: densenet169_edgetpu.log
See the operation log file for individual operation details.
Compilation child process completed within timeout period.
Compilation succeeded! 

Edge TPU Compiler version 16.0.3845

In [19]:
# Time the pipeline for DenseNet201; fn=None means the model's `name` is used as the file stem.
%time testEdgeTPUCompilation(DenseNet201, None)

Downloading data from https://github.com/keras-team/keras-applications/releases/download/densenet/densenet201_weights_tf_dim_ordering_tf_kernels.h5
Model name: densenet201
Input shape
INFO:tensorflow:Froze 1006 variables.
INFO:tensorflow:Converted 1006 variables to const ops.
Edge TPU Compiler version 16.0.384591198
Started a compilation timeout timer of 180 seconds.

Model compiled successfully in 9801 ms.

Input model: densenet201.tflite
Input size: 19.65MiB
Output model: densenet201_edgetpu.tflite
Output size: 23.27MiB
On-chip memory used for caching model parameters: 7.04MiB
On-chip memory remaining for caching model parameters: 2.00KiB
Off-chip memory used for streaming uncached model parameters: 15.17MiB
Number of Edge TPU subgraphs: 1
Total number of operations: 622
Operation log: densenet201_edgetpu.log
See the operation log file for individual operation details.
Compilation child process completed within timeout period.
Compilation succeeded! 

Edge TPU Compiler version 16.0.3

In [20]:
# Time the pipeline for InceptionResNetV2; fn=None means the model's `name` is used as the file stem.
%time testEdgeTPUCompilation(InceptionResNetV2, None)

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.7/inception_resnet_v2_weights_tf_dim_ordering_tf_kernels.h5
Model name: inception_resnet_v2
Input shape
INFO:tensorflow:Froze 898 variables.
INFO:tensorflow:Converted 898 variables to const ops.
Edge TPU Compiler version 16.0.384591198
Started a compilation timeout timer of 180 seconds.

Model compiled successfully in 12451 ms.

Input model: inception_resnet_v2.tflite
Input size: 55.31MiB
Output model: inception_resnet_v2_edgetpu.tflite
Output size: 55.86MiB
On-chip memory used for caching model parameters: 5.52MiB
On-chip memory remaining for caching model parameters: 2.50KiB
Off-chip memory used for streaming uncached model parameters: 49.61MiB
Number of Edge TPU subgraphs: 1
Total number of operations: 395
Operation log: inception_resnet_v2_edgetpu.log
See the operation log file for individual operation details.
Compilation child process completed within timeout period.
Compilation succeeded!

In [21]:
# Time the pipeline for InceptionV3; fn=None means the model's `name` is used as the file stem.
%time testEdgeTPUCompilation(InceptionV3, None)

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.5/inception_v3_weights_tf_dim_ordering_tf_kernels.h5
Model name: inception_v3
Input shape
INFO:tensorflow:Froze 378 variables.
INFO:tensorflow:Converted 378 variables to const ops.
Edge TPU Compiler version 16.0.384591198
Started a compilation timeout timer of 180 seconds.

Model compiled successfully in 5223 ms.

Input model: inception_v3.tflite
Input size: 23.21MiB
Output model: inception_v3_edgetpu.tflite
Output size: 23.99MiB
On-chip memory used for caching model parameters: 5.53MiB
On-chip memory remaining for caching model parameters: 256.00B
Off-chip memory used for streaming uncached model parameters: 17.97MiB
Number of Edge TPU subgraphs: 1
Total number of operations: 162
Operation log: inception_v3_edgetpu.log
See the operation log file for individual operation details.
Compilation child process completed within timeout period.
Compilation succeeded! 

Edge TPU Compiler version 16.0.38

In [22]:
%time testEdgeTPUCompilation(NASNetLarge, None)

Downloading data from https://github.com/titu1994/Keras-NASNet/releases/download/v1.2/NASNet-large.h5
Model name: NASNet
Input shape
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
INFO:tensorflow:Froze 1546 variables.
INFO:tensorflow:Converted 1546 variables to const ops.
Edge TPU Compiler version 16.0.384591198
Started a compilation timeout timer of 180 seconds.

Model compiled successfully in 558 ms.

Input model: NASNet.tflite
Input size: 88.37MiB
Output model: NASNet_edgetpu.tflite
Output size: 88.43MiB
On-chip memory used for caching model parameters: 6.00KiB
On-chip memory remaining for caching model parameters: 7.67MiB
Off-chip memory used for streaming uncached model parameters: 0.00B
Number of Edge TPU subgraphs: 1
Total number of operations: 1333
Operation log: NASNet_edgetpu.log

Model successfully compiled but not all operations are supported by the Edge TPU. A percentage of the model will instead run on

In [23]:
%time testEdgeTPUCompilation(NASNetMobile, None)

Downloading data from https://github.com/titu1994/Keras-NASNet/releases/download/v1.2/NASNet-mobile.h5
Model name: NASNet
Input shape
INFO:tensorflow:Froze 1126 variables.
INFO:tensorflow:Converted 1126 variables to const ops.
Edge TPU Compiler version 16.0.384591198
Started a compilation timeout timer of 180 seconds.

Model compiled successfully in 98 ms.

Input model: NASNet.tflite
Input size: 5.94MiB
Output model: NASNet_edgetpu.tflite
Output size: 5.96MiB
On-chip memory used for caching model parameters: 3.00KiB
On-chip memory remaining for caching model parameters: 7.90MiB
Off-chip memory used for streaming uncached model parameters: 0.00B
Number of Edge TPU subgraphs: 1
Total number of operations: 991
Operation log: NASNet_edgetpu.log

Model successfully compiled but not all operations are supported by the Edge TPU. A percentage of the model will instead run on the CPU, which is slower. If possible, consider updating your model to use only operations supported by the Edge TPU. Fo

In [24]:
# Time the pipeline for ResNet101; fn=None means the model's `name` is used as the file stem.
%time testEdgeTPUCompilation(ResNet101, None)

Downloading data from https://github.com/keras-team/keras-applications/releases/download/resnet/resnet101_weights_tf_dim_ordering_tf_kernels.h5
Model name: resnet101
Input shape
INFO:tensorflow:Froze 626 variables.
INFO:tensorflow:Converted 626 variables to const ops.
Edge TPU Compiler version 16.0.384591198
Started a compilation timeout timer of 180 seconds.

Model compiled successfully in 9061 ms.

Input model: resnet101.tflite
Input size: 43.85MiB
Output model: resnet101_edgetpu.tflite
Output size: 43.43MiB
On-chip memory used for caching model parameters: 7.14MiB
On-chip memory remaining for caching model parameters: 512.00B
Off-chip memory used for streaming uncached model parameters: 35.90MiB
Number of Edge TPU subgraphs: 1
Total number of operations: 145
Operation log: resnet101_edgetpu.log
See the operation log file for individual operation details.
Compilation child process completed within timeout period.
Compilation succeeded! 

Edge TPU Compiler version 16.0.384591198
Input

In [25]:
# Time the pipeline for ResNet101V2; fn=None means the model's `name` is used as the file stem.
%time testEdgeTPUCompilation(ResNet101V2, None)

Downloading data from https://github.com/keras-team/keras-applications/releases/download/resnet/resnet101v2_weights_tf_dim_ordering_tf_kernels.h5
Model name: resnet101v2
Input shape
INFO:tensorflow:Froze 544 variables.
INFO:tensorflow:Converted 544 variables to const ops.
Edge TPU Compiler version 16.0.384591198
Started a compilation timeout timer of 180 seconds.

Model compiled successfully in 9854 ms.

Input model: resnet101v2.tflite
Input size: 43.94MiB
Output model: resnet101v2_edgetpu.tflite
Output size: 44.43MiB
On-chip memory used for caching model parameters: 6.60MiB
On-chip memory remaining for caching model parameters: 1.00KiB
Off-chip memory used for streaming uncached model parameters: 37.36MiB
Number of Edge TPU subgraphs: 1
Total number of operations: 249
Operation log: resnet101v2_edgetpu.log
See the operation log file for individual operation details.
Compilation child process completed within timeout period.
Compilation succeeded! 

Edge TPU Compiler version 16.0.38459

In [26]:
# Time the pipeline for ResNet152; fn=None means the model's `name` is used as the file stem.
%time testEdgeTPUCompilation(ResNet152, None)

Downloading data from https://github.com/keras-team/keras-applications/releases/download/resnet/resnet152_weights_tf_dim_ordering_tf_kernels.h5
Model name: resnet152
Input shape
INFO:tensorflow:Froze 932 variables.
INFO:tensorflow:Converted 932 variables to const ops.
Edge TPU Compiler version 16.0.384591198
Started a compilation timeout timer of 180 seconds.

Model compiled successfully in 12408 ms.

Input model: resnet152.tflite
Input size: 59.36MiB
Output model: resnet152_edgetpu.tflite
Output size: 58.71MiB
On-chip memory used for caching model parameters: 7.14MiB
On-chip memory remaining for caching model parameters: 512.00B
Off-chip memory used for streaming uncached model parameters: 51.04MiB
Number of Edge TPU subgraphs: 1
Total number of operations: 213
Operation log: resnet152_edgetpu.log
See the operation log file for individual operation details.
Compilation child process completed within timeout period.
Compilation succeeded! 

Edge TPU Compiler version 16.0.384591198
Inpu

In [27]:
# Time the pipeline for ResNet152V2; fn=None means the model's `name` is used as the file stem.
%time testEdgeTPUCompilation(ResNet152V2, None)

Downloading data from https://github.com/keras-team/keras-applications/releases/download/resnet/resnet152v2_weights_tf_dim_ordering_tf_kernels.h5
Model name: resnet152v2
Input shape
INFO:tensorflow:Froze 816 variables.
INFO:tensorflow:Converted 816 variables to const ops.
Edge TPU Compiler version 16.0.384591198
Started a compilation timeout timer of 180 seconds.

Model compiled successfully in 13632 ms.

Input model: resnet152v2.tflite
Input size: 59.50MiB
Output model: resnet152v2_edgetpu.tflite
Output size: 60.20MiB
On-chip memory used for caching model parameters: 6.60MiB
On-chip memory remaining for caching model parameters: 1.00KiB
Off-chip memory used for streaming uncached model parameters: 52.94MiB
Number of Edge TPU subgraphs: 1
Total number of operations: 368
Operation log: resnet152v2_edgetpu.log
See the operation log file for individual operation details.
Compilation child process completed within timeout period.
Compilation succeeded! 

Edge TPU Compiler version 16.0.3845

In [28]:
# Time the pipeline for ResNet50; fn=None means the model's `name` is used as the file stem.
%time testEdgeTPUCompilation(ResNet50, None)

Downloading data from https://github.com/keras-team/keras-applications/releases/download/resnet/resnet50_weights_tf_dim_ordering_tf_kernels.h5
Model name: resnet50
Input shape
INFO:tensorflow:Froze 320 variables.
INFO:tensorflow:Converted 320 variables to const ops.
Edge TPU Compiler version 16.0.384591198
Started a compilation timeout timer of 180 seconds.

Model compiled successfully in 5162 ms.

Input model: resnet50.tflite
Input size: 25.06MiB
Output model: resnet50_edgetpu.tflite
Output size: 24.91MiB
On-chip memory used for caching model parameters: 7.14MiB
On-chip memory remaining for caching model parameters: 512.00B
Off-chip memory used for streaming uncached model parameters: 17.54MiB
Number of Edge TPU subgraphs: 1
Total number of operations: 77
Operation log: resnet50_edgetpu.log
See the operation log file for individual operation details.
Compilation child process completed within timeout period.
Compilation succeeded! 

Edge TPU Compiler version 16.0.384591198
Input: resn

In [29]:
# Time the pipeline for ResNet50V2; fn=None means the model's `name` is used as the file stem.
%time testEdgeTPUCompilation(ResNet50V2, None)

Downloading data from https://github.com/keras-team/keras-applications/releases/download/resnet/resnet50v2_weights_tf_dim_ordering_tf_kernels.h5
Model name: resnet50v2
Input shape
INFO:tensorflow:Froze 272 variables.
INFO:tensorflow:Converted 272 variables to const ops.
Edge TPU Compiler version 16.0.384591198
Started a compilation timeout timer of 180 seconds.

Model compiled successfully in 5537 ms.

Input model: resnet50v2.tflite
Input size: 25.11MiB
Output model: resnet50v2_edgetpu.tflite
Output size: 25.37MiB
On-chip memory used for caching model parameters: 6.60MiB
On-chip memory remaining for caching model parameters: 1.00KiB
Off-chip memory used for streaming uncached model parameters: 18.49MiB
Number of Edge TPU subgraphs: 1
Total number of operations: 130
Operation log: resnet50v2_edgetpu.log
See the operation log file for individual operation details.
Compilation child process completed within timeout period.
Compilation succeeded! 

Edge TPU Compiler version 16.0.384591198


In [30]:
# Time the pipeline for VGG16; fn=None means the model's `name` is used as the file stem.
# In the recorded run below, most parameters are streamed from off-chip memory.
%time testEdgeTPUCompilation(VGG16, None)

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels.h5
Model name: vgg16
Input shape
INFO:tensorflow:Froze 32 variables.
INFO:tensorflow:Converted 32 variables to const ops.
Edge TPU Compiler version 16.0.384591198
Started a compilation timeout timer of 180 seconds.

Model compiled successfully in 23654 ms.

Input model: vgg16.tflite
Input size: 132.09MiB
Output model: vgg16_edgetpu.tflite
Output size: 132.32MiB
On-chip memory used for caching model parameters: 5.33MiB
On-chip memory remaining for caching model parameters: 170.25KiB
Off-chip memory used for streaming uncached model parameters: 126.78MiB
Number of Edge TPU subgraphs: 1
Total number of operations: 24
Operation log: vgg16_edgetpu.log
See the operation log file for individual operation details.
Compilation child process completed within timeout period.
Compilation succeeded! 

Edge TPU Compiler version 16.0.384591198
Input: vgg16.tflite
Outp

In [31]:
# Time the pipeline for VGG19; fn=None means the model's `name` is used as the file stem.
%time testEdgeTPUCompilation(VGG19, None)

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg19_weights_tf_dim_ordering_tf_kernels.h5
Model name: vgg19
Input shape
INFO:tensorflow:Froze 38 variables.
INFO:tensorflow:Converted 38 variables to const ops.
Edge TPU Compiler version 16.0.384591198
Started a compilation timeout timer of 180 seconds.

Model compiled successfully in 24589 ms.

Input model: vgg19.tflite
Input size: 137.19MiB
Output model: vgg19_edgetpu.tflite
Output size: 137.41MiB
On-chip memory used for caching model parameters: 5.33MiB
On-chip memory remaining for caching model parameters: 170.25KiB
Off-chip memory used for streaming uncached model parameters: 131.86MiB
Number of Edge TPU subgraphs: 1
Total number of operations: 27
Operation log: vgg19_edgetpu.log
See the operation log file for individual operation details.
Compilation child process completed within timeout period.
Compilation succeeded! 

Edge TPU Compiler version 16.0.384591198
Input: vgg19.tflite
Outp

In [32]:
# Time the pipeline for Xception; fn=None means the model's `name` is used as the file stem.
# In the recorded run below, the compiler warns that not all operations are supported by the Edge TPU.
%time testEdgeTPUCompilation(Xception, None)

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.4/xception_weights_tf_dim_ordering_tf_kernels.h5
Model name: xception
Input shape
INFO:tensorflow:Froze 236 variables.
INFO:tensorflow:Converted 236 variables to const ops.
Edge TPU Compiler version 16.0.384591198
Started a compilation timeout timer of 180 seconds.

Model compiled successfully in 534 ms.

Input model: xception.tflite
Input size: 22.84MiB
Output model: xception_edgetpu.tflite
Output size: 23.10MiB
On-chip memory used for caching model parameters: 101.50KiB
On-chip memory remaining for caching model parameters: 6.45MiB
Off-chip memory used for streaming uncached model parameters: 0.00B
Number of Edge TPU subgraphs: 1
Total number of operations: 128
Operation log: xception_edgetpu.log

Model successfully compiled but not all operations are supported by the Edge TPU. A percentage of the model will instead run on the CPU, which is slower. If possible, consider updating your model to 