In [2]:
import os
import cv2
import uuid
import numpy as np

# Setting up Convert to JPG helper function
Adapted from https://github.com/mateusz-michalik/cr2-to-jpg/blob/master/cr2-to-jpg.py

In [3]:
# CR2 to jpg function
import glob
import time
import argparse
from PIL import Image
from rawkit import raw

def convert_cr2_to_jpg(raw_image, store_path):
    """Convert a CR2 raw file into a JPEG written to ``store_path``.

    The JPEG is named after the source file (``<stem>.jpg``) and its access
    and modification times are set to the CR2 file's modification time.

    Args:
        raw_image: Path of the CR2 file to convert.
        store_path: Directory the JPEG is written into.
    """
    # file vars
    file_name = os.path.basename(raw_image)
    file_without_ext = os.path.splitext(file_name)[0]
    file_timestamp = os.path.getmtime(raw_image)
    jpg_image_location = os.path.join(store_path, file_without_ext + '.jpg')

    # parse CR2 image
    raw_image_process = raw.Raw(raw_image)
    try:
        buffered_image = np.array(raw_image_process.to_buffer())
        # check orientation due to PIL image stretch issue
        if raw_image_process.metadata.orientation == 0:
            jpg_image_height = raw_image_process.metadata.height
            jpg_image_width = raw_image_process.metadata.width
        else:
            jpg_image_height = raw_image_process.metadata.width
            jpg_image_width = raw_image_process.metadata.height

        jpg_image = Image.frombytes('RGB', (jpg_image_width, jpg_image_height), buffered_image)
        try:
            jpg_image.save(jpg_image_location, format="jpeg")
        finally:
            jpg_image.close()
    finally:
        # Always close, even when decoding or saving fails, to prevent the
        # "too many open files" error when converting many images in a row.
        raw_image_process.close()

    # update JPG file timestamp to match CR2
    os.utime(jpg_image_location, (file_timestamp, file_timestamp))

# Manipulating Pill Directory CSV
Translate the pandas DataFrame into a dict that maps each NDC (National Drug Code) to a list of its (image URL, image type) pairs.

In [9]:
import pandas as pd

# Maximum number of distinct NDCs to collect from the directory file.
MAX_NDCS = 400

image_data = pd.read_csv('directory_of_images.csv', sep='|', converters={'NDC': str})

# ndc_to_url maps NDC -> list of (url, image_type); NDCs keeps the NDCs in
# first-seen order so later cells can address them by index.
ndc_to_url = {}
NDCs = []
# Walk the three needed columns in lockstep; this avoids building a row
# Series via image_data.iloc[i] three times for every row.
for NDC, url, image_type in zip(image_data['NDC'], image_data['url'], image_data['type']):
    if NDC not in ndc_to_url:
        # Stop at the first new NDC once the cap is reached.
        if len(NDCs) == MAX_NDCS:
            break
        NDCs.append(NDC)
        ndc_to_url[NDC] = []
    ndc_to_url[NDC].append((url, image_type))
    


# Gathering Pill Images
Fetch the pill images with multiple threads, then convert the .cr2 images to .jpg.

In [5]:
import threading
import urllib.request as request
# Base URL that each image URL from ndc_to_url is appended to when downloading.
FETCH_BASE = 'https://data.lhncbc.nlm.nih.gov/public/Pills/'
# Local root directory the downloaded images are stored under.
IMAGES_DIR = "pills"

# (file, directory) pairs of downloaded CR2 files; filled by fetch_pills and
# consumed by the conversion cell that follows the download.
cr2_images = []

def fetch_pills(start, end):
    """Download every image listed for ``NDCs[start]`` .. ``NDCs[end]`` (inclusive).

    Each image is saved as ``<IMAGES_DIR>/<NDC>/<train|test>/<index>.<ext>``.
    Images whose type contains "reference" go to ``train``, all others to
    ``test``. ``.wmv`` entries and URLs without an extension are skipped.
    Every downloaded CR2 file is recorded in the module-level ``cr2_images``
    list as ``(file, directory)`` so it can be converted afterwards.

    Args:
        start: Index into ``NDCs`` of the first NDC to fetch.
        end: Index of the last NDC to fetch (inclusive); clamped to the last
            valid index of ``NDCs``.
    """
    last = min(end, len(NDCs) - 1)
    for i in range(start, last + 1):
        NDC = NDCs[i]
        images = ndc_to_url[NDC]
        # index numbers the saved files per NDC (shared by train and test).
        index = 0
        # each image is a tuple of (URL, TYPE)
        for url, image_type in images:
            binary_type = "train" if "reference" in image_type.lower() else "test"
            # splitext returns the text after the LAST dot of the final path
            # component; url.split('.')[1] picked the wrong piece whenever the
            # URL contained more than one dot.
            file_ext = os.path.splitext(url)[1][1:]
            if not file_ext or file_ext.lower() == 'wmv':
                continue
            # path will be pills/<NDC>/<type>/
            path = os.path.join(IMAGES_DIR, NDC, binary_type)
            # Safe to call from several threads at once and needs no shell.
            os.makedirs(path, exist_ok=True)

            file = os.path.join(path, str(index) + "." + file_ext)
            request.urlretrieve(FETCH_BASE + url, file)
            if file_ext.lower() == "cr2":
                cr2_images.append((file, path))
            index += 1
            
# Set to True to actually (re-)download the images. Left off by default
# because the fetch is slow and bandwidth-heavy.
RUN_FETCH = False
# Number of NDCs handled by each worker thread.
NDCS_PER_THREAD = 10

threads = []
# One thread per chunk of NDCs (40 threads for 400 NDCs), this will tank your internet..
for start in range(0, len(NDCs), NDCS_PER_THREAD):
    # fetch_pills treats `end` as inclusive.
    end = min(start + NDCS_PER_THREAD, len(NDCs)) - 1
    threads.append(threading.Thread(target=fetch_pills, args=(start, end)))

if RUN_FETCH:
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()


In [6]:
# Convert each downloaded CR2 into a JPEG in the same directory, then delete
# the raw file.
for file, path in cr2_images:
    convert_cr2_to_jpg(file, path)
    os.remove(file)

# Image Preprocessing
Cropping, saliency detection

In [193]:
import cv2
# Generate saliency map, then use threshold method to get a nice bound to crop from
def crop_train_img(image, max_rows=1500, thresh=0.5):
    """Crop an image to the bounding box of its salient region.

    The image is first cut down to its first ``max_rows`` rows, a fine-grained
    static saliency map is computed for that part, and the result is the
    smallest axis-aligned box containing every pixel whose saliency exceeds
    ``thresh``.

    Args:
        image: Image array as returned by ``cv2.imread``.
        max_rows: Number of leading rows kept before saliency detection.
        thresh: Saliency value a pixel must exceed to count as salient.

    Returns:
        The cropped image. If saliency detection fails or no pixel exceeds
        ``thresh``, the row-limited image is returned uncropped instead of an
        empty array.
    """
    # image[:][:1500] in the original was equivalent to this: it limits rows only.
    cropped_image = image[:max_rows]
    saliency = cv2.saliency.StaticSaliencyFineGrained_create()
    (success, saliencyMap) = saliency.computeSaliency(cropped_image)
    if not success:
        return cropped_image

    # Vectorised replacement for the per-pixel Python loop.
    salient_rows, salient_cols = np.nonzero(np.asarray(saliencyMap) > thresh)
    if salient_rows.size == 0:
        return cropped_image

    y_min, y_max = salient_rows.min(), salient_rows.max()
    x_min, x_max = salient_cols.min(), salient_cols.max()
    # +1 because slice ends are exclusive and the max row/column is salient too.
    return cropped_image[y_min:y_max + 1, x_min:x_max + 1]
        

In [198]:
# Crop every training image in place.
# NOTE(review): the range starts at 1, so NDCs[0] is never cropped here even
# though later cells iterate range(100) -- confirm this is intentional.
# Cropping is not idempotent: re-running this cell crops the already-cropped
# files again.
for i in range(1, 100):
    train_dir = os.path.join('pills',NDCs[i],'train')
    for file in os.listdir(train_dir):
        full_path = os.path.join(train_dir, file)
        print(full_path)
        image = cv2.imread(full_path)
        if image is None:
            # cv2.imread returns None when the file cannot be decoded.
            print('skipping unreadable image: ' + full_path)
            continue
        cropped_image = crop_train_img(image)
        if cropped_image.size == 0:
            # Keep the original rather than replacing it with nothing.
            print('skipping empty crop: ' + full_path)
            continue
        # cv2.imwrite overwrites the file itself. Removing it first would lose
        # the original whenever the write fails.
        if not cv2.imwrite(full_path, cropped_image):
            print('failed to write cropped image: ' + full_path)
        
#TEST_IMAGE_PATH = os.path.join('pills',NDCs[0],'train','20.PNG')
#image = cv2.imread(TEST_IMAGE_PATH)

pills/00002322830/train/31.PNG
pills/00002322830/train/30.PNG
pills/00002322830/train/35.PNG
pills/00002322830/train/34.PNG
pills/00002322830/train/36.PNG
pills/00002322830/train/29.PNG
pills/00002322830/train/32.PNG
pills/00002322830/train/33.PNG
pills/00002322930/train/23.PNG
pills/00002322930/train/19.PNG
pills/00002322930/train/24.PNG
pills/00002322930/train/25.PNG
pills/00002322930/train/26.PNG
pills/00002322930/train/22.PNG
pills/00002322930/train/21.PNG
pills/00002322930/train/20.PNG
pills/00002323560/train/23.PNG
pills/00002323560/train/19.PNG
pills/00002323560/train/24.PNG
pills/00002323560/train/25.PNG
pills/00002323560/train/26.PNG
pills/00002323560/train/22.PNG
pills/00002323560/train/21.PNG
pills/00002323560/train/20.PNG
pills/00002323830/train/23.PNG
pills/00002323830/train/19.PNG
pills/00002323830/train/24.PNG
pills/00002323830/train/25.PNG
pills/00002323830/train/26.PNG
pills/00002323830/train/22.PNG
pills/00002323830/train/21.PNG
pills/00002323830/train/20.PNG
pills/00

pills/00006022761/train/26.PNG
pills/00006022761/train/22.PNG
pills/00006022761/train/21.PNG
pills/00006022761/train/28.PNG
pills/00006022761/train/27.PNG
pills/00006027754/train/15.PNG
pills/00006027754/train/18.PNG
pills/00006027754/train/19.PNG
pills/00006027754/train/22.PNG
pills/00006027754/train/16.PNG
pills/00006027754/train/21.PNG
pills/00006027754/train/17.PNG
pills/00006027754/train/20.PNG
pills/00006046106/train/9.PNG
pills/00006046106/train/8.PNG
pills/00006046106/train/7.PNG
pills/00006046106/train/4.PNG
pills/00006046106/train/5.PNG
pills/00006046106/train/6.PNG
pills/00006046106/train/11.PNG
pills/00006046106/train/10.PNG
pills/00006046405/train/9.PNG
pills/00006046405/train/8.PNG
pills/00006046405/train/7.PNG
pills/00006046405/train/4.PNG
pills/00006046405/train/5.PNG
pills/00006046405/train/6.PNG
pills/00006046405/train/11.PNG
pills/00006046405/train/10.PNG
pills/00006057561/train/23.PNG
pills/00006057561/train/18.PNG
pills/00006057561/train/19.PNG
pills/00006057561/tr

pills/00007464113/train/26.PNG
pills/00007464113/train/22.PNG
pills/00007464113/train/21.PNG
pills/00007464113/train/28.PNG
pills/00007464113/train/27.PNG
pills/00007464213/train/23.PNG
pills/00007464213/train/19.PNG
pills/00007464213/train/24.PNG
pills/00007464213/train/25.PNG
pills/00007464213/train/26.PNG
pills/00007464213/train/22.PNG
pills/00007464213/train/21.PNG
pills/00007464213/train/20.PNG
pills/00007488213/train/23.PNG
pills/00007488213/train/24.PNG
pills/00007488213/train/25.PNG
pills/00007488213/train/26.PNG
pills/00007488213/train/22.PNG
pills/00007488213/train/21.PNG
pills/00007488213/train/28.PNG
pills/00007488213/train/27.PNG
pills/00007488313/train/23.PNG
pills/00007488313/train/24.PNG
pills/00007488313/train/25.PNG
pills/00007488313/train/26.PNG
pills/00007488313/train/22.PNG
pills/00007488313/train/21.PNG
pills/00007488313/train/28.PNG
pills/00007488313/train/27.PNG
pills/00007488561/train/23.PNG
pills/00007488561/train/24.PNG
pills/00007488561/train/25.PNG
pills/00

pills/00024420010/train/26.PNG
pills/00024420010/train/22.PNG
pills/00024420010/train/21.PNG
pills/00024420010/train/20.PNG


# Model training
Download the TensorFlow Object Detection API, fetch a pretrained model from the TensorFlow model zoo, convert the data to TFRecord files, and train.

In [6]:
# Directory name of the fine-tuned model under the workspace models folder.
CUSTOM_MODEL_NAME = 'pill-mill'
# Pre-trained model to fine-tune; the downloaded archive is named
# <PRETRAINED_MODEL_NAME>.tar.gz.
PRETRAINED_MODEL_NAME = 'ssd_resnet152_v1_fpn_1024x1024_coco17_tpu-8'
PRETRAINED_MODEL_URL = (
    'http://download.tensorflow.org/models/object_detection/tf2/20200711/'
    + PRETRAINED_MODEL_NAME
    + '.tar.gz'
)
# File names joined onto the scripts / annotations directories.
TF_RECORD_SCRIPT_NAME = 'generate_tfrecord.py'
LABEL_MAP_NAME = 'label_map.pbtxt'

In [7]:
# Shared prefixes, factored out so each entry below states only what is
# specific to it.
_TF_ROOT = 'Tensorflow'
_WORKSPACE = os.path.join(_TF_ROOT, 'workspace')
_WORKSPACE_MODELS = os.path.join(_WORKSPACE, 'models')
_CUSTOM_MODEL_DIR = os.path.join(_WORKSPACE_MODELS, CUSTOM_MODEL_NAME)

# Directories used by the training workflow.
paths = {
    'WORKSPACE_PATH': _WORKSPACE,
    'SCRIPTS_PATH': os.path.join(_TF_ROOT, 'scripts'),
    'APIMODEL_PATH': os.path.join(_TF_ROOT, 'models'),
    'ANNOTATION_PATH': os.path.join(_WORKSPACE, 'annotations'),
    'IMAGE_PATH': os.path.join(_WORKSPACE, 'images'),
    'MODEL_PATH': _WORKSPACE_MODELS,
    'PRETRAINED_MODEL_PATH': os.path.join(_WORKSPACE, 'pre-trained-models'),
    'CHECKPOINT_PATH': _CUSTOM_MODEL_DIR,
    'OUTPUT_PATH': os.path.join(_CUSTOM_MODEL_DIR, 'export'),
    'TFJS_PATH': os.path.join(_CUSTOM_MODEL_DIR, 'tfjsexport'),
    'TFLITE_PATH': os.path.join(_CUSTOM_MODEL_DIR, 'tfliteexport'),
    'PROTOC_PATH': os.path.join(_TF_ROOT, 'protoc'),
}

# Individual files referenced by later cells.
files = {
    'PIPELINE_CONFIG': os.path.join(_CUSTOM_MODEL_DIR, 'pipeline.config'),
    'TF_RECORD_SCRIPT': os.path.join(paths['SCRIPTS_PATH'], TF_RECORD_SCRIPT_NAME),
    'LABELMAP': os.path.join(paths['ANNOTATION_PATH'], LABEL_MAP_NAME),
}

In [201]:
# Create every workspace directory. os.makedirs(..., exist_ok=True) creates
# missing parents and is a no-op for existing directories, without needing a
# shell `mkdir -p`.
for path in paths.values():
    os.makedirs(path, exist_ok=True)


In [202]:
# Clone the tensorflow/models repository into APIMODEL_PATH unless its
# research/object_detection directory is already present.
if not os.path.exists(os.path.join(paths['APIMODEL_PATH'], 'research', 'object_detection')):
    !git clone https://github.com/tensorflow/models {paths['APIMODEL_PATH']}

Cloning into 'Tensorflow/models'...
remote: Enumerating objects: 79335, done.[K
remote: Counting objects: 100% (34/34), done.[K
remote: Compressing objects: 100% (29/29), done.[K
remote: Total 79335 (delta 12), reused 15 (delta 4), pack-reused 79301[K
Receiving objects: 100% (79335/79335), 593.90 MiB | 14.98 MiB/s, done.
Resolving deltas: 100% (56468/56468), done.


In [203]:
# From Tensorflow/models/research: compile the object_detection .proto files
# to Python, copy the TF2 setup.py next to them, and pip-install the package.
# NOTE(review): `python -m pip` uses whichever `python` is first on PATH --
# confirm it is the interpreter this kernel runs on.
!cd Tensorflow/models/research && protoc object_detection/protos/*.proto --python_out=. && cp object_detection/packages/tf2/setup.py . && python -m pip install . 

Processing /home/eddie/code/ml/pill-mill/Tensorflow/models/research
  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting avro-python3
  Downloading avro-python3-1.10.2.tar.gz (38 kB)
  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting apache-beam
  Downloading apache_beam-2.43.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (14.0 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.0/14.0 MB[0m [31m26.7 MB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m[36m0:00:01[0m
Collecting Cython
  Using cached Cython-0.29.32-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (1.9 MB)
Collecting contextlib2
  Downloading contextlib2-21.6.0-py2.py3-none-any.whl (13 kB)
Collecting tf-slim
  Downloading tf_slim-1.1.0-py2.py3-none-any.whl (352 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m352.1/352.1 kB[0m [31m26.8 MB/s[0m eta [36m0:00:00[0m
Collecting pycocotools


Collecting dm-tree~=0.1.1
  Downloading dm_tree-0.1.7-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (142 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m142.6/142.6 kB[0m [31m14.6 MB/s[0m eta [36m0:00:00[0m
Collecting typeguard>=2.7
  Downloading typeguard-2.13.3-py3-none-any.whl (17 kB)
Collecting toml
  Downloading toml-0.10.2-py2.py3-none-any.whl (16 kB)
Collecting etils[epath]
  Downloading etils-0.9.0-py3-none-any.whl (140 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m140.1/140.1 kB[0m [31m11.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting promise
  Downloading promise-2.3.tar.gz (19 kB)
  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting tensorflow-metadata
  Downloading tensorflow_metadata-1.11.0-py3-none-any.whl (52 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m52.3/52.3 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
Collecting googleapis-common-pro

  Building wheel for promise (setup.py) ... [?25ldone
[?25h  Created wheel for promise: filename=promise-2.3-py3-none-any.whl size=21486 sha256=9765ae0103697c9227f19bad91baff38f09a8312bfc4cecf5cb4fdb91b2d5fdd
  Stored in directory: /home/eddie/.cache/pip/wheels/54/4e/28/3ed0e1c8a752867445bab994d2340724928aa3ab059c57c8db
Successfully built object-detection avro-python3 pycocotools crcmod dill kaggle pyyaml seqeval docopt promise
Installing collected packages: text-unidecode, sentencepiece, py-cpuinfo, gin-config, docopt, dm-tree, crcmod, zstandard, zipp, uritemplate, typeguard, tqdm, toml, tf-slim, tensorflow-io-gcs-filesystem, tabulate, regex, pyyaml, python-slugify, pyparsing, pymongo, proto-plus, promise, portalocker, orjson, objsize, numpy, importlib_resources, immutabledict, googleapis-common-protos, fasteners, fastavro, etils, dill, Cython, contextlib2, colorama, cloudpickle, avro-python3, tensorflow-model-optimization, tensorflow-metadata, tensorflow_io, tensorflow-hub, sacrebl

In [209]:
# Path of the model-builder test script inside the cloned models repository.
VERIFICATION_SCRIPT = os.path.join(paths['APIMODEL_PATH'], 'research', 'object_detection', 'builders', 'model_builder_tf2_test.py')
# Verify Installation by running that script; each test reports "[       OK ]" when it passes.
!python {VERIFICATION_SCRIPT}

2022-12-05 09:23:20.630432: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-12-05 09:23:20.697054: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/eddie/code/ml/ml_env/lib/python3.10/site-packages/cv2/../../lib64:
2022-12-05 09:23:20.697067: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2022-12-05 09:23:20.712779: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has a

INFO:tensorflow:time(__main__.ModelBuilderTF2Test.test_create_faster_rcnn_models_from_config_faster_rcnn_with_matmul): 0.06s
I1205 09:23:24.444179 140122739644224 test_util.py:2460] time(__main__.ModelBuilderTF2Test.test_create_faster_rcnn_models_from_config_faster_rcnn_with_matmul): 0.06s
[       OK ] ModelBuilderTF2Test.test_create_faster_rcnn_models_from_config_faster_rcnn_with_matmul
[ RUN      ] ModelBuilderTF2Test.test_create_faster_rcnn_models_from_config_faster_rcnn_without_matmul
INFO:tensorflow:time(__main__.ModelBuilderTF2Test.test_create_faster_rcnn_models_from_config_faster_rcnn_without_matmul): 0.05s
I1205 09:23:24.498949 140122739644224 test_util.py:2460] time(__main__.ModelBuilderTF2Test.test_create_faster_rcnn_models_from_config_faster_rcnn_without_matmul): 0.05s
[       OK ] ModelBuilderTF2Test.test_create_faster_rcnn_models_from_config_faster_rcnn_without_matmul
[ RUN      ] ModelBuilderTF2Test.test_create_faster_rcnn_models_from_config_mask_rcnn_with_matmul
INFO:ten

I1205 09:23:26.885322 140122739644224 efficientnet_model.py:143] round_filter input=1280 output=1280
I1205 09:23:26.912090 140122739644224 efficientnet_model.py:453] Building model efficientnet with params ModelConfig(width_coefficient=1.0, depth_coefficient=1.1, resolution=240, dropout_rate=0.2, blocks=(BlockConfig(input_filters=32, output_filters=16, kernel_size=3, num_repeat=1, expand_ratio=1, strides=(1, 1), se_ratio=0.25, id_skip=True, fused_conv=False, conv_type='depthwise'), BlockConfig(input_filters=16, output_filters=24, kernel_size=3, num_repeat=2, expand_ratio=6, strides=(2, 2), se_ratio=0.25, id_skip=True, fused_conv=False, conv_type='depthwise'), BlockConfig(input_filters=24, output_filters=40, kernel_size=5, num_repeat=2, expand_ratio=6, strides=(2, 2), se_ratio=0.25, id_skip=True, fused_conv=False, conv_type='depthwise'), BlockConfig(input_filters=40, output_filters=80, kernel_size=3, num_repeat=3, expand_ratio=6, strides=(2, 2), se_ratio=0.25, id_skip=True, fused_conv=F

I1205 09:23:29.610125 140122739644224 ssd_efficientnet_bifpn_feature_extractor.py:150] EfficientDet EfficientNet backbone version: efficientnet-b4
I1205 09:23:29.610200 140122739644224 ssd_efficientnet_bifpn_feature_extractor.py:152] EfficientDet BiFPN num filters: 224
I1205 09:23:29.610237 140122739644224 ssd_efficientnet_bifpn_feature_extractor.py:153] EfficientDet BiFPN num iterations: 7
I1205 09:23:29.611130 140122739644224 efficientnet_model.py:143] round_filter input=32 output=48
I1205 09:23:29.621746 140122739644224 efficientnet_model.py:143] round_filter input=32 output=48
I1205 09:23:29.621804 140122739644224 efficientnet_model.py:143] round_filter input=16 output=24
I1205 09:23:29.699874 140122739644224 efficientnet_model.py:143] round_filter input=16 output=24
I1205 09:23:29.699946 140122739644224 efficientnet_model.py:143] round_filter input=24 output=32
I1205 09:23:29.886492 140122739644224 efficientnet_model.py:143] round_filter input=24 output=32
I1205 09:23:29.886567 14

I1205 09:23:33.593205 140122739644224 efficientnet_model.py:143] round_filter input=16 output=32
I1205 09:23:33.593282 140122739644224 efficientnet_model.py:143] round_filter input=24 output=40
I1205 09:23:33.888121 140122739644224 efficientnet_model.py:143] round_filter input=24 output=40
I1205 09:23:33.888201 140122739644224 efficientnet_model.py:143] round_filter input=40 output=72
I1205 09:23:34.186789 140122739644224 efficientnet_model.py:143] round_filter input=40 output=72
I1205 09:23:34.186864 140122739644224 efficientnet_model.py:143] round_filter input=80 output=144
I1205 09:23:34.588216 140122739644224 efficientnet_model.py:143] round_filter input=80 output=144
I1205 09:23:34.588291 140122739644224 efficientnet_model.py:143] round_filter input=112 output=200
I1205 09:23:35.100652 140122739644224 efficientnet_model.py:143] round_filter input=112 output=200
I1205 09:23:35.100728 140122739644224 efficientnet_model.py:143] round_filter input=192 output=344
I1205 09:23:35.714313 

In [5]:
import object_detection

In [213]:
!wget {PRETRAINED_MODEL_URL}
!mv {PRETRAINED_MODEL_NAME+'.tar.gz'} {paths['PRETRAINED_MODEL_PATH']}
!cd {paths['PRETRAINED_MODEL_PATH']} && tar -zxvf {PRETRAINED_MODEL_NAME+'.tar.gz'}

--2022-12-05 09:45:47--  http://download.tensorflow.org/models/object_detection/tf2/20200711/ssd_resnet152_v1_fpn_1024x1024_coco17_tpu-8.tar.gz
Resolving download.tensorflow.org (download.tensorflow.org)... 2607:f8b0:4009:801::2010, 142.250.191.240
Connecting to download.tensorflow.org (download.tensorflow.org)|2607:f8b0:4009:801::2010|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 504180168 (481M) [application/x-tar]
Saving to: ‘ssd_resnet152_v1_fpn_1024x1024_coco17_tpu-8.tar.gz’


2022-12-05 09:46:12 (24.3 MB/s) - ‘ssd_resnet152_v1_fpn_1024x1024_coco17_tpu-8.tar.gz’ saved [504180168/504180168]

ssd_resnet152_v1_fpn_1024x1024_coco17_tpu-8/
ssd_resnet152_v1_fpn_1024x1024_coco17_tpu-8/checkpoint/
ssd_resnet152_v1_fpn_1024x1024_coco17_tpu-8/checkpoint/ckpt-0.data-00000-of-00001
ssd_resnet152_v1_fpn_1024x1024_coco17_tpu-8/checkpoint/checkpoint
ssd_resnet152_v1_fpn_1024x1024_coco17_tpu-8/checkpoint/ckpt-0.index
ssd_resnet152_v1_fpn_1024x1024_coco17_tpu-8/pipeline.

In [14]:
# One label per NDC for the first 100 NDCs; ids are 1-based.
labels = [{'name': NDCs[i], 'id': i + 1} for i in range(100)]

# Write the label map: one `item { name:'<NDC>' id:<n> }` entry per label.
with open(files['LABELMAP'], 'w') as f:
    for label in labels:
        f.writelines([
            'item { \n',
            '\tname:\'{}\'\n'.format(label['name']),
            '\tid:{}\n'.format(label['id']),
            '}\n',
        ])


In [19]:
import tensorflow.compat.v1 as tf

import sys
import os
def _int64_feature(value):
    """Wrap a single integer in a ``tf.train.Feature`` holding an ``Int64List``."""
    int64_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int64_list)
def _bytes_feature(value):
    """Wrap a single bytes value in a ``tf.train.Feature`` holding a ``BytesList``.

    A value of the same type as ``tf.constant(0)`` is converted with
    ``.numpy()`` first.
    """
    tensor_type = type(tf.constant(0))
    if isinstance(value, tensor_type):
        # BytesList won't unpack a string from an EagerTensor.
        value = value.numpy()
    bytes_list = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=bytes_list)

def convert_to_example(image_string, label, filename):
    """Build a ``tf.train.Example`` for one encoded image.

    Args:
        image_string: Raw bytes of the encoded image file.
        label: Label dict; only ``label["id"]`` is stored.
        filename: Path of the image. Its extension (case-insensitive) selects
            the decoder: ``png``, or ``jpg``/``jpeg``.

    Returns:
        A ``tf.train.Example`` with the features ``height``, ``width``,
        ``depth``, ``label`` and ``image_raw``.

    Raises:
        ValueError: If the extension is not a supported image type.
    """
    # splitext looks only at the final path component, so dots elsewhere in
    # the path cannot be mistaken for the extension.
    file_ext = os.path.splitext(filename)[1][1:].lower()
    if file_ext not in ("png", "jpg", "jpeg"):
        # Previously image_shape was left unbound here, which surfaced as an
        # UnboundLocalError further down.
        raise ValueError(
            "unsupported image extension {!r} for file {!r}".format(file_ext, filename))

    if file_ext == "png":
        image_shape = tf.io.decode_png(image_string).shape
    else:
        image_shape = tf.io.decode_jpeg(image_string).shape

    # NOTE(review): the training step presumably expects the Object Detection
    # API's TFRecord schema (image/encoded, image/object/bbox/*, ...) rather
    # than these keys -- confirm these records can be read by it.
    feature = {
        'height': _int64_feature(image_shape[0]),
        'width': _int64_feature(image_shape[1]),
        'depth': _int64_feature(image_shape[2]),
        'label': _int64_feature(label["id"]),
        'image_raw': _bytes_feature(image_string),
    }

    return tf.train.Example(features=tf.train.Features(feature=feature))

# Write one TFRecord file per split: train.record and test.record.
train_writer = tf.python_io.TFRecordWriter(
        os.path.join(paths['ANNOTATION_PATH'], 'train.record'))
test_writer = tf.python_io.TFRecordWriter(
        os.path.join(paths['ANNOTATION_PATH'], 'test.record'))

try:
    for i in range(100):
        ndc = NDCs[i]
        label = labels[i]
        # The train and test splits are handled identically; only the
        # directory and the target writer differ.
        for split, writer in (('train', train_writer), ('test', test_writer)):
            split_dir = os.path.join('pills', ndc, split)
            if not os.path.isdir(split_dir):
                # An NDC may have images for only one of the two splits.
                continue
            for file in os.listdir(split_dir):
                full_path = os.path.join(split_dir, file)
                # Close each image file promptly instead of leaking the handle.
                with open(full_path, 'rb') as image_file:
                    image = image_file.read()
                tf_example = convert_to_example(image, label, full_path)
                writer.write(tf_example.SerializeToString())
finally:
    # Close the writers even when a file fails to convert.
    test_writer.close()
    train_writer.close()

In [228]:
# Copy the pre-trained model's pipeline.config into the custom model's
# checkpoint directory, where files['PIPELINE_CONFIG'] points.
!cp {os.path.join(paths['PRETRAINED_MODEL_PATH'], PRETRAINED_MODEL_NAME, 'pipeline.config')} {os.path.join(paths['CHECKPOINT_PATH'])}

In [17]:
import tensorflow as tf
from object_detection.utils import config_util
from object_detection.protos import pipeline_pb2
from google.protobuf import text_format

# Load the copied pipeline.config, point it at this project's label map,
# TFRecord files and pre-trained checkpoint, then write it back in place.
# NOTE(review): `config` is not referenced again in the visible cells.
config = config_util.get_configs_from_pipeline_file(files['PIPELINE_CONFIG'])
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
# Parse the text-format config file into the pipeline_config message.
with tf.io.gfile.GFile(files['PIPELINE_CONFIG'], "r") as f:
    proto_str = f.read()
    text_format.Merge(proto_str, pipeline_config)

# One class per entry in `labels`.
pipeline_config.model.ssd.num_classes = len(labels)
pipeline_config.train_config.batch_size = 4
# Start training from the downloaded pre-trained checkpoint.
pipeline_config.train_config.fine_tune_checkpoint = os.path.join(paths['PRETRAINED_MODEL_PATH'], PRETRAINED_MODEL_NAME, 'checkpoint', 'ckpt-0')
pipeline_config.train_config.fine_tune_checkpoint_type = "detection"
# Training input: label map plus train.record.
pipeline_config.train_input_reader.label_map_path= files['LABELMAP']
pipeline_config.train_input_reader.tf_record_input_reader.input_path[:] = [os.path.join(paths['ANNOTATION_PATH'], 'train.record')]
# Evaluation input: same label map plus test.record.
pipeline_config.eval_input_reader[0].label_map_path = files['LABELMAP']
pipeline_config.eval_input_reader[0].tf_record_input_reader.input_path[:] = [os.path.join(paths['ANNOTATION_PATH'], 'test.record')]

# Serialize the modified message and overwrite the config file with it.
config_text = text_format.MessageToString(pipeline_config)
with tf.io.gfile.GFile(files['PIPELINE_CONFIG'], "wb") as f:
    f.write(config_text)

In [233]:
# Assemble the shell command that launches training for 2000 steps.
_OBJECT_DETECTION_DIR = os.path.join(paths['APIMODEL_PATH'], 'research', 'object_detection')
TRAINING_SCRIPT = os.path.join(_OBJECT_DETECTION_DIR, 'model_main_tf2.py')
_COMMAND_TEMPLATE = "python {} --model_dir={} --pipeline_config_path={} --num_train_steps=2000"
command = _COMMAND_TEMPLATE.format(
    TRAINING_SCRIPT,
    paths['CHECKPOINT_PATH'],
    files['PIPELINE_CONFIG'],
)
print(command)

python Tensorflow/models/research/object_detection/model_main_tf2.py --model_dir=Tensorflow/workspace/models/pill-mill --pipeline_config_path=Tensorflow/workspace/models/pill-mill/pipeline.config --num_train_steps=2000


In [None]:
# Run the training command assembled in `command` through the shell.
!{command}