In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import cv2
import tensorflow as tf
print(tf.__version__)
import json
import pycocotools

2.11.0


In [2]:
# Define the path to the annotations file
annotations_file = 'annotation/ADT_1_0.txt'

# Define the path to the image directory
image_dir = 'annotation/ADT_1_0.png'

# Define the categories
categories = [
    {'id': 0, 'name': 'nucleus'},
]

# Define the output file path
output_file = 'annotation/ADT_1_0.json'

# Initialize the annotations dictionary
annotations = {
    'images': [],
    'annotations': [],
    'categories': categories
}

# Read the annotations file
with open(annotations_file, 'r') as f:
    lines = f.readlines()

# Process each line of the annotations file
for line in lines:
    # Parse the line to extract the image file name, bounding box coordinates, and category label
    category, x1, y1, x2, y2, = line.strip().split()
    
    # Add the image information to the annotations dictionary
    image_id = len(annotations['images']) + 1
    image_info = {
        'id': image_id,
        #'file_name': image_file,
        'width': 0,  # Set to the actual width of the image
        'height': 0  # Set to the actual height of the image
    }
    annotations['images'].append(image_info)
    
    # Add the annotation information to the annotations dictionary
    annotation_id = len(annotations['annotations']) + 1
    annotation_info = {
        'id': annotation_id,
        'image_id': image_id,
        'category_id': int(category),
        'bbox': [float(x1), float(y1), float(x2) - float(x1), float(y2) - float(y1)],
        'area': (float(x2) - float(x1)) * (float(y2) - float(y1)),
        'iscrowd': 0
    }
    annotations['annotations'].append(annotation_info)

# Write the annotations to the output file
with open(output_file, 'w') as f:
    json.dump(annotations, f)


In [3]:
# Define the path to the annotations file
annotations_file = 'annotation/ADT_1_0.json'

# Define the path to the image directory
image_dir = '/path/to/images/'
image_dir = 'annotation/ADT_1_0.png'

# Define the batch size
batch_size = 8

# Define the image size
image_size = (256, 256)

# Define the preprocessing function
def preprocess(image, labels):
    # Resize the image
    image = tf.image.resize(image, image_size)
    
    # Normalize the image
    image = tf.keras.applications.resnet50.preprocess_input(image)
    
    # Return the preprocessed image and labels
    return image, labels

# Define the dataset
dataset = tf.data.Dataset.from_generator(
    lambda: pycocotools.coco.COCO(annotations_file).imgs.values(),
    output_types=(tf.string, tf.float32, tf.float32),
    output_shapes=(tf.TensorShape([]), tf.TensorShape([None, 4]), tf.TensorShape([None, None])),
)
dataset = dataset.map(
    lambda image_file, bbox, masks: (tf.io.read_file(image_dir + image_file), bbox, masks),
    num_parallel_calls=tf.data.AUTOTUNE,
)
dataset = dataset.map(
    lambda image, bbox, masks: (tf.image.decode_jpeg(image), bbox, masks),
    num_parallel_calls=tf.data.AUTOTUNE,
)
dataset = dataset.map(
    lambda image, bbox, masks: preprocess(image, (bbox, masks)),
    num_parallel_calls=tf.data.AUTOTUNE,
)
dataset = dataset.batch(batch_size)

Metal device set to: Apple M2

systemMemory: 16.00 GB
maxCacheSize: 5.33 GB

Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


2023-02-19 16:46:34.555342: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-02-19 16:46:34.555365: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [5]:
class RoILayer(tf.keras.layers.Layer):
    def __init__(self, pool_size, **kwargs):
        self.pool_size = pool_size
        super(RoILayer, self).__init__(**kwargs)

    def call(self, inputs):
        feature_map, rois = inputs

        # Get the shape of the feature map
        feature_map_shape = tf.shape(feature_map)
        feature_map_height = feature_map_shape[1]
        feature_map_width = feature_map_shape[2]

        # Normalize the RoIs to be in the range [0, 1]
        normalized_rois = tf.cast(rois, tf.float32) / tf.constant([feature_map_height, feature_map_width, feature_map_height, feature_map_width])

        # Crop and resize the feature map for each RoI
        cropped_rois = tf.image.crop_and_resize(feature_map, normalized_rois, tf.zeros([tf.shape(rois)[0]], dtype=tf.int32), [self.pool_size, self.pool_size])

        return cropped_rois

    def get_config(self):
        config = super(RoILayer, self).get_config()
        config.update({'pool_size': self.pool_size})
        return config

In [10]:
# Define the backbone network
backbone = tf.keras.applications.ResNet50(include_top=False, weights='imagenet')

# Define the region proposal network
rpn_conv = tf.keras.layers.Conv2D(512, (3, 3), activation='relu', padding='same', name='rpn_conv')
rpn_class = tf.keras.layers.Conv2D(2, (1, 1), activation='sigmoid', name='rpn_class')
rpn_bbox = tf.keras.layers.Conv2D(4, (1, 1), activation='linear', name='rpn_bbox')

def rpn(images):
    x = rpn_conv(images)
    class_logits = rpn_class(x)
    bbox_pred = rpn_bbox(x)
    # Generate proposals
    proposals = []
    for i in range(images.shape[0]):
        # Convert class probabilities to objectness scores
        objectness = tf.squeeze(class_logits[i, :, :, 1])
        
        # Generate bounding box proposals using non-maximum suppression
        proposals_i = tf.image.combined_non_max_suppression(
            tf.expand_dims(bbox_pred[i], axis=0), 
            tf.expand_dims(objectness, axis=0), 
            max_output_size_per_class=100, 
            max_total_size=100, 
            iou_threshold=0.5, 
            score_threshold=0.5
        )
        
        # Append proposals to the list
        proposals.append(proposals_i)
        
    return proposals, class_logits, bbox_pred

# Define the mask prediction network
mask_conv1 = tf.keras.layers.Conv2D(256, (3, 3), activation='relu', padding='same', name='mask_conv1')
mask_conv2 = tf.keras.layers.Conv2D(256, (3, 3), activation='relu', padding='same', name='mask_conv2')
mask_conv3 = tf.keras.layers.Conv2D(256, (3, 3), activation='relu', padding='same', name='mask_conv3')
mask_conv4 = tf.keras.layers.Conv2D(256, (3, 3), activation='relu', padding='same', name='mask_conv4')
mask_deconv = tf.keras.layers.Conv2DTranspose(256, (2, 2), strides=(2, 2), activation='relu', name='mask_deconv')
mask_logits = tf.keras.layers.Conv2D(1, (1, 1), activation='sigmoid', name='mask_logits')

def mask_net(images, proposals):
    x = tf.image.crop_and_resize(images, proposals, box_indices=tf.range(tf.shape(images)[0]), crop_size=(14, 14))
    x = mask_conv1(x)
    x = mask_conv2(x)
    x = mask_conv3(x)
    x = mask_conv4(x)
    x = mask_deconv(x)
    mask_logits = mask_logits(x)
    return mask_logits

# Define the loss function
def mask_rcnn_loss(y_true, y_pred):
    # Compute the binary cross-entropy loss for object detection
    rpn_class_loss = tf.keras.losses.binary_crossentropy(y_true['rpn_class'], y_pred['rpn_class'])
    
    # Compute the smooth L1 loss for bounding box regression
    rpn_bbox_loss = tf.keras.losses.smooth_l1(y_true['rpn_bbox'], y_pred['rpn_bbox'])
    
    # Compute the binary cross-entropy loss for mask prediction
    mask_loss = tf.keras.losses.binary_crossentropy(y_true['masks'], y_pred['masks'])
    
    # Combine the losses and return the total loss
    total_loss = rpn_class_loss + rpn_bbox_loss + mask_loss
    return total_loss

# Define the number of classes (including the background class)
num_classes = 2

# Define the input image size (assuming square images)
input_shape = (256, 256, 3)

# Define the model
inputs = tf.keras.Input(shape=input_shape)
print(inputs.shape)
proposals, rpn_class_logits, rpn_bbox = rpn(inputs)
classes, boxes = backbone(inputs)
mask_logits = mask_net(inputs, proposals)

model = tf.keras.Model(inputs=inputs, outputs=[rpn_class_logits, rpn_bbox, classes, boxes, mask_logits])
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4), loss=mask_rcnn_loss)

# Train the model
model.fit(dataset, epochs=10)


(None, 256, 256, 3)


TypeError: 'NoneType' object cannot be interpreted as an integer