In [1]:
!pip install tensorflow-datasets

Collecting tensorflow-datasets
  Downloading tensorflow_datasets-4.8.3-py3-none-any.whl (5.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.4/5.4 MB[0m [31m48.8 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting toml
  Downloading toml-0.10.2-py2.py3-none-any.whl (16 kB)
Collecting tensorflow-metadata
  Downloading tensorflow_metadata-1.12.0-py3-none-any.whl (52 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m52.3/52.3 kB[0m [31m14.2 MB/s[0m eta [36m0:00:00[0m
Collecting googleapis-common-protos<2,>=1.52.0
  Downloading googleapis_common_protos-1.58.0-py2.py3-none-any.whl (223 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m223.0/223.0 kB[0m [31m45.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: toml, googleapis-common-protos, tensorflow-metadata, tensorflow-datasets
Successfully installed googleapis-common-protos-1.58.0 tensorflow-datasets-4.8.3 tensorflow-metadata-1.12.0 toml-0.10

In [2]:
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization, GlobalAveragePooling2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers.experimental.preprocessing import Normalization
import numpy as np
import matplotlib.pyplot as plt

In [7]:
# Load COCO 2017 dataset with annotations
dataset, info = tfds.load('coco/2017', with_info=True, data_dir='/notebooks/tensorflow_datasets')
num_classes = 80 # number of classes in COCO dataset



In [12]:
def create_datasets_from_coco(dataset, transform_image, num_classes):
    def encode_coco_categories(coco_categories, num_classes):
        return tf.reduce_max(tf.one_hot(coco_categories, num_classes), axis=0)

    def set_labels_as_zeros():
        return tf.zeros(num_classes, tf.float32)

    def preprocess(example, transform_image):
        image = example['image']
        image = transform_image(image)

        coco_categories = example['objects']['label']
        is_empty = tf.equal(tf.size(coco_categories), 0)
        labels = tf.cond(is_empty, set_labels_as_zeros, lambda: encode_coco_categories(coco_categories, num_classes))
        return image, labels

    batch_size = 32

    train_ds = dataset['train'].map(lambda x: preprocess(x, transform_image)).batch(batch_size).prefetch(tf.data.AUTOTUNE)
    val_ds = dataset['validation'].map(lambda x: preprocess(x, transform_image)).batch(batch_size).prefetch(tf.data.AUTOTUNE)
    # see a picture and its label
    # for image, label in train_ds.take(1):
    #     image_number = 3
    #     plt.imshow(image[image_number])
    #     # print(image[image_number][0])
    #     plt.show()
    #     print(label[image_number])
    return train_ds, val_ds

## Create your own model

In [15]:
def transform_image(image):
    image = tf.image.resize(image, (227, 227))
    image = tf.cast(image, tf.float32) / 127.5 - 1.0
    mean = [0.485, 0.456, 0.406]
    variance = [0.229, 0.224, 0.225]
    normalization_layer = Normalization(mean=mean, variance=variance)
    image = normalization_layer(image)
    return image

train_ds, val_ds = create_datasets_from_coco(dataset, transform_image, num_classes)

input_shape = (227, 227, 3)

model = Sequential()
# Convolutional layers
model.add(Conv2D(96, (11, 11), strides=(4, 4), activation="relu", input_shape=input_shape, padding="same", kernel_initializer="he_normal"))
model.add(BatchNormalization())
model.add(MaxPooling2D((3, 3), strides=(2, 2)))
model.add(Conv2D(256, (5, 5), activation="relu", padding="same", kernel_initializer="he_normal"))
model.add(BatchNormalization())
model.add(MaxPooling2D((3, 3), strides=(2, 2)))
model.add(Conv2D(384, (3, 3), activation="relu", padding="same", kernel_initializer="he_normal"))
model.add(BatchNormalization())
model.add(Conv2D(384, (3, 3), activation="relu", padding="same", kernel_initializer="he_normal"))
model.add(BatchNormalization())
model.add(Conv2D(256, (3, 3), activation="relu", padding="same", kernel_initializer="he_normal"))
model.add(BatchNormalization())
model.add(MaxPooling2D((3, 3), strides=(2, 2)))

# Fully connected layers
model.add(Flatten())
model.add(Dense(4096, activation="relu", kernel_initializer="glorot_normal"))
model.add(Dropout(0.5))
model.add(Dense(4096, activation="relu", kernel_initializer="glorot_normal"))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation="sigmoid"))

# Compile the model
optimizer = tf.keras.optimizers.Adam()
loss = tf.keras.losses.BinaryCrossentropy()
model.compile(optimizer=optimizer, loss=loss, metrics='accuracy')

## Or load pretrained archtecture and change the last layer

In [13]:
def transform_image(image):
    image = tf.image.resize(image, (224, 224))
    image = tf.keras.applications.vgg16.preprocess_input(image)
    return image
    
train_ds, val_ds = create_datasets_from_coco(dataset, transform_image, num_classes)

# Load the VGG16 model without the top layers
base_model = tf.keras.applications.VGG16(weights=None, include_top=False, input_shape=(224, 224, 3))

# Add top layers for multi-label classification
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dropout(0.5)(x)
predictions = Dense(num_classes, activation='sigmoid')(x)

# Create the final model
model = tf.keras.Model(inputs=base_model.input, outputs=predictions)

# Print the layer names
print(*(layer.name for layer in model.layers), sep=' -> ')

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

input_1 -> block1_conv1 -> block1_conv2 -> block1_pool -> block2_conv1 -> block2_conv2 -> block2_pool -> block3_conv1 -> block3_conv2 -> block3_conv3 -> block3_pool -> block4_conv1 -> block4_conv2 -> block4_conv3 -> block4_pool -> block5_conv1 -> block5_conv2 -> block5_conv3 -> block5_pool -> global_average_pooling2d -> dropout -> dense


## Train and save

In [16]:
model.fit(train_ds, epochs=10, validation_data=val_ds)

model.save('model.h5')

Epoch 1/10
  34/3697 [..............................] - ETA: 2:57 - loss: 0.5962 - accuracy: 0.2656

KeyboardInterrupt: 