[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/joconnor-ml/osm-ai-tools/blob/master/examples/chimneys/tag_classification.ipynb)

In [None]:
#@title Authenticate, Import, Download Data

# Sign the Colab user in; presumably this is what lets the gsutil / `gs://` calls
# further down reach the bucket -- confirm against your project setup.
from google.colab import auth
auth.authenticate_user()

# NOTE(review): fsspec/gcsfs look like they are installed for the later pandas
# writes to `gs://` paths -- confirm.
!pip install -q fsspec gcsfs

import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import os
import math

# This is the data generated by `data_pipeline.sh` locally and pushed as-is to GCS.
# You'll have to replace this with your own GCS bucket

!mkdir data
!gsutil -m rsync -rd gs://osm-object-detector/data/chimneys ./data
!mkdir pretrained_models
!gsutil -m rsync -rd gs://osm-object-detector/pretrained_models ./pretrained_models

In [None]:
# Load the table of labelled boxes. Later cells read its `image_id`, `osm_id`
# and `y_min`/`x_min`/`y_max`/`x_max` columns.
patches = pd.read_csv("data/bboxes.csv")
patches.head()

In [None]:
def get_base_dataset(image_dir, patches):
    """Build a dataset of (image, bboxes, bbox_ids) triples, one per source image.

    Args:
        image_dir: directory holding one `<image_id>.png` file per image.
        patches: DataFrame with `image_id`, `osm_id`, `y_min`, `x_min`, `y_max`
            and `x_max` columns, one row per labelled box.

    Returns:
        A `tf.data.Dataset` yielding `(decoded_png, float32 boxes, int32 osm ids)`
        where the boxes and ids are those of the rows sharing that image's id.
    """
    # Walk the groups exactly once so filenames, boxes and ids share one ordering.
    # Deriving filenames from `unique()` (first-appearance order) while the boxes
    # came from a sorted groupby paired images with the wrong boxes whenever the
    # CSV was not already sorted by image_id. `sort=False` keeps first-appearance
    # order, so the image order itself is unchanged.
    filenames = []
    image_patches = []
    patch_ids = []
    for image_id, grp in patches.groupby("image_id", sort=False):
        filenames.append(f"{image_dir}/{image_id}.png")
        image_patches.append(grp[["y_min", "x_min", "y_max", "x_max"]].values)
        patch_ids.append(grp["osm_id"].values)

    def patch_gen():
        # One (num_boxes, 4) array per image.
        yield from image_patches

    def patch_id_gen():
        # One (num_boxes,) array of ids per image.
        yield from patch_ids

    # Explicit string dtype so an empty `patches` frame still yields a string dataset.
    filename_dataset = tf.data.Dataset.from_tensor_slices(tf.constant(filenames, dtype=tf.string))
    images = filename_dataset.map(lambda x: tf.io.decode_png(tf.io.read_file(x)))
    bboxes = tf.data.Dataset.from_generator(patch_gen, output_types=tf.float32)
    # NOTE(review): ids are declared int32; values above 2**31-1 would not fit -- confirm the id range.
    bbox_ids = tf.data.Dataset.from_generator(patch_id_gen, output_types=tf.int32)
    return tf.data.Dataset.zip((images, bboxes, bbox_ids))

images_and_bboxes = get_base_dataset('data/images', patches)
# Pull just the first (image, boxes, ids) triple as a quick sanity check.
img, bbox, bbox_id = next(iter(images_and_bboxes.take(1)))
plt.imshow(img)
bbox.numpy(), bbox_id.numpy()

In [None]:
# Mean number of labelled boxes per image, used to balance positives and negatives.
num_boxes = len(patches)
num_images = patches["image_id"].nunique()
bboxes_per_image = num_boxes / num_images
bboxes_per_image

In [None]:
IMAGE_SIZE = 224

def get_final_dataset(images_and_bboxes, seed=0):
    """Turn per-image (image, bboxes, ids) triples into a stream of labelled crops.

    Positives are one crop per bounding box, resized to IMAGE_SIZE x IMAGE_SIZE.
    Negatives are random IMAGE_SIZE crops of the same images, tagged `bbox_id=-1`.

    Args:
        images_and_bboxes: dataset yielding `(image, bboxes, bbox_ids)` per image.
            Assumes boxes are in the normalized `[y_min, x_min, y_max, x_max]` form
            that `tf.image.crop_and_resize` expects -- TODO confirm against the CSV.
        seed: seed for the positive/negative interleaving. A fixed seed makes the
            interleaving repeat on every pass, so positional `take(n)` / `skip(n)`
            splits see the same positives each epoch. Pass `None` for the old
            unseeded behaviour, where the two halves overlap between passes.

    Returns:
        A dataset of dicts with keys `"image"` (float32), `"bbox_id"` and `"label"`.
    """
    # generate positives -- grab crops for each bbox
    def sample_positives(img, bboxes, bbox_ids):
        crops = tf.image.crop_and_resize(
            tf.expand_dims(img, axis=0), bboxes, box_indices=tf.zeros_like(bboxes[:, 0], dtype=tf.int32),
            crop_size=[IMAGE_SIZE, IMAGE_SIZE], method='bilinear',
            extrapolation_value=127, name=None
        )
        return tf.data.Dataset.zip((
            tf.data.Dataset.from_tensor_slices(crops),
            tf.data.Dataset.from_tensor_slices(bbox_ids),
            # endless stream of 1s; zip stops when the crops run out
            tf.data.Dataset.from_tensor_slices([1]).repeat(-1),
        ))

    # use random crops for "negatives" -- as long as image size >> object size, this should be OK
    def sample_negatives(img, boxes, cls):
        return {"image": tf.cast(tf.image.random_crop(img, size=[IMAGE_SIZE, IMAGE_SIZE, 3]), np.float32), "bbox_id": -1, "label": 0}

    positives = images_and_bboxes.flat_map(sample_positives).map(lambda img, box_id, cls: {"image": img, "bbox_id": box_id, "label": cls})
    # use `repeat` to balance the data: one random negative per image per pass,
    # with as many passes as there are boxes per image on average
    negatives = images_and_bboxes.repeat(round(bboxes_per_image)).map(sample_negatives)
    final_dataset = tf.data.experimental.sample_from_datasets([positives, negatives], seed=seed)
    return final_dataset

In [None]:
final_dataset = get_final_dataset(images_and_bboxes)
# Eyeball the first three samples, titled "<bbox_id>, <label>".
for row in final_dataset.take(3):
    pixels = row["image"].numpy()
    plt.imshow(pixels.astype(np.uint8))
    plt.title(f"{row['bbox_id']}, {row['label']}")
    plt.show()


In [None]:
# get size of dataset -- since we changed the number of rows dynamically we have to count them in full
# TODO: we probably can work it out and avoid this slowness
# Summing over the iterator gives 0 for an empty dataset; the enumerate loop it
# replaces left `i` unbound (or stale from an earlier cell) in that case.
num_samples = sum(1 for _ in final_dataset)
num_samples

In [None]:
BATCH_SIZE = 128
# Two folds by position: the first half of the stream and everything after it.
half_the_data = num_samples // 2
train_ds, val_ds = final_dataset.take(half_the_data), final_dataset.skip(half_the_data)

In [None]:
def to_tuple(row):
    """Reduce a sample dict to the `(image, label)` pair that `model.fit` consumes."""
    image, label = (row[key] for key in ("image", "label"))
    return image, label

def get_model():
    """Build and compile the binary crop classifier on top of the pretrained backbone.

    Loads the saved model under `pretrained_models/resisc_224px_rgb_resnet50`,
    marks it trainable, and wraps it with augmentation in front and global
    average pooling, dropout and a single sigmoid unit behind.

    Returns:
        A compiled `tf.keras.Model` mapping `(IMAGE_SIZE, IMAGE_SIZE, 3)` images
        to one probability per image.
    """
    module = tf.keras.models.load_model(os.path.join("pretrained_models", "resisc_224px_rgb_resnet50"))
    module.trainable = True
    module.summary()

    images = tf.keras.layers.Input((IMAGE_SIZE, IMAGE_SIZE, 3))
    data_augmentation = tf.keras.Sequential([
        tf.keras.layers.experimental.preprocessing.RandomFlip("horizontal_and_vertical"),
        tf.keras.layers.experimental.preprocessing.RandomContrast(0.1),
        # Upscale first so the geometric jitter below has a margin before cropping back.
        tf.keras.layers.experimental.preprocessing.Resizing(256,256),
        tf.keras.layers.experimental.preprocessing.RandomTranslation(0.2, 0.2),
        # The factor is a fraction of a full turn (2*pi), not radians:
        # 0.5 samples uniformly from [-180, +180] degrees, i.e. any orientation.
        tf.keras.layers.experimental.preprocessing.RandomRotation(0.5),
        tf.keras.layers.experimental.preprocessing.RandomZoom(0.25),
        # Crop back to the size declared on the Input layer.
        tf.keras.layers.experimental.preprocessing.CenterCrop(IMAGE_SIZE, IMAGE_SIZE),
    ])
    features = module(data_augmentation(images))
    features = tf.keras.layers.GlobalAveragePooling2D()(features)
    features = tf.keras.layers.Dropout(0.5)(features)
    output = tf.keras.layers.Dense(1, activation="sigmoid")(features)
    model = tf.keras.Model(inputs=images, outputs=output)

    # Base learning rate scaled linearly with batch size (0.003 at batch 512).
    lr = 0.003 * BATCH_SIZE / 512

    # Piecewise-constant schedule: the rate drops to 0.1x, 0.001x and 0.0001x of
    # `lr` at the three boundaries. Boundaries count optimizer steps (batches),
    # not epochs.
    # NOTE(review): the second drop is 100x rather than 10x, and the boundaries
    # shrink with smaller batches -- confirm both are intended.
    lr_schedule = tf.keras.optimizers.schedules.PiecewiseConstantDecay(
        boundaries=[int(50*BATCH_SIZE/512), int(75*BATCH_SIZE/512), int(100*BATCH_SIZE/512)],
        values=[lr, lr*0.1, lr*0.001, lr*0.0001]
    )
    optimizer = tf.keras.optimizers.SGD(learning_rate=lr_schedule, momentum=0.9)

    model.compile(
      optimizer=optimizer,
      # use label smoothing since we know quite a few labels will be wrong
      loss=tf.keras.losses.BinaryCrossentropy(label_smoothing=0.01),
      metrics=['acc']
    )
    return model


In [None]:
model = get_model()
# Fold 1: fit on the first half, validate on the second.
# Only the training stream is shuffled.
model.fit(
    train_ds.map(to_tuple).shuffle(500).batch(BATCH_SIZE).prefetch(2),
    validation_data=val_ds.map(to_tuple).batch(BATCH_SIZE).prefetch(2),
    epochs=10,
)

In [None]:
# Score every positive (label != 0) in the held-out half.
# Negatives are dropped inside the pipeline and the rest is scored one batch at a
# time, rather than calling `model.predict` once per sample.
val_records = []
val_positives = val_ds.filter(lambda row: tf.not_equal(row["label"], 0))
for batch in val_positives.batch(BATCH_SIZE):
    # The model emits one sigmoid output per image: shape (batch, 1) -> (batch,)
    scores = np.asarray(model.predict_on_batch(batch["image"])).reshape(-1)
    for score, label, osm_id in zip(scores, batch["label"].numpy(), batch["bbox_id"].numpy()):
        val_records.append({"pred": float(score), "label": label, "osm_id": osm_id})
pred_df = pd.DataFrame(val_records)

In [None]:
def plot_one_object(object_id):
    """Draw the resized crop of the object whose `osm_id` equals `object_id`.

    Reads the object's source image from `data/images/`, crops it with the
    object's box(es) from the global `patches` table and shows the first crop
    on the current matplotlib axes.
    """
    rows = patches.loc[patches.osm_id==object_id]
    filename = rows["image_id"].iloc[0]
    img = tf.io.decode_png(tf.io.read_file(f"data/images/{filename}.png"))
    bboxes = rows[["y_min", "x_min", "y_max", "x_max"]].values
    # crop_and_resize wants a batch dimension; every box refers to image 0 of it
    image_batch = tf.expand_dims(img, axis=0)
    box_indices = tf.zeros_like(bboxes[:, 0], dtype=tf.int32)
    crops = tf.image.crop_and_resize(
        image_batch, bboxes, box_indices=box_indices,
        crop_size=[IMAGE_SIZE, IMAGE_SIZE], method='bilinear',
        extrapolation_value=0, name=None
    )
    first_crop = crops[0].numpy()
    plt.imshow(first_crop.astype(np.uint8))

In [None]:
#@title Plot 10 images the model disagrees with: if we succeeded, this should be primarily mistagged OSM data
# itertuples keeps each column's dtype. iterrows upcasts the whole row to float,
# which printed labels and ids as "1.0" / "12345.0" in the titles.
for row in pred_df.nsmallest(10, "pred").itertuples(index=False):
  plot_one_object(row.osm_id)
  plt.title(f"{row.pred:.3f}, {row.label}, {row.osm_id}")
  plt.show()

In [None]:
#@title Complete the cross-val loop: train on the second half of objects, predict on the first
model = get_model()
# Roles swapped relative to the first fold: `val_ds` is fitted on,
# `train_ds` is held out.
model.fit(
    val_ds.map(to_tuple).shuffle(500).batch(BATCH_SIZE).prefetch(2),
    validation_data=train_ds.map(to_tuple).batch(BATCH_SIZE).prefetch(2),
    epochs=10,
)

In [None]:
# Score every positive (label != 0) in the first half with the second-fold model.
# Negatives are dropped inside the pipeline and the rest is scored one batch at a
# time, rather than calling `model.predict` once per sample.
train_records = []
train_positives = train_ds.filter(lambda row: tf.not_equal(row["label"], 0))
for batch in train_positives.batch(BATCH_SIZE):
    # The model emits one sigmoid output per image: shape (batch, 1) -> (batch,)
    scores = np.asarray(model.predict_on_batch(batch["image"])).reshape(-1)
    for score, label, osm_id in zip(scores, batch["label"].numpy(), batch["bbox_id"].numpy()):
        train_records.append({"pred": float(score), "label": label, "osm_id": osm_id})
pred_df2 = pd.DataFrame(train_records)

In [None]:
# Stack the out-of-fold predictions from both halves. `ignore_index=True` gives
# the combined frame a fresh 0..n-1 index instead of two overlapping ranges,
# which would otherwise be written to the CSV as duplicate row labels.
df = pd.concat([pred_df, pred_df2], ignore_index=True)
# A low predicted probability on a tagged object means a likely mislabel.
df["mislabel_score"] = 1 - df["pred"]
df.to_csv("gs://osm-object-detector/chimneys/mislabel_scores.csv")

In [None]:
#@title Hand-label surprising objects

!pip install -q pigeon-jupyter

from pigeon import annotate

def plot_and_show(x):
    plot_one_object(x)
    plt.show()

annotations = annotate(
  df.query("pred<0.5").osm_id,
  options=['correct', 'mistagged'],
  display_fn=plot_and_show
)

In [None]:
# Tabulate the annotator's output as (osm_id, status) rows and write it to the bucket.
annotations = pd.DataFrame(annotations, columns=["osm_id", "status"])
annotations.to_csv("gs://osm-object-detector/chimneys/hand_labels.csv")

In [None]:
#@title Prune known mislabels and recreate dataset
# Drop every object hand-labelled "mistagged". `isin` tolerates an osm_id that was
# annotated more than once; `Series.map` raises on a non-unique index.
mistagged_ids = annotations.loc[annotations["status"] == "mistagged", "osm_id"]
patches = patches[~patches["osm_id"].isin(mistagged_ids)]
# `get_final_dataset` reads this global to balance negatives against positives,
# so refresh it now that some boxes are gone.
bboxes_per_image = len(patches) / patches["image_id"].nunique()
images_and_bboxes = get_base_dataset('data/images', patches)
final_dataset = get_final_dataset(images_and_bboxes)

# get size of dataset -- since we changed the number of rows dynamically we have to count them in full
# (summing gives 0 on an empty dataset; an enumerate loop would leave its counter unbound)
num_samples = sum(1 for _ in final_dataset)
# 90/10 positional split for the final model
num_train_samples = int(num_samples*0.9)
train_ds = final_dataset.take(num_train_samples)
val_ds = final_dataset.skip(num_train_samples)

In [None]:
#@title Train final model
model = get_model()
# Same input pipeline as the cross-validation folds, applied to the 90/10 split.
model.fit(
    train_ds.map(to_tuple).shuffle(500).batch(BATCH_SIZE).prefetch(2),
    validation_data=val_ds.map(to_tuple).batch(BATCH_SIZE).prefetch(2),
    epochs=10,
)

In [None]:
# Save the trained final model to the project bucket.
model.save("gs://osm-object-detector/chimneys/model")

In [None]:
#@title Bonus: Rudimentary object detection
# Slide a non-overlapping IMAGE_SIZE window over each image, classify every tile,
# and dim each tile by its score so likely detections stand out.
# The dataset yields (image, bboxes, bbox_ids) triples, so three names are needed
# to unpack it; two raise "too many values to unpack".
for img, _, _ in images_and_bboxes.skip(patches["image_id"].nunique()-20).take(10):
    pixels = img.numpy()  # convert once per image, not once per tile
    fig, ax = plt.subplots(1+(1280//IMAGE_SIZE),1+(1280//IMAGE_SIZE),figsize=(20,20), sharex=True, sharey=True)
    fig.tight_layout()

    tiles = []
    grid_cells = []
    for i, x in enumerate(range(0, 1260, IMAGE_SIZE)):
        for j, y in enumerate(range(0, 1260, IMAGE_SIZE)):
            # Zero-pad tiles that run past the image edge.
            image = np.zeros((IMAGE_SIZE, IMAGE_SIZE, 3))
            new_image = pixels[y:y+IMAGE_SIZE, x:x+IMAGE_SIZE, :]
            image[:new_image.shape[0], :new_image.shape[1], :] = new_image
            tiles.append(image)
            grid_cells.append((j, i))

    # One batched forward pass per image instead of one predict call per tile.
    preds = model.predict(np.stack(tiles))[:, 0]

    for image, (j, i), pred in zip(tiles, grid_cells, preds):
        if pred < 0.25:
          image *= 0.25  # darken panels with no detections for emphasis
        else:
          image *= pred
        ax[j][i].imshow(image.astype(np.uint8))
        ax[j][i].set_title(pred)
    plt.show()