In [None]:
import tensorflow as tf
import matplotlib.pyplot as plt

In [None]:
# Report whether this TensorFlow build has CUDA support, then the visible GPUs
print(
    tf.test.is_built_with_cuda(),
    tf.config.list_physical_devices("GPU"),
    sep="\n",
)

In [None]:
def draw_images(images):
    """Show a batch of images side by side in a single row.

    Args:
        images: Batch indexed along its first axis. It must expose ``.shape``
            and every ``images[i]`` must be something ``Axes.imshow`` accepts.
            Each image is rendered with the "gray" colormap and without axes.

    Returns:
        None. An empty batch draws nothing.
    """
    images_count = images.shape[0]
    if images_count == 0:
        # plt.subplots cannot build a grid with zero columns; nothing to show.
        return
    # squeeze=False keeps `axs` a 2-D array even for a single image. With the
    # default squeeze=True a 1x1 grid comes back as a bare Axes without `.flat`.
    _, axs = plt.subplots(
        1, images_count, figsize=(images_count * 2, 2), squeeze=False
    )
    for i, ax in enumerate(axs.flat):
        ax.axis("off")
        ax.imshow(images[i], cmap="gray")
    plt.tight_layout()
    plt.show()

In [None]:
def generate_random_circles(count, min_radius, max_radius, min_x, max_x, min_y, max_y):
    """Sample `count` random circles.

    Centre coordinates and radii are drawn independently with
    `tf.random.uniform` from the given (min, max) ranges.

    Returns:
        A float32 tensor of shape (count, 3) whose rows are (x, y, radius).
    """

    def sample(low, high):
        # One uniform draw per circle.
        return tf.random.uniform([count], minval=low, maxval=high, dtype=tf.float32)

    # The list is evaluated left to right, so draws happen as x, y, radius.
    columns = [
        sample(min_x, max_x),
        sample(min_y, max_y),
        sample(min_radius, max_radius),
    ]
    return tf.stack(columns, axis=-1)


# Quick look at two sampled circles; each row is (x, y, radius).
generate_random_circles(
    count=2,
    min_x=-100,
    max_x=100,
    min_y=-100,
    max_y=100,
    min_radius=1,
    max_radius=10,
)

In [None]:
def generate_circle_images(circles, width, height, offset_x, offset_y):
    """Rasterise filled circles into binary images.

    The pixel at (row, col) sits at x = col + offset_x, y = row + offset_y.
    It is 1.0 when its distance to the circle centre is at most the radius
    (boundary included) and 0.0 otherwise.

    Args:
        circles: Tensor of shape (count, 3) with rows (x, y, radius), e.g. the
            result of `generate_random_circles`.
        width: Number of pixel columns.
        height: Number of pixel rows.
        offset_x: x coordinate of column 0.
        offset_y: y coordinate of row 0.

    Returns:
        A float32 tensor of shape (count, height, width).
    """
    xs = tf.range(width, dtype=tf.float32) + offset_x
    ys = tf.range(height, dtype=tf.float32) + offset_y
    # tf.meshgrid defaults to "xy" indexing: both grids are (height, width).
    grid_x, grid_y = tf.meshgrid(xs, ys)

    # Shape the per-circle scalars as (count, 1, 1) so they broadcast against
    # the (1, height, width) grids. This keeps rows = height and cols = width,
    # so non-square images work too.
    center_x = circles[:, 0][:, tf.newaxis, tf.newaxis]
    center_y = circles[:, 1][:, tf.newaxis, tf.newaxis]
    radius = circles[:, 2][:, tf.newaxis, tf.newaxis]

    distance = tf.sqrt(
        tf.square(grid_x[tf.newaxis, ...] - center_x)
        + tf.square(grid_y[tf.newaxis, ...] - center_y)
    )
    return tf.cast(distance <= radius, dtype=tf.float32)


# Sample 15 circles and render each onto a 200x200 canvas whose top-left
# pixel sits at (-100, -100).
circles = generate_random_circles(
    count=15,
    min_x=-100,
    max_x=100,
    min_y=-100,
    max_y=100,
    min_radius=1,
    max_radius=100,
)
images = generate_circle_images(
    circles,
    width=200,
    height=200,
    offset_x=-100,
    offset_y=-100,
)
draw_images(images)

In [None]:
def generate_data(count, image_width, image_height, offset_x, offset_y):
    """Build per-pixel training data for random circle images.

    Circles are sampled with centres inside the image rectangle and radii
    between 10% and 60% of the smaller image side, then rasterised.

    Args:
        count: Number of circles / images to generate.
        image_width: Number of pixel columns.
        image_height: Number of pixel rows.
        offset_x: x coordinate of column 0.
        offset_y: y coordinate of row 0.

    Returns:
        A tuple (inputs, outputs):
          * inputs: float32 tensor (count, image_height, image_width, 5) whose
            channels are [pixel_x, pixel_y, circle_x, circle_y, circle_radius].
          * outputs: tensor (count, image_height, image_width, 1) holding the
            rasterised image returned by `generate_circle_images`.
    """
    smaller_side = min(image_width, image_height)
    circles = generate_random_circles(
        count=count,
        min_radius=smaller_side * 0.1,
        max_radius=smaller_side * 0.6,
        min_x=offset_x,
        max_x=offset_x + image_width,
        min_y=offset_y,
        max_y=offset_y + image_height,
    )
    images = generate_circle_images(
        circles,
        width=image_width,
        height=image_height,
        offset_x=offset_x,
        offset_y=offset_y,
    )

    # Repeat each circle's parameters for every pixel. Rows come first
    # (height, then width) to match the meshgrid layout below, so non-square
    # images concatenate correctly.
    latent = tf.tile(
        circles[:, tf.newaxis, tf.newaxis, :], [1, image_height, image_width, 1]
    )
    # Default "xy" indexing: both grids have shape (image_height, image_width).
    x, y = tf.meshgrid(
        tf.range(image_width, dtype=tf.float32) + offset_x,
        tf.range(image_height, dtype=tf.float32) + offset_y,
    )
    x = tf.expand_dims(tf.tile(x[tf.newaxis, ...], [circles.shape[0], 1, 1]), axis=-1)
    y = tf.expand_dims(tf.tile(y[tf.newaxis, ...], [circles.shape[0], 1, 1]), axis=-1)

    inputs = tf.concat([x, y, latent], axis=-1)

    outputs = tf.expand_dims(images, axis=-1)

    return inputs, outputs


# Generate a small sample with a non-zero coordinate offset, print the tensor
# shapes and preview the target images.
inputs, outputs = generate_data(
    count=20,
    image_width=200,
    image_height=200,
    offset_x=1000,
    offset_y=-500,
)
print(inputs.shape, outputs.shape, sep="\n")
draw_images(outputs)

In [None]:
def create_positional_decoder(
    position_dimensions,
    latent_dimensions,
    output_dimensions,
    hidden_layers_count,
    hidden_layers_units,
    hidden_activation="relu",
    output_activation="relu",
):
    """Build and compile a per-pixel dense decoder.

    The model takes a tensor of shape (batch, rows, cols, position_dimensions
    + latent_dimensions), with rows and cols left unspecified. It applies the
    same stack of Dense layers to every pixel's feature vector.

    Args:
        position_dimensions: Number of positional features per pixel.
        latent_dimensions: Number of latent features per pixel.
        output_dimensions: Number of output channels per pixel.
        hidden_layers_count: How many hidden Dense layers to stack.
        hidden_layers_units: Units in each hidden Dense layer.
        hidden_activation: Activation for the hidden layers; any value Keras
            `Dense(activation=...)` accepts. Defaults to ReLU.
        output_activation: Activation for the output layer. Defaults to ReLU,
            matching the previous hard-coded behaviour; pass e.g. "sigmoid"
            or None (linear) to change it.

    Returns:
        A `tf.keras.Model` compiled with the Adam optimizer and mean squared
        error loss.
    """
    inputs_layer = tf.keras.layers.Input(
        shape=(
            None,
            None,
            position_dimensions + latent_dimensions,
        )
    )
    hidden_layer = inputs_layer
    for _ in range(hidden_layers_count):
        hidden_layer = tf.keras.layers.Dense(
            units=hidden_layers_units, activation=hidden_activation
        )(hidden_layer)
    output_layer = tf.keras.layers.Dense(
        units=output_dimensions, activation=output_activation
    )(hidden_layer)
    model = tf.keras.Model(inputs=inputs_layer, outputs=output_layer)
    model.compile(
        optimizer=tf.keras.optimizers.Adam(), loss=tf.keras.losses.mean_squared_error
    )
    return model


# Smoke test: build a small decoder (8 hidden layers of 8 units each).
create_positional_decoder(
    position_dimensions=2, latent_dimensions=3, output_dimensions=1,
    hidden_layers_count=8, hidden_layers_units=8,
)

In [None]:
def train_until_improvement_treshold(fit, threshold=0.8, patience=10):
    """Call `fit` repeatedly until the loss stops improving fast enough.

    Args:
        fit: Zero-argument callable that runs one training round and returns
            an object whose ``history["loss"]`` is a list; the last entry is
            taken as the round's loss.
        threshold: A round counts as an improvement only when its loss is
            below ``threshold`` times the previous accepted loss
            (threshold=0.8 means at least a 20% improvement).
        patience: Maximum number of `fit` calls.
    """
    best_loss = float("inf")
    remaining = patience
    while remaining > 0:
        remaining -= 1
        latest_loss = fit().history["loss"][-1]
        if not latest_loss < best_loss * threshold:
            # Improvement was too small (or the loss is NaN): stop training.
            return
        best_loss = latest_loss

In [None]:
# The decoder maps (pixel position, circle parameters) to a pixel intensity.
model = create_positional_decoder(
    hidden_layers_units=8,
    hidden_layers_count=16,
    output_dimensions=1,  # grayscale image
    latent_dimensions=3,  # intuitively x, y, radius of a circle
    position_dimensions=2,  # x, y of a pixel
)
model.summary()

train_inputs, train_outputs = generate_data(
    count=2048,
    image_width=128,
    image_height=128,
    offset_x=0,
    offset_y=0,
)


def fit_single_epoch():
    """Train for one epoch on the training set and return what `model.fit` returns."""
    return model.fit(train_inputs, train_outputs, epochs=1, batch_size=2)


# Keep training one epoch at a time while the loss still improves enough.
train_until_improvement_treshold(fit_single_epoch)

In [None]:
page_size = 10

# Ground truth vs. model output for the first `page_size` training samples.
draw_images(train_outputs[:page_size])
draw_images(model.predict(train_inputs[:page_size]))

# Same comparison on freshly generated 800x800 samples.
test_inputs, test_outputs = generate_data(
    count=page_size,
    image_width=800,
    image_height=800,
    offset_x=0,
    offset_y=0,
)
draw_images(test_outputs)
draw_images(model.predict(test_inputs))