## TensorBoard Projector Visualization

Tutorial: https://medium.com/looka-engineering/how-to-visualize-feature-vectors-with-sprites-and-tensorflows-tensorboard-3950ca1fb2c7

In [46]:
import os
import glob

import cv2
import numpy as np
import tensorflow as tf
import pandas as pd
from tqdm import tqdm
from tensorflow.contrib.tensorboard.plugins import projector

In [8]:
# Stretch the notebook container to the full browser width.
# `display`/`HTML` are imported from the public `IPython.display` module;
# importing `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [96]:
# --- Inputs -----------------------------------------------------------------
# Training images that are tiled into the sprite sheet.
IMAGES_DIR = "../data/train-jpg"
# Size each image is resized to before tiling.
IMAGE_SIZE = (64, 64)

# Label CSV of the Planet Kaggle data set.
PLANET_KAGGLE_ROOT = os.path.abspath("../data/")
PLANET_KAGGLE_LABEL_CSV = os.path.join(PLANET_KAGGLE_ROOT, 'train_v2.csv')

# Pre-computed feature vectors that are loaded as the embedding.
FEATURES_KAGGLE = os.path.abspath("../data/features/Kaggle")
FEATURE_VECTORS_BIGEARTHNET = os.path.join(FEATURES_KAGGLE, 'features_bigearthnet_Kaggle.npy')

# --- Outputs (everything TensorBoard reads lives in LOG_DIR) ------------------
LOG_DIR = "../tensorboard-logs"

SPRITES_FILE = "sprites.png"
METADATA_FILE = "metadata.tsv"

SPRITES_PATH = os.path.join(LOG_DIR, SPRITES_FILE)
METADATA_PATH = os.path.join(LOG_DIR, METADATA_FILE)
CHECKPOINT_FILE = os.path.join(LOG_DIR, "features.ckpt")

# --- Sample window -------------------------------------------------------------
# Max sprite size is 8192 x 8192 (original note); 8191 was the alternative
# sample count kept here for reference.
FIRST_SAMPLE_ID = 0
MAX_NUMBER_SAMPLES = 200

In [97]:
def create_sprite(data):
    """
    Arrange a stack of images into one square sprite sheet.

    `data` is an array of images stacked along the first axis, either
    (count, height, width) for single-channel images or
    (count, height, width, channels). Single-channel input is repeated
    into three channels. The stack is zero-padded up to the next perfect
    square n * n and laid out row by row, so image k ends up in grid
    row k // n and grid column k % n.

    Returns an array of shape (n * height, n * width, channels).
    """
    # B&W / greyscale stack: repeat the single channel three times
    if data.ndim == 3:
        data = np.tile(data[..., np.newaxis], (1, 1, 1, 3))

    count = data.shape[0]
    n = int(np.ceil(np.sqrt(count)))

    # Append all-zero images so the stack fills an n x n grid exactly
    pad_width = ((0, n * n - count), (0, 0), (0, 0), (0, 0))
    padded = np.pad(data, pad_width, mode='constant', constant_values=0)
    height, width = padded.shape[1], padded.shape[2]

    # (n * n, h, w, c) -> (n, n, h, w, c) -> (n, h, n, w, c)
    grid = padded.reshape((n, n) + padded.shape[1:])
    grid = grid.transpose((0, 2, 1, 3, 4))

    # Merge (grid row, h) and (grid column, w) into the sheet's two axes
    return grid.reshape((n * height, n * width) + padded.shape[3:])

In [98]:
# Preprocess labels and file names

labels_df = pd.read_csv(PLANET_KAGGLE_LABEL_CSV)

# Collect every distinct tag, keeping the order of first appearance.
# Each entry of the `tags` column is a space-separated string of tags.
label_list = []
seen_labels = set()
for row_tags in labels_df["tags"].values:
    for tag in row_tags.split(' '):
        if tag in seen_labels:
            continue
        seen_labels.add(tag)
        label_list.append(tag)

# Map each label to its position in label_list
label_map = dict(zip(label_list, range(len(label_list))))

In [105]:
# Create sprite image and label vectors
#
# For every selected row of labels_df: load the image, resize it to
# IMAGE_SIZE and build a multi-hot label vector (one slot per entry of
# label_map). The resized images are then tiled into one sprite sheet that
# is written to SPRITES_PATH.

# cv2.imwrite returns False instead of raising when the target directory
# does not exist, so make sure it is there first.
os.makedirs(LOG_DIR, exist_ok=True)

# Positional slice: MAX_NUMBER_SAMPLES rows starting at FIRST_SAMPLE_ID.
sample_rows = labels_df.iloc[FIRST_SAMPLE_ID:FIRST_SAMPLE_ID + MAX_NUMBER_SAMPLES]

img_data = []
labels = []
for f, tags in tqdm(sample_rows.values, miniters=1000):
    image_path = os.path.join(IMAGES_DIR, '{}.jpg'.format(f))
    input_img = cv2.imread(image_path)
    # cv2.imread signals a missing/unreadable file by returning None
    if input_img is None:
        raise IOError("Could not read image: {}".format(image_path))
    img_data.append(cv2.resize(input_img, IMAGE_SIZE))

    # Multi-hot vector sized from the label map instead of a fixed 17
    targets = np.zeros(len(label_map))
    for t in tags.split(' '):
        targets[label_map[t]] = 1
    labels.append(targets)
img_data = np.array(img_data)
labels = np.array(labels)

sprite = create_sprite(img_data)
sprite_written = cv2.imwrite(SPRITES_PATH, sprite)
if not sprite_written:
    raise IOError("Could not write sprite image: {}".format(SPRITES_PATH))
# Keep the write status as the cell output
sprite_written

100%|██████████| 200/200 [00:00<00:00, 558.05it/s]


True

In [100]:
# Create metadata, configure for tensorboard embedding
#
# Writes three things into LOG_DIR: the metadata TSV (one row per sample),
# a checkpoint holding the feature vectors, and the projector config that
# points at the checkpoint tensor, the metadata file and the sprite sheet.

os.makedirs(LOG_DIR, exist_ok=True)

# Create metadata
# Can include class data in here if interested / have available
# `labels` already holds only the selected sample window, so it is written
# as-is; slicing it again with FIRST_SAMPLE_ID would drop rows whenever the
# offset is non-zero.
with open(METADATA_PATH, 'w') as wrf:
    wrf.write("image_id\t" + "\t".join(str(tag) for tag in label_list) + "\n")
    for i, label_vector in enumerate(labels):
        wrf.write(str(i) + "\t" + "\t".join(str(flag) for flag in label_vector) + "\n")

feature_vectors = np.load(FEATURE_VECTORS_BIGEARTHNET)
sample_features = feature_vectors[FIRST_SAMPLE_ID:FIRST_SAMPLE_ID+MAX_NUMBER_SAMPLES]

# One metadata row is written per label vector, so the embedding needs
# exactly as many points.
if len(sample_features) != len(labels):
    raise ValueError(
        "Got {} feature vectors but {} label rows".format(len(sample_features), len(labels))
    )

features = tf.Variable(sample_features, name='features')

# Write summaries for tensorboard
with tf.Session() as sess:
    saver = tf.train.Saver([features])

    sess.run(features.initializer)
    saver.save(sess, CHECKPOINT_FILE)

    config = projector.ProjectorConfig()
    embedding = config.embeddings.add()
    embedding.tensor_name = features.name
    # File names (not full paths) are stored for the metadata and the sprite
    embedding.metadata_path = METADATA_FILE

    # This adds the sprite images
    embedding.sprite.image_path = SPRITES_FILE
    embedding.sprite.single_image_dim.extend(IMAGE_SIZE)

    # Close the writer explicitly so its event file handle is released
    summary_writer = tf.summary.FileWriter(LOG_DIR)
    try:
        projector.visualize_embeddings(summary_writer, config)
    finally:
        summary_writer.close()

In [13]:
# Load the TensorBoard notebook extension (used by the %tensorboard cell below).
%load_ext tensorboard
# Alternative, currently disabled: reload the extension instead of loading it.
#%reload_ext tensorboard

In [102]:
#!kill 11544  # when updating the data, first kill the old projector process (its PID is shown in the error message or in the output of the following cell)

In [103]:
%tensorboard --logdir ../tensorboard-logs