# Imports

In [None]:
from keras.utils import load_img, img_to_array
import tensorflow as tf
from tensorboard.plugins import projector

import os
import shutil
import numpy as np
import pandas as pd

In [None]:
# Configuration
# Project root. Defaults to the original author's checkout; set the
# WORDNET_DIFFUSION_DIR environment variable to run the notebook on another
# machine without editing this cell.
WORK_DIR = os.environ.get(
    'WORDNET_DIFFUSION_DIR',
    '/Users/ariskoutris/Library/CloudStorage/OneDrive-Personal/Programming/wordnet_diffusion',
)
# Every other location is derived from WORK_DIR.
DATA_DIR = f'{WORK_DIR}/data'
IMG_DIR = f'{DATA_DIR}/images'                # generated images, one sub-tree per category
VEC_DIR = f'{DATA_DIR}/vectors'               # saved embedding vectors (read from its 'npy' sub-folder)
LOG_DIR = f'{DATA_DIR}/tensorboard_logs'      # TensorBoard projector output
HIERARCHY_DIR = f'{DATA_DIR}/hierarchies'     # per-category hierarchy CSV files

# Default category to process
root_category = 'Dog'

In [13]:
IMG_EXTENSIONS = [".jpg", ".JPG", ".jpeg", ".JPEG", ".png", ".PNG",
                  ".ppm", ".PPM", ".bmp", ".BMP", ".tiff"]

# Lower-cased, de-duplicated suffixes in the tuple form `str.endswith` accepts.
_IMG_SUFFIXES = tuple(dict.fromkeys(ext.lower() for ext in IMG_EXTENSIONS))


def is_image_file(filename):
    """Return True if ``filename`` ends with a known image extension.

    The comparison ignores case, so ``.TIFF`` or ``.Jpg`` are accepted even
    though ``IMG_EXTENSIONS`` does not spell out every casing.

    Args:
        filename: File name or path as a string.

    Returns:
        bool: Whether the name carries one of the ``IMG_EXTENSIONS`` suffixes.
    """
    return filename.lower().endswith(_IMG_SUFFIXES)

def make_dataset(dir_path):
    """Recursively collect the paths of every image file below ``dir_path``.

    Args:
        dir_path: Directory to scan; all nested sub-directories are visited.

    Returns:
        list[str]: Paths (directory joined with file name) of the files that
        ``is_image_file`` accepts, in the order ``os.walk`` yields them.

    Raises:
        AssertionError: If ``dir_path`` is not an existing directory.
    """
    assert os.path.isdir(dir_path), f"{dir_path} is not a valid directory"
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(dir_path)
        for name in names
        if is_image_file(name)
    ]

def normalize(data):
    """Min-max scale ``data`` so its values span the range [0, 1].

    Args:
        data: Array-like of numbers; must contain at least one element.

    Returns:
        numpy.ndarray: ``(data - min) / (max - min)`` element-wise. When every
        value is identical (zero range) an all-zero float array of the same
        shape is returned instead of the NaNs a division by zero would give.

    Raises:
        ValueError: If ``data`` is empty (raised by the NumPy min reduction).
    """
    values = np.asarray(data)
    lowest = np.min(values)
    span = np.max(values) - lowest
    shifted = values - lowest
    if span == 0:
        # All values are equal, so `shifted` is already all zeros; dividing by
        # 1 only applies the same float promotion as the regular path.
        return shifted / 1
    return shifted / span

def clear_directory(dir_path):
    """Leave ``dir_path`` as an existing, empty directory.

    Anything already at ``dir_path`` is removed with ``shutil.rmtree`` and the
    directory (including missing parents) is then created.

    Args:
        dir_path: Path of the directory to reset.
    """
    stale = os.path.exists(dir_path)
    if stale:
        # Drop the old tree wholesale instead of deleting entries one by one.
        shutil.rmtree(dir_path)
    os.makedirs(dir_path, exist_ok=True)

def extract_metadata_from_paths(img_paths):
    """Derive per-image metadata from the layout of each image path.

    Each path is split on ``/`` (back-slashes are converted first) and read as
    ``.../<label>/<date>/<seq_num>-<seed>.<ext>``: the last three segments give
    the label, the date and the file name, and the file name is split on ``-``
    to obtain the sequence number and the seed (text up to the first ``.``).

    Args:
        img_paths: Iterable of image path strings.

    Returns:
        dict: Parallel lists under the keys ``ids`` (``<date>-<seq_num>-<seed>``),
        ``labels``, ``seq_nums``, ``seeds``, ``dates`` and ``filenames``.

    Raises:
        IndexError: If a path has fewer than three segments or its file name
            contains no ``-``.
    """
    ids, labels, seq_nums, seeds, dates, filenames = [], [], [], [], [], []

    for img_path in img_paths:
        parts = img_path.replace('\\', '/').split('/')
        fname, date, label = parts[-1], parts[-2], parts[-3]

        name_fields = fname.split('-')
        seq_num = name_fields[0]
        # Strip the extension (and anything after the first dot) from the seed.
        seed = name_fields[1].split('.')[0]

        filenames.append(fname)
        dates.append(date)
        labels.append(label)
        seq_nums.append(seq_num)
        seeds.append(seed)
        ids.append('-'.join([date, seq_num, seed]))

    return {
        'ids': ids,
        'labels': labels,
        'seq_nums': seq_nums,
        'seeds': seeds,
        'dates': dates,
        'filenames': filenames,
    }

def load_images(img_paths, target_size=(224, 224)):
    """Load each image with ``load_img`` and convert it via ``img_to_array``.

    Args:
        img_paths: Iterable of image file paths.
        target_size: Size forwarded to ``load_img`` as ``target_size``;
            defaults to ``(224, 224)``.

    Returns:
        list: One converted array per path, in input order.
    """
    return [
        img_to_array(load_img(img_path, target_size=target_size))
        for img_path in img_paths
    ]

def register_embedding(embedding_tensor_name, meta_data_fname, log_dir):
    """Write a TensorBoard projector config describing a single embedding.

    Args:
        embedding_tensor_name: Value stored as the embedding's ``tensor_name``.
        meta_data_fname: Value stored as the embedding's ``metadata_path``.
        log_dir: Directory handed to ``projector.visualize_embeddings``.
    """
    projector_config = projector.ProjectorConfig()

    entry = projector_config.embeddings.add()
    entry.tensor_name = embedding_tensor_name
    entry.metadata_path = meta_data_fname

    projector.visualize_embeddings(log_dir, projector_config)

In [None]:
# Build the TensorBoard projector assets for one category: a metadata TSV, the
# projector config and a checkpoint holding the embedding vectors.

# `category` and `clear_logs` are not assigned in any visible cell, so running
# the notebook top to bottom raised NameError here. Values set by an earlier
# cell are kept; otherwise fall back to the configured default category and to
# the non-destructive choice of keeping existing logs.
category = globals().get('category', root_category)
clear_logs = globals().get('clear_logs', False)

# Setup directories
if clear_logs:
    clear_directory(LOG_DIR)
# The TSV, projector config and checkpoint below are all written into LOG_DIR,
# so it has to exist even when the logs are not cleared.
os.makedirs(LOG_DIR, exist_ok=True)

# Load images
img_paths = make_dataset(os.path.join(IMG_DIR, category))
metadata = extract_metadata_from_paths(img_paths)

# Create metadata dataframe (one row per image, in img_paths order)
metadata_df = pd.DataFrame({
    'id': metadata['ids'],
    'class_name': metadata['labels'],
    'sequence_number': metadata['seq_nums'],
    'seed': metadata['seeds'],
    'date_created': metadata['dates'],
    'filename': metadata['filenames'],
    'path': img_paths
})

# NOTE(review): rows of this array are selected with positions taken from
# img_paths below, so it is assumed to be stored in the same order as
# make_dataset returns the images -- confirm against the code that wrote it.
vecs = np.load(os.path.join(VEC_DIR, 'npy', f'{category.lower()}.npy'))

# File names below use the case-folded category name.
category_key = category.casefold()

# Import wordnet hierarchy
hier_df = pd.read_csv(os.path.join(HIERARCHY_DIR, f"{category_key}.csv"))

# Merge metadata with hierarchy. reset_index() exposes each image's original
# row position as an 'index' column so it survives the merge, which drops
# images whose class has no row in the hierarchy file.
tensorboard_metadata = metadata_df.reset_index().merge(
    hier_df, left_on='class_name', right_on='class', suffixes=('', '_y'))

# Get indices of merged data for selecting vectors
indices = tensorboard_metadata['index'].values

# Select relevant columns and clean data
tensorboard_metadata = tensorboard_metadata[
    ['class', 'cat_depth_0', 'cat_depth_1', 'cat_depth_2', 'frequency']
].fillna('None')

# Setup Tensorboard projection
PROJ_DIR = LOG_DIR
META_DATA_FNAME = f'meta_{category_key}.tsv'
EMBEDDINGS_TENSOR_NAME = f'embeddings_{category_key}'
EMBEDDINGS_FPATH = os.path.join(PROJ_DIR, EMBEDDINGS_TENSOR_NAME + '.ckpt')

# Save metadata for Tensorboard
tensorboard_metadata.to_csv(
    os.path.join(PROJ_DIR, META_DATA_FNAME), sep='\t', index=False)

# Register embedding configuration
register_embedding(EMBEDDINGS_TENSOR_NAME, META_DATA_FNAME, PROJ_DIR)

# Save embeddings for Tensorboard. Only the vectors of images that survived
# the merge are stored, so checkpoint rows line up with the TSV rows.
tensor_embeddings = tf.Variable(vecs[indices], name=EMBEDDINGS_TENSOR_NAME)
saver = tf.compat.v1.train.Saver([tensor_embeddings])
_ = saver.save(sess=None, global_step=0, save_path=EMBEDDINGS_FPATH)

print(f"Tensorboard visualization prepared for {category}")
print(f"To view, run: tensorboard --logdir={LOG_DIR}")