<a href="https://colab.research.google.com/github/UrielM21/IB/blob/main/TripletLoss_MNIST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# MNIST Digits

In [None]:
import io
import json
import os

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import tensorflow_addons as tfa
import tensorflow_datasets as tfds
from google.colab import files
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 unused import
from PIL import Image
from sklearn.manifold import TSNE
from sklearn.utils import resample
from tensorboard.plugins import projector
# Matplotlib 3.6 deprecated the bundled 'seaborn' style name in favour of
# 'seaborn-v0_8', and later releases removed the old alias. Prefer the new
# name when this Matplotlib offers it and fall back to the legacy one.
if 'seaborn-v0_8' in plt.style.available:
    plt.style.use('seaborn-v0_8')
else:
    plt.style.use('seaborn')

# Font sizes (in points) shared by every figure in the notebook.
SMALL_SIZE = 16
MEDIUM_SIZE = 16
BIGGER_SIZE = 18

plt.rc('font', size=SMALL_SIZE)          # controls default text sizes
plt.rc('axes', titlesize=SMALL_SIZE)     # fontsize of the axes title
plt.rc('axes', labelsize=MEDIUM_SIZE)    # fontsize of the x and y labels
plt.rc('xtick', labelsize=SMALL_SIZE)    # fontsize of the tick labels
plt.rc('ytick', labelsize=SMALL_SIZE)    # fontsize of the tick labels
plt.rc('legend', fontsize=SMALL_SIZE)    # legend fontsize
plt.rc('figure', titlesize=BIGGER_SIZE)  # fontsize of the figure title


def images_to_sprite(data):
    """Tile a batch of images into one square uint8 sprite image.

    Each image is rescaled independently so that its smallest value maps to
    0 and its largest to 255. The batch is then zero-padded up to the next
    perfect square ``n * n`` and laid out row by row on an ``n x n`` grid.

    Adapted from https://github.com/tensorflow/tensorflow/issues/6322

    Args:
        data: array of shape ``(N, H, W)`` or ``(N, H, W, C)``. A 3-D input
            is replicated into three identical channels.

    Returns:
        ``np.uint8`` array of shape ``(n * H, n * W, C)`` with
        ``n = ceil(sqrt(N))``.
    """
    if len(data.shape) == 3:
        data = np.tile(data[..., np.newaxis], (1, 1, 1, 3))
    data = data.astype(np.float32)

    # Per-image min/max normalisation; keepdims lets the statistics
    # broadcast against the batch without transposing back and forth.
    per_image_axes = tuple(range(1, data.ndim))
    lowest = data.min(axis=per_image_axes, keepdims=True)
    data = data - lowest
    highest = data.max(axis=per_image_axes, keepdims=True)
    # A constant image has a range of zero; dividing by it would give NaN.
    # Divide by one instead so such an image becomes all zeros.
    highest[highest == 0] = 1.0
    data = data / highest

    n = int(np.ceil(np.sqrt(data.shape[0])))
    padding = ((0, n ** 2 - data.shape[0]), (0, 0),
               (0, 0)) + ((0, 0),) * (data.ndim - 3)
    data = np.pad(data, padding, mode='constant',
                  constant_values=0)
    # Tile the individual thumbnails into an image:
    # (n, n, H, W, ...) -> (n, H, n, W, ...) -> (n * H, n * W, ...).
    data = data.reshape((n, n) + data.shape[1:]).transpose((0, 2, 1, 3)
                                                           + tuple(range(4, data.ndim + 1)))
    data = data.reshape((n * data.shape[1], n * data.shape[3]) + data.shape[4:])
    data = (data * 255).astype(np.uint8)
    return data

def prepare_for_visualization(embeddings, data_ds, name='test'):
    """Export embeddings, labels and a sprite image for the Embedding Projector.

    The following files are written into ``oss_data/`` (created if missing):

    * ``<name>_meta.tsv``: one label per line, in dataset order.
    * ``<name>_sprite.png``: thumbnails tiled by ``images_to_sprite``.
    * ``<name>_embeddings.bytes``: raw buffer of ``embeddings``
      (``ndarray.tofile``).
    * ``<name>_projector_config.json``: config referencing the files above.

    Args:
        embeddings: NumPy array of embeddings; its shape is stored as
            ``tensorShape`` and its buffer is dumped unchanged.
        data_ds: dataset exposing ``as_numpy_iterator()`` that yields
            ``(images, labels)`` batches.
        name: prefix used for every generated file name.
    """
    # Walk the dataset once so images and labels stay aligned even if the
    # dataset reshuffles between iterations (could also use .unbatch()).
    batches = list(data_ds.as_numpy_iterator())
    images = np.concatenate([batch[0] for batch in batches])
    labels = [str(label) for label in np.concatenate([batch[1] for batch in batches])]

    # NOTE(review): `normalize` and `resize` are not defined or imported in
    # the visible part of this file; confirm where they come from.
    images = normalize(images, scale=255, dtype=np.uint8)
    images = np.squeeze(images)
    images = resize(images, (images.shape[0], 64, 64, 3))

    sprite_name = name + '_sprite.png'
    tensor_name = name + "_embeddings.bytes"
    metadata_name = name + '_meta.tsv'
    tensor_shape = list(embeddings.shape)
    single_image_dim = [images.shape[1], images.shape[2]]

    folder = 'oss_data/'
    os.makedirs(folder, exist_ok=True)

    # Context manager guarantees the file is closed even if a write fails.
    with open(os.path.join(folder, metadata_name), 'w', encoding='utf-8') as out_m:
        out_m.writelines(label + "\n" for label in labels)

    sprite = Image.fromarray(images_to_sprite(images))
    sprite.save(os.path.join(folder, sprite_name))
    print(name+': ', sprite.size)

    embeddings.tofile(os.path.join(folder, tensor_name))

    # Paths inside the config keep the 'oss_data/' prefix with a forward
    # slash, independent of the local path separator.
    oss_json = {
        'embeddings': [
            {
                "tensorName": 'Visualization_' + name,
                "tensorShape": tensor_shape,
                "tensorPath": folder + tensor_name,
                "metadataPath": folder + metadata_name,
                "sprite": {
                    "imagePath": folder + sprite_name,
                    "singleImageDim": single_image_dim,
                },
            },
        ],
    }
    with open(os.path.join(folder, name + '_projector_config.json'), 'w+') as f:
        json.dump(oss_json, f, ensure_ascii=False, indent=4)

def _normalize_img(img, label):
    """Cast ``img`` to float32 and divide by 255; ``label`` passes through."""
    scaled = tf.cast(img, tf.float32) / 255.
    return scaled, label

In [None]:
# Load the MNIST train/test splits as (image, label) pairs.
train_dataset, test_dataset = tfds.load(
    name="mnist",
    split=['train', 'test'],
    as_supervised=True,
)

# Input pipelines: batches of 32, rescaled by _normalize_img after batching.
# Only the training split is shuffled (buffer of 1024 examples).
train_dataset = train_dataset.shuffle(1024).batch(32).map(_normalize_img)
test_dataset = test_dataset.batch(32).map(_normalize_img)

In [None]:
# Embedding network: two conv / max-pool / dropout stages, then a linear
# projection to 256 dimensions that is L2-normalised along axis 1.
keras_layers = tf.keras.layers

model = tf.keras.Sequential()
model.add(keras_layers.Conv2D(64, 2, padding='same', activation='relu',
                              input_shape=(28, 28, 1)))
model.add(keras_layers.MaxPooling2D(2))
model.add(keras_layers.Dropout(0.3))
model.add(keras_layers.Conv2D(32, 2, padding='same', activation='relu'))
model.add(keras_layers.MaxPooling2D(2))
model.add(keras_layers.Dropout(0.3))
model.add(keras_layers.Flatten())
# No activation on the final dense layer.
model.add(keras_layers.Dense(256, activation=None))
# L2 normalize embeddings.
model.add(keras_layers.Lambda(lambda x: tf.math.l2_normalize(x, axis=1)))

# Compile with Adam and the semi-hard triplet loss from TensorFlow Addons.
triplet_loss = tfa.losses.TripletSemiHardLoss()
adam = tf.keras.optimizers.Adam(0.001)
model.compile(optimizer=adam, loss=triplet_loss)

# Train the network for 10 epochs.
history = model.fit(train_dataset, epochs=10)

# Loss function

In [None]:
# Training loss recorded by model.fit, one value per epoch.
loss_per_epoch = history.history['loss']
plt.plot(loss_per_epoch)

# Prepare data for visualization in the TensorBoard Embedding Projector

In [None]:
# Embed the test split with the trained model, then write the projector
# files (metadata, sprite, raw tensor, config) under the 'test' prefix.
embeddings = model.predict(test_dataset)
prepare_for_visualization(
    embeddings=embeddings,
    data_ds=test_dataset,
    name='test',
)