# Autoencoder

(Si no tienes GPU puedes [ejecutar este notebook en COLAB](https://colab.research.google.com/github/AlbertoRuiz/umucv/blob/master/notebooks/bottleneck.ipynb))

## Load MNIST data

In [None]:
import os
# Select the PyTorch backend for Keras. This runs before `import keras`
# because Keras reads KERAS_BACKEND when it is first imported.
os.environ["KERAS_BACKEND"] = "torch"

In [None]:
import matplotlib.pyplot as plt
import numpy             as np

import keras

In [None]:
# Load MNIST: (kxl, cl) are the training images/labels, (kxt, ct) the test ones.
(kxl, cl), (kxt, ct) = keras.datasets.mnist.load_data()

# Flatten every image into a single row and divide by 255 so that the
# (presumably 8-bit) pixel values end up in the range [0, 1].
xl = kxl.reshape(kxl.shape[0], -1) / 255
xt = kxt.reshape(kxt.shape[0], -1) / 255

# Quick check of the resulting array shapes.
print(xl.shape, cl.shape)
print(xt.shape, ct.shape)

## 2D embedding

In [None]:
# Dense autoencoder: 784 -> 256 -> 128 -> 64 -> 32 -> 2 -> 32 -> 64 -> 128 -> 256 -> 784.
# Hidden layers use ReLU, except the 2-unit bottleneck, which uses a sigmoid so
# that the 2D codes stay inside (0, 1). The output layer is linear.
model = keras.Sequential([
    keras.Input(shape=(28 * 28,)),
    *(
        keras.layers.Dense(u, activation='sigmoid' if u == 2 else 'relu')
        for u in (256, 128, 64, 32, 2, 32, 64, 128, 256)
    ),
    keras.layers.Dense(28 * 28, activation='linear'),
])

model.summary()

In [None]:
# Reconstruction error is measured with mean squared error and minimised with Adam.
model.compile(loss="mse", optimizer="adam")

In [None]:
# Autoencoder training: the input doubles as the target (xl -> xl).
history = model.fit(xl, xl, epochs=100, batch_size=500)

In [None]:
# Training-loss curve recorded by fit(); the trailing ';' hides the cell's text output.
plt.plot(history.history['loss']);

In [None]:
# Reconstructions of the training images produced by the trained autoencoder.
pred = model.predict(xl)

In [None]:
def shdig(v):
    """Draw a 784-element vector as a 28x28 grayscale image on the current axes.

    The values are inverted (1 - x) before plotting, so high input values are
    rendered dark and low values light. The colour scale is fixed to [0, 1];
    anything outside that range is clipped by the colormap.
    """
    image = np.reshape(v, (28, 28))
    plt.imshow(1 - image, cmap='gray', vmin=0, vmax=1, interpolation="nearest")

def compare(k):
    """Show training sample `k` (left) next to its reconstruction (right).

    Reads the notebook-level arrays `xl` (inputs) and `pred` (model outputs).
    """
    plt.figure(figsize=(8, 4))
    for panel, images in enumerate((xl, pred), start=1):
        plt.subplot(1, 2, panel)
        shdig(images[k])

In [None]:
# Original vs. reconstruction for training sample 37.
compare(37)

In [None]:
# Original vs. reconstruction for training sample 10235.
compare(10235)

In [None]:
# Split the trained autoencoder into its two halves, reusing the trained layers.
# The first five Dense layers (256, 128, 64, 32, 2) end at the 2-unit bottleneck;
# this assumes model.layers does not list the input layer -- confirm for the
# Keras version in use.
encoder = keras.Sequential([keras.Input(shape=(28 * 28,)), *model.layers[:5]])

# The remaining layers map a 2D code back to a 784-value image.
decoder = keras.Sequential([keras.Input(shape=(2,)), *model.layers[5:]])

In [None]:
# 2D code of every training image (output of the 2-unit bottleneck layer).
xy = encoder.predict(xl)

In [None]:
# 2D codes of the training set, one point per image, coloured by digit label.
plt.figure(figsize=(5, 5))
plt.scatter(xy[:, 0], xy[:, 1], s=1, c=cl, cmap='tab10');

In [None]:
# One panel per digit class: points of class d are coloured apart from all
# the others ('coolwarm' maps the boolean mask cl == d to its two end colours).
plt.figure(figsize=(12, 5))
for d in range(10):
    # Subplot positions are 1-based, so class d goes in slot d + 1 of the 2x5 grid.
    plt.subplot(2, 5, d + 1)
    plt.scatter(*xy.T, s=1, c=cl == d, cmap='coolwarm')
    plt.title(d)
    plt.axis('off')

## 3D embedding

In [None]:
# Autoencoder with a 3-unit bottleneck. Encoder and decoder are built as
# separate models so each half can be used on its own after training.
encoder = keras.Sequential([
    keras.Input(shape=(28 * 28,)),
    *(keras.layers.Dense(units=u, activation='relu') for u in (256, 128, 64, 32)),
    # The sigmoid keeps every coordinate of the 3D code inside (0, 1).
    keras.layers.Dense(units=3, activation='sigmoid'),
])

decoder = keras.Sequential([
    keras.Input(shape=(3,)),
    *(keras.layers.Dense(units=u, activation='relu') for u in (32, 64, 128, 256)),
    keras.layers.Dense(units=28 * 28, activation='linear'),
])

# Chaining both halves gives the full autoencoder that is actually trained.
model = keras.Sequential([encoder, decoder])
model.compile(loss='mse', optimizer='adam')

In [None]:
# Layer-by-layer overview of the encoder half (784 inputs -> 3D code).
encoder.summary()

In [None]:
# Layer-by-layer overview of the decoder half (3D code -> 784 outputs).
decoder.summary()

In [None]:
# Train the combined encoder + decoder to reproduce its own input (xl -> xl).
history = model.fit(xl, xl, epochs=100, batch_size=500)

In [None]:
# Training-loss curve recorded by fit(); the trailing ';' hides the cell's text output.
plt.plot(history.history['loss']);

In [None]:
# 3D code of every training image (output of the encoder's 3-unit layer).
xyz = encoder.predict(xl)

In [None]:
# Digit class to highlight in red; every other class is drawn in gray.
showclass = 3

fig = plt.figure(figsize=(6, 6))
ax = fig.add_subplot(projection='3d')
# Background points first, then the highlighted class on top of them.
for mask, color in ((cl != showclass, 'gray'), (cl == showclass, 'red')):
    ax.plot(*xyz[mask].T, '.', markersize=1, alpha=0.5, color=color)


In [None]:
# 3D codes of the training set, one colour and one legend entry per digit class.
fig = plt.figure(figsize=(6, 6))
ax = fig.add_subplot(projection='3d')
for c in range(10):
    points = xyz[cl == c]
    ax.plot(points[:, 0], points[:, 1], points[:, 2], '.',
            markersize=1, alpha=0.5, label=c)
# markerscale enlarges the tiny point markers so the legend stays readable.
plt.legend(markerscale=10);

Si estás en tu máquina local puedes visualizar dinámicamente el gráfico 3D con el backend tk de matplotlib:

In [None]:
%matplotlib tk

In [None]:
# Same per-class 3D plot on a larger figure.
fig = plt.figure(figsize=(8, 8))
ax = fig.add_subplot(projection='3d')
for c in range(10):
    points = xyz[cl == c]
    ax.plot(points[:, 0], points[:, 1], points[:, 2], '.',
            markersize=1, alpha=0.5, label=c)
# markerscale enlarges the tiny point markers so the legend stays readable.
plt.legend(markerscale=10);

Volvemos a los gráficos inline:

In [None]:
%matplotlib inline

In [None]:
from ipywidgets import interactive, FloatSlider

def fun(a=0.5, b=0.5, c=0.5):
    """Decode the latent point (a, b, c) and display the generated image.

    The point is passed to the notebook-level `decoder` as a batch of one,
    with progress output disabled, and the result is drawn with `shdig`.
    """
    latent = np.array([[a, b, c]])
    generated = decoder.predict(latent, verbose=False)
    shdig(generated)

# Each (min, max, step) tuple asks ipywidgets for a slider on that argument;
# the range 0..1 matches the sigmoid output of the encoder's 3-unit layer.
interactive(fun, a=(0.,1,0.01), b=(0.,1,0.01), c=(0.,1,0.01))

Vemos que muchas posiciones del embedding producen imágenes que no tienen forma de dígitos reconocibles. Si queremos aprender un modelo de un conjunto de objetos que permita generar muestras realistas es necesario hacer algunas mejoras. Algunas técnicas para esto son los variational autoencoders y los modelos de difusión (como Stable Diffusion).