<a href="https://colab.research.google.com/github/akhanf/biophys9709/blob/2025/Lecture_8b_Autoencoders.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Boilerplate

In [None]:
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
import numpy as np


# Autoencoder for dimensionality reduction

## Load data

Load MNIST data

In [None]:
# Load MNIST; each split is unpacked into an (images, digit labels) pair
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

## Pre-processing

Perform the same pre-processing as in the previous class examples,
except that the input image itself (rather than the digit label) is used as the training target.

In [None]:
# Keras needs the image tensors to have a channel dimension, even if there is
# only one channel, so we reshape the tensors accordingly. Using -1 for the
# sample axis lets NumPy infer the number of images instead of hard-coding the
# sizes of the two splits.
x_train = x_train.reshape(-1, 28, 28, 1)
x_test = x_test.reshape(-1, 28, 28, 1)

# Convert the pixels to float32 type.
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')

# Rescale the pixel values to run from 0 to 1.
x_train = x_train / 255
x_test = x_test / 255

# Save the digit labels (for visualization).
# NOTE: y_train / y_test are overwritten just below, so re-running this cell
# without reloading the data would store images here instead of labels.
y_label_train = y_train
y_label_test = y_test

# Use the input image as the training target (autoencoder): the network is
# trained to reproduce its own input.
y_train = x_train
y_test = x_test



## Define hyperparameters

In [None]:
# Shape of one input image: the two image axes of x_train plus a single channel
img_shape = x_train.shape[1:3] + (1,)

# Hyperparameters used to define and train the model
batch_size = 32
epochs = 5
val_split = 0.2  # fraction of the training data held out for validation

# Number of batches needed to cycle once through the non-validation part of
# the training data in an epoch
steps_per_epoch = int(np.floor(len(x_train) * (1 - val_split) / batch_size))



In [None]:
# display the computed number of steps (batches) per epoch
steps_per_epoch

## Define the AutoEncoder network

Build using fully-connected layers:


In [None]:
# number of values in one flattened image; cast to a plain Python int because
# it is used below as a layer size
flat_shape = int(np.prod(img_shape))

# width of the latent code (example value): 2 lets the encoded samples be drawn
# directly as a 2-D scatter plot
size_bottleneck = 2
# width of the hidden layer on both the encoder and decoder side (example value)
size_encode_decode = 128

#we will define an encoder and decoder separately,
# then connect them together


#define layers of encoder

#input layer
input_layer = keras.layers.Input(img_shape)
x = keras.layers.Flatten()(input_layer)

#encoder layer
x = keras.layers.Dense(size_encode_decode, activation='relu')(x)

#bottleneck layer (no activation, so the latent code is not restricted to >= 0)
encoded_layer = keras.layers.Dense(size_bottleneck)(x)


#define layers of decoder
encoded_shape = (size_bottleneck,)
encoded_input = keras.layers.Input(encoded_shape)

#decoder layer:
x = keras.layers.Dense(size_encode_decode, activation='relu')(encoded_input)


#output layer: sigmoid keeps every reconstructed pixel between 0 and 1
x = keras.layers.Dense(flat_shape, activation='sigmoid')(x)
output_layer = keras.layers.Reshape(img_shape)(x)

#define the models:
encoder = keras.Model(input_layer,encoded_layer)
decoder = keras.Model(encoded_input,output_layer)

#define autoencoder from input layer to output of decoder
autoencoder = keras.Model(input_layer, decoder(encoded_layer))


In [None]:
# Compile the model
loss = keras.losses.mean_squared_error
optim = keras.optimizers.Adam()
metric = keras.metrics.mean_squared_error

# `metrics` must be a list (or tuple/dict); Keras 3 raises a ValueError when it
# is given a bare function, so the single metric is wrapped in a list.
autoencoder.compile(loss=loss,
                    optimizer=optim,
                    metrics=[metric])

# What does the finished model look like?
autoencoder.summary()


In [None]:
# text summary of the encoder, then a labelled diagram of its layers
encoder.summary()
print('encoder:')
keras.utils.plot_model(model=encoder, rankdir='LR', show_shapes=True)


In [None]:
# text summary of the decoder, then a labelled diagram of its layers
decoder.summary()
print('decoder:')
keras.utils.plot_model(model=decoder, rankdir='LR', show_shapes=True)

## Train the network

In [None]:
# Train the autoencoder; the returned object is kept so the training curves
# can be plotted afterwards
history = autoencoder.fit(
    x=x_train,
    y=y_train,
    epochs=epochs,
    batch_size=batch_size,
    steps_per_epoch=steps_per_epoch,
    validation_split=val_split,
)

## Plot the loss & metric over the training epochs, then evaluate on the test dataset

In [None]:
import seaborn as sns
# plot everything recorded in history.history during training as line curves
sns.lineplot(data=history.history)

In [None]:
# Compute the loss and metric on the held-out test split
autoencoder.evaluate(x=x_test, y=y_test)

## Inspect the reconstructed images

In [None]:
# Pass the test images through the full autoencoder to get their reconstructions
reconst = autoencoder.predict(x=x_test)


In [None]:
# make a quick function to plot images in an MxN grid

def plot_mnist_compare(img_tuple,num_cols=10):
  """Show the first `num_cols` images of each array, one array per row.

  Args:
    img_tuple: sequence of image arrays, each indexed as
      [image, row, col, channel]; only channel 0 is drawn.
    num_cols: number of images (columns) to show from each array.
  """
  base_size=1
  M=len(img_tuple)
  N=num_cols
  # squeeze=False keeps `axs` two-dimensional even when M or N is 1, so the
  # axs[i,j] indexing below also works for a single row or a single column.
  fig, axs = plt.subplots(M,N,figsize=(base_size*N,base_size*M),squeeze=False)

  for i,img in enumerate(img_tuple):
    for j in range(N):
      axs[i,j].imshow(img[j,:,:,0])



In [None]:
# call the function to plot: top row = test images fed to the network,
# bottom row = their reconstructions (first 10 images of each)
plot_mnist_compare((x_test,reconst))

## Inspect the encoded representations

In [None]:
# Run the test images through the encoder half only to obtain their latent codes
encoded = encoder.predict(x=x_test)
# show the dimensions of the encoded array
encoded.shape


In [None]:
# define a quick function to visualize a scatter plot

def plot_latent_2d(latent, class_labels):
  """Scatter the first two latent dimensions, using each class label as marker.

  Args:
    latent: array indexed as [sample, latent dimension]; columns 0 and 1 are
      used as the x and y coordinates.
    class_labels: one label per row of `latent`.
  """
  class_labels = np.asarray(class_labels)
  fig, axs = plt.subplots(figsize=(10,10))
  #loop over every class present in the labels (for mnist: all ten digits,
  # 0-9; the previous range(9) skipped digit 9), plot with a number marker
  for i in np.unique(class_labels):
    latent_i = latent[class_labels==i]
    axs.scatter(latent_i[:,0],latent_i[:,1],marker=f'${i}$',s=50)


In [None]:
# scatter the encoded test images, each drawn with its true digit label as marker
plot_latent_2d(encoded,y_label_test)

## Try changing the network to see the difference in encoded representations

# Denoising Autoencoder

## Adding noise

Here, in addition to applying preprocessing as in past demos, we will also add random noise to all images. We will then treat these corrupted images as the network input, x, and the original (clean) images as the target network output, y. The class labels will not be used at all.

In [None]:
# corrupt the input images with additive Gaussian noise scaled by noise_fac
noise_fac = 0.4
# np.random.normal returns float64; cast the noise to float32 so that adding
# it to float32 images does not silently promote the noisy inputs to float64
# (twice the memory, and a different dtype from the targets).
x_train = y_train + noise_fac * np.random.normal(size=y_train.shape).astype('float32')
x_test = y_test + noise_fac * np.random.normal(size=y_test.shape).astype('float32')

#clip to ensure still in 0-1 range
x_train = np.clip(x_train,0,1)
x_test = np.clip(x_test,0,1)


## Examine the data before and after adding noise:

In [None]:
# top row = noisy inputs (x_test), bottom row = clean originals (y_test)
plot_mnist_compare((x_test,y_test))

## Define the AutoEncoder network

Copy the autoencoder definition from the first part of this notebook into the cell below, and adapt it for denoising.

In [None]:
# Compile the model
# NOTE: unless the network is re-defined in this cell first, `autoencoder` is
# the same model object that was already trained earlier in the notebook;
# compiling again does not re-initialize its weights, so training would
# continue from them.
loss = keras.losses.mean_squared_error
optim = keras.optimizers.Adam()
metric = keras.metrics.mean_squared_error

# `metrics` must be a list (or tuple/dict); Keras 3 raises a ValueError when it
# is given a bare function, so the single metric is wrapped in a list.
autoencoder.compile(loss=loss,
                    optimizer=optim,
                    metrics=[metric])

# What does the finished model look like?
autoencoder.summary()


## Train the network

In [None]:
# Train on (noisy input, clean target) pairs
history = autoencoder.fit(
    x=x_train,
    y=y_train,
    epochs=epochs,
    batch_size=batch_size,
    steps_per_epoch=steps_per_epoch,
    validation_split=val_split,
)

In [None]:
# Compute the loss and metric on the test split (noisy inputs vs. clean targets)
autoencoder.evaluate(x=x_test, y=y_test)

## Inspect the reconstructed images

In [None]:
# Pass the noisy test images through the autoencoder to get its outputs
reconst = autoencoder.predict(x=x_test)


In [None]:
# rows from top to bottom: noisy input, clean original, network output
plot_mnist_compare((x_test,y_test,reconst))

# Bonus: Compare autoencoder to PCA?
