<a href="https://colab.research.google.com/github/brandon-birchall/AutoEncoder/blob/main/Autoencoder.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Frame interpolation

## Setting up

In [None]:
!pip install tfds-nightly imageio
!pip install git+https://github.com/tensorflow/docs

Collecting tfds-nightly
[?25l  Downloading https://files.pythonhosted.org/packages/f0/d1/0bacdf3005c01c8650adef86306056d86a26baf977700ba04c22d3749613/tfds_nightly-4.3.0.dev202107090108-py3-none-any.whl (3.9MB)
[K     |████████████████████████████████| 4.0MB 4.0MB/s 
Installing collected packages: tfds-nightly
Successfully installed tfds-nightly-4.3.0.dev202107090108
Collecting git+https://github.com/tensorflow/docs
  Cloning https://github.com/tensorflow/docs to /tmp/pip-req-build-h0lzzvph
  Running command git clone -q https://github.com/tensorflow/docs /tmp/pip-req-build-h0lzzvph
Building wheels for collected packages: tensorflow-docs
  Building wheel for tensorflow-docs (setup.py) ... [?25l[?25hdone
  Created wheel for tensorflow-docs: filename=tensorflow_docs-0.0.0964cc6614d3285dc9513b812a211556ff114c7fc_-cp37-none-any.whl size=154244 sha256=1024e98db0c548099a1a39d8a9f4309cf30aa01ff783a7a28ada85f745e9bb80
  Stored in directory: /tmp/pip-ephem-wheel-cache-vx8rvczv/wheels/eb/1b/3

In [None]:
!rm out/*.gif

rm: cannot remove 'out/*.gif': No such file or directory


In [None]:
import tensorflow as tf
import tensorflow_datasets as tfds

import glob
import imageio
import matplotlib.pyplot as plt
import numpy as np
import os
import PIL
import time

import tensorflow_docs.vis.embed as embed

from IPython import display

In [None]:
# Load the DAVIS training split and turn every video into grayscale frames in [0, 1].
ds = tfds.load('davis', split='train', shuffle_files=True)

videos = []
for x in ds:
  # Crop to at most 480x640 and keep the first three channels.
  rgb = x['video']['frames'][:, :480, :640, :3]
  # Cast before averaging: reduce_mean keeps the input dtype, so averaging the raw
  # integer frames would do integer arithmetic instead of a true float mean.
  frames = tf.math.reduce_mean(tf.cast(rgb, tf.float32), axis=3)
  videos.append(frames / 255)
# Join all videos along the frame axis into one tensor of frames.
videos = tf.concat(videos, 0)

[1mDownloading and preparing dataset 794.19 MiB (download: 794.19 MiB, generated: 792.26 MiB, total: 1.55 GiB) to /root/tensorflow_datasets/davis/480p/2.1.0...[0m


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Dl Completed...', max=1.0, style=Progre…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Dl Size...', max=1.0, style=ProgressSty…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Extraction completed...', max=1.0, styl…






HBox(children=(FloatProgress(value=0.0, description='Generating splits...', max=2.0, style=ProgressStyle(descr…

HBox(children=(FloatProgress(value=0.0, description='Generating train examples...', max=60.0, style=ProgressSt…

HBox(children=(FloatProgress(value=0.0, description='Shuffling davis-train.tfrecord...', max=60.0, style=Progr…

HBox(children=(FloatProgress(value=0.0, description='Generating validation examples...', max=30.0, style=Progr…

HBox(children=(FloatProgress(value=0.0, description='Shuffling davis-validation.tfrecord...', max=30.0, style=…

[1mDataset davis downloaded and prepared to /root/tensorflow_datasets/davis/480p/2.1.0. Subsequent calls will reuse this data.[0m


In [None]:
# Add the trailing channel axis the model expects. -1 lets TensorFlow infer the
# number of frames instead of hard-coding it, and re-running the cell is a no-op.
videos = tf.reshape(videos, [-1, 480, 640, 1])

In [None]:
def to_gif(images, filename):
  """Write a stack of frames to out/<filename>.gif and show it in the notebook.

  Args:
    images: Frames indexed along the first axis. Either an object exposing
      `.numpy()` (e.g. an eager tensor) or anything `np.asarray` accepts.
    filename: Base name of the GIF, without directory or extension.
  """
  anim_file = 'out/' + filename + ".gif"
  # Make sure the output directory exists before the writer opens the file.
  os.makedirs('out', exist_ok=True)
  frames = images.numpy() if hasattr(images, 'numpy') else np.asarray(images)
  with imageio.get_writer(anim_file, mode='I') as writer:
    for frame in frames:
      writer.append_data(frame)

  # A bare expression inside a function is not rendered by the notebook, so the
  # embed object has to be handed to display explicitly.
  display.display(embed.embed_file(anim_file))

In [None]:
#to_gif(videos[0], 'test')

# The autoencoder
First, I will train a simple convolutional autoencoder. It takes our input images, translates them into a lower-dimensional representation, and later reconstructs the original images.

In our frame interpolator, we will use these lower-dimensional representations as inputs.

In [None]:
class Autoencoder(tf.keras.Model):
  """Convolutional autoencoder over 480x640 single-channel frames.

  The encoder stacks three 3x3 ReLU convolutions, each followed by 2x2 max
  pooling, then a single-filter convolution and a small Dense stack, and ends
  with a Reshape to [60, 80, 1]. The decoder accepts that [60, 80, 1] code,
  applies a Dense stack and transposed convolutions (three of them with
  stride 2), then a sigmoid convolution followed by Dense / Dropout / Dense.
  """

  def __init__(self):
    super().__init__()
    kl = tf.keras.layers

    def conv(filters):
      # 3x3, stride-1 ReLU convolution shared by every encoder stage.
      return kl.Conv2D(filters, [3,3], [1,1], activation='relu', kernel_initializer='he_normal', padding='same')

    def pool():
      # 2x2 max pooling with 'same' padding.
      return kl.MaxPooling2D(padding='same', pool_size=(2, 2))

    def deconv(filters, stride):
      # 3x3 ReLU transposed convolution; `stride` is used for both spatial axes.
      return kl.Conv2DTranspose(filters, [3,3], [stride,stride], activation='relu', kernel_initializer='he_normal', padding='same')

    # Layers are listed in the exact order they are applied.
    self.encoder = tf.keras.Sequential([
      kl.InputLayer([480,640,1]),

      conv(20),
      pool(),

      conv(20),
      pool(),

      conv(20),
      pool(),

      conv(1),

      kl.Dense(64),
      kl.Dense(64),
      kl.Dense(1),

      kl.Reshape([60, 80, 1])
    ])

    self.decoder = tf.keras.Sequential([
      kl.InputLayer([60,80,1]),

      kl.Dense(64),
      kl.Dense(64),
      kl.Dense(1),

      deconv(1, 1),

      deconv(20, 2),
      kl.Reshape((120, 160,20)),

      deconv(20, 2),
      kl.Reshape([240,320,20]),

      deconv(20, 2),
      kl.Reshape([480,640,20]),

      kl.Conv2D(1, [3, 3], activation='sigmoid', padding='same'),
      kl.Reshape([480,640,1]),

      kl.Dense(32),
      kl.Dropout(0.3),
      kl.Dense(1),
    ])

  def call(self, x):
    """Run `x` through the encoder and then the decoder."""
    return self.decoder(self.encoder(x))

  def decode(self, image):
    """Apply only the decoder to an already encoded input."""
    return self.decoder(image)

  def encode(self, image):
    """Apply only the encoder to an input."""
    return self.encoder(image)

In [None]:
# Summarise the architecture on a throwaway instance so this cell runs on a fresh
# kernel (the trained `auto_encoder` is only created in a later cell) and never
# replaces a model that has already been trained.
summary_model = Autoencoder()
# A subclassed model has to be built before summary() can report shapes.
summary_model.build((None, 480, 640, 1))
# summary() prints the table itself and returns None, so it is not wrapped in print().
summary_model.summary()

Model: "autoencoder_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
sequential_2 (Sequential)    (None, 60, 80, 1)         7621      
_________________________________________________________________
sequential_3 (Sequential)    (None, 480, 640, 1)       7631      
Total params: 15,252
Trainable params: 15,252
Non-trainable params: 0
_________________________________________________________________
None


In [None]:
# Create a fresh model and train it to reproduce its own input:
# the frames serve as both the input and the target.
auto_encoder = Autoencoder()
auto_encoder.compile(
    loss=tf.keras.losses.MeanSquaredError(),
    optimizer='adam',
)
auto_encoder.fit(x=videos, y=videos, batch_size=32, epochs=3)

Epoch 1/3


KeyboardInterrupt: ignored

In [None]:
# Encode the first 20 frames and visualise the latent representation.
encoded = auto_encoder.encode(tf.reshape(videos[:20], [20,480,640,1]))
# The encoder ends in a Dense layer with no activation, so its values are not
# limited to [0, 1]; clip before scaling so the uint8 cast is well defined.
demo = tf.cast(tf.clip_by_value(encoded, 0.0, 1.0) * 255, tf.uint8)

# Plot up to four latent channels of the first frame. Only the channels that
# actually exist are indexed, so a single-channel encoding no longer raises.
num_channels = min(4, demo.shape[-1])
for channel in range(num_channels):
  plt.subplot(2, 2, channel + 1)
  plt.imshow(demo[0, :, :, channel], cmap='gray')

to_gif(demo[:,:,:,0], 'interm')

In [None]:
# Compare the first 30 original frames with their reconstructions as GIFs.
# NOTE: `input` shadows the Python builtin; the name is kept in case other cells use it.
input = tf.cast(videos[:30] * 255, tf.uint8)
# The decoder's last layer is a Dense with no activation, so reconstructions can
# fall outside [0, 1]; clip before scaling so the uint8 cast is well defined.
out = tf.cast(tf.clip_by_value(auto_encoder(videos[:30]), 0.0, 1.0) * 255, tf.uint8)
to_gif(input, 'input')
to_gif(out, 'result')

In [None]:
# Maps a pair of encoded frames, shape [2, 60, 80, 4], to one [60, 80, 4] output.
# NOTE(review): the Autoencoder encoder in this notebook ends with
# Reshape([60, 80, 1]), which does not match the 4 channels assumed here - confirm.
interpolator = tf.keras.Sequential([
      tf.keras.layers.InputLayer([2,60,80,4]),
      # Move the frame axis next to the channel axis before merging them. Reshape
      # alone only reinterprets the row-major data, which would mix values from
      # different pixels instead of stacking both frames' channels per pixel.
      tf.keras.layers.Permute((2, 3, 1, 4)),
      tf.keras.layers.Reshape((60, 80, 8)),
      tf.keras.layers.Dense(64),
      tf.keras.layers.Dropout(0.5),

      tf.keras.layers.Dense(64),
      tf.keras.layers.Dropout(0.5),

      tf.keras.layers.Conv2D(4, [3,3], [1,1], activation='relu', kernel_initializer='he_normal', padding='same'),
      tf.keras.layers.Reshape([60,80,4]),
])