<a href="https://colab.research.google.com/github/davyrisso/videodream/blob/main/videodream.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Videodream

A Tensorflow implementation of the Deepdream algorithm that uses optical flow to process videos.

The TensorFlow DeepDream part comes from the [official TensorFlow documentation](https://www.tensorflow.org/tutorials/generative/deepdream).

Optical flow is computed with the [OpenCV contributed optflow module](https://docs.opencv.org/3.4/d2/d84/group__optflow.html) using the [DeepFlow](https://thoth.inrialpes.fr/src/deepflow/) method.

## Imports

In [None]:
%tensorflow_version 2.x

import datetime
import json
import os
import sys
import time

import tensorflow
import numpy
import cv2

from scipy.ndimage.filters import gaussian_filter

TensorFlow 2.x selected.


# Main

### Params & Constants

In [None]:
# NOTE: most constants below are captured as default argument values when the
# function cells further down are executed. After editing this cell, re-run
# those cells too, otherwise the functions keep using the old values.

# Input video, change to desired input video (use the file panel on the left).
# NOTE(review): both paths live under '/content/drive/My Drive', which
# presumably requires Google Drive to be mounted first; no mount step is
# visible in this notebook -- confirm.
INPUT_VIDEO_PATH = '/content/drive/My Drive/Deepdream/samples/To_the_wonder_clip_1.mov'
# Root directory for results. main() writes each run into
# <BASE_OUTPUT_PATH>/<model name>/<timestamp>/.
BASE_OUTPUT_PATH = '/content/drive/My Drive/Deepdream/output'

# Network whose layer activations are maximized, loaded with ImageNet weights
# and without its classification head (include_top=False).
DEEPDREAM_BASE_MODEL = tensorflow.keras.applications.DenseNet201(
        include_top=False, weights='imagenet')

# Names of the DEEPDREAM_BASE_MODEL layers whose activations make up the loss.
DEEPDREAM_LAYERS = ['conv4_block17_1_relu']
# Number of gradient steps applied at each octave.
DEEPDREAM_STEPS_PER_OCTAVE = 100
# Base step size; RunDeepDream divides it by the standard deviation of the
# smoothed gradients before applying each step.
DEEPDREAM_STEP_SIZE = 0.01
# Octave exponents n: the image is resized to
# (original size * DEEPDREAM_OCTAVE_SCALE ** n) before each group of steps.
DEEPDREAM_OCTAVES = [-2, -1, 0, 1, 2]
DEEPDREAM_OCTAVE_SCALE = 1.3
# Default edge length (in pixels) of the tiles used by TiledGradients; also
# the bound of the random roll applied before tiling.
DEEPDREAM_TILE_SIZE = 512
# Gradient smoothing (see SmoothGradients). For both groups the gaussian sigma
# is (step / num_steps) * <BASE_VALUE> + <OFFSET>.
# "Details" blurs along the two spatial axes of the gradients.
DEEPDREAM_SMOOTH_DETAILS = False
DEEPDREAM_DETAILS_BLUR_BASE_VALUE = 2.5
DEEPDREAM_DETAILS_BLUR_OFFSET = 0.125
# "Colors" blurs along the channel axis of the gradients.
DEEPDREAM_SMOOTH_COLORS = True
DEEPDREAM_COLORS_BLUR_BASE_VALUE = 2.5
DEEPDREAM_COLORS_BLUR_OFFSET = 0.25
# Weight of the untouched (preprocessed) input image blended back in at every
# gradient step; 0.0 disables the blend.
DEEPDREAM_INITIAL_IMAGE_BLEND_FACTOR = 0.0
# If True, the dreamed image is resized back to the input image size.
DEEPDREAM_PRESERVE_SIZE = True

# First and last frame indices to process (both inclusive, 0-based).
# RunVideoDream treats an end frame of None as "until the reported frame
# count".
VIDEODREAM_START_FRAME = 0
VIDEODREAM_END_FRAME = 300
# Factor applied to the frame width and height before processing.
VIDEODREAM_FRAME_SCALING_FACTOR = 0.5
# Weight of the previous dreamed frame (warped with optical flow) when it is
# blended into the current frame before dreaming.
VIDEODREAM_PREVIOUS_FRAME_BLEND_FACTOR = 0.15
# Weight of the original (scaled) frame blended into the dreamed result.
VIDEODREAM_INITIAL_FRAME_BLEND_FACTOR = 0.05



### Main

In [None]:
def main():
  """Runs the whole VideoDream pipeline with the notebook-level parameters.

  Builds a feature-extraction model that outputs the activations of
  DEEPDREAM_LAYERS, creates a timestamped output directory
  (<BASE_OUTPUT_PATH>/<model name>/<YYYYmmddHHMMSS>), stores the run
  configuration next to the results as 'info.json' and then calls
  RunVideoDream.

  Every VIDEODREAM_* value recorded in the configuration is passed explicitly
  to RunVideoDream, so that what is written to 'info.json' is what is actually
  used even if the constants were edited after RunVideoDream was defined.
  """
  input_video_path = INPUT_VIDEO_PATH
  base_output_path = BASE_OUTPUT_PATH

  base_model = DEEPDREAM_BASE_MODEL
  layer_names = DEEPDREAM_LAYERS

  # The dream model maps an input image to the activations of the chosen
  # layers; these activations are what DeepDream maximizes.
  layers = [
      base_model.get_layer(layer_name).output for layer_name in layer_names]
  dream_model = tensorflow.keras.Model(
      inputs=base_model.input, outputs=layers)

  # One directory per run, so that successive runs never overwrite each other.
  timestamp = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
  output_path = os.path.join(base_output_path, base_model.name, timestamp)
  # exist_ok avoids the check-then-create race of exists() + makedirs().
  os.makedirs(output_path, exist_ok=True)

  config = {
      'input_video_path': INPUT_VIDEO_PATH,
      'base_output_path': BASE_OUTPUT_PATH,
      'deepdream': {
          'base_model': DEEPDREAM_BASE_MODEL.name,
          'layers': DEEPDREAM_LAYERS,
          'steps_per_octave': DEEPDREAM_STEPS_PER_OCTAVE,
          'step_size': DEEPDREAM_STEP_SIZE,
          'octaves': list(DEEPDREAM_OCTAVES),
          'octave_scale': DEEPDREAM_OCTAVE_SCALE,
          'tile_size': DEEPDREAM_TILE_SIZE,
          'smooth_details': DEEPDREAM_SMOOTH_DETAILS,
          'details_blur_base_value': DEEPDREAM_DETAILS_BLUR_BASE_VALUE,
          'details_blur_offset': DEEPDREAM_DETAILS_BLUR_OFFSET,
          'smooth_colors': DEEPDREAM_SMOOTH_COLORS,
          'colors_blur_base_value': DEEPDREAM_COLORS_BLUR_BASE_VALUE,
          'colors_blur_offset': DEEPDREAM_COLORS_BLUR_OFFSET,
          'initial_image_blend_factor': DEEPDREAM_INITIAL_IMAGE_BLEND_FACTOR,
          'preserve_size': DEEPDREAM_PRESERVE_SIZE,
      },
      'videodream': {
          'start_frame': VIDEODREAM_START_FRAME,
          'end_frame': VIDEODREAM_END_FRAME,
          'frame_scale_factor': VIDEODREAM_FRAME_SCALING_FACTOR,
          'previous_frame_blend_factor':
              VIDEODREAM_PREVIOUS_FRAME_BLEND_FACTOR,
          'initial_frame_blend_factor': VIDEODREAM_INITIAL_FRAME_BLEND_FACTOR,
      },
  }
  # Serialized once so the printed and the saved configuration cannot differ.
  config_json = json.dumps(config, indent=2)
  print('Config: %s' % config_json)
  info_filename = 'info.json'
  with open(os.path.join(output_path, info_filename), 'w') as f:
    f.write(config_json)

  print('Running VideoDream on %s\nModel: %s, layer(s): %s\nOutput: %s' % (
      input_video_path, base_model.name, layer_names, output_path))

  RunVideoDream(
      input_video_path, dream_model, output_path,
      start_frame=VIDEODREAM_START_FRAME,
      end_frame=VIDEODREAM_END_FRAME,
      previous_frame_blend_factor=VIDEODREAM_PREVIOUS_FRAME_BLEND_FACTOR,
      initial_frame_blend_factor=VIDEODREAM_INITIAL_FRAME_BLEND_FACTOR,
      frame_scaling_factor=VIDEODREAM_FRAME_SCALING_FACTOR)


# Deep Dream

## Utilities

In [None]:
def RandomRoll(image, maxroll):
  """Shifts an image by a random offset, wrapping around at the borders.

  Rolling the image by a different random amount before each tiled gradient
  computation moves the tile borders around, which avoids visible seams at
  fixed positions.

  Example with shift_down = 1 and shift_right = -1:

      A B C D        J K L I
      E F G H   =>   B C D A
      I J K L        F G H E

  Args:
    image: An image as an array of pixels or tensor (axis 0 is vertical,
      axis 1 is horizontal).
    maxroll: Bound of the random shift: each of the two shifts is an integer
      drawn uniformly from [-maxroll, maxroll). Callers pass the tile size.

  Returns:
    A (shift_down, shift_right, rolled_image) tuple: the two int32 scalar
    shifts that were applied and the rolled image.
  """
  # Draws the vertical and the horizontal offsets in a single call.
  offsets = tensorflow.random.uniform(
      shape=[2], minval=-maxroll, maxval=maxroll, dtype=tensorflow.int32)
  shift_down = offsets[0]
  shift_right = offsets[1]

  # Rolls horizontally first, then vertically.
  rolled_horizontally = tensorflow.roll(image, shift_right, axis=1)
  image_rolled = tensorflow.roll(rolled_horizontally, shift_down, axis=0)

  return shift_down, shift_right, image_rolled

In [None]:
def CalculateLoss(image, model):
  """Calculates the DeepDream loss for the passed input image.

  The loss is the sum, over the output layers of the model, of the mean
  activation of each layer. Maximizing it amplifies whatever these layers
  respond to in the image.

  Only the activation loss is used for now. Other terms could be added to
  guide the optimization; for instance the Tensorflow documentation includes
  an example using tensorflow.image.total_variation to reduce noise:
  https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/image/total_variation

  Args:
    image: A single image (no batch axis).
    model: Model returning the activations of one or several layers.

  Returns:
    A scalar tensor holding the loss.
  """
  # The model works on batches, so a leading batch axis of size 1 is added.
  activations = model(tensorflow.expand_dims(image, axis=0))

  # With a single output layer the model returns the activations directly
  # instead of a list; since the batch size is 1 this shows up as a length of
  # 1, and the result is wrapped so both cases are handled the same way below.
  if len(activations) == 1:
    activations = [activations]

  # One mean activation per layer, summed into the final loss.
  layer_losses = [
      tensorflow.math.reduce_mean(activation) for activation in activations]
  return tensorflow.reduce_sum(layer_losses)

In [None]:
def SmoothGradients(
    gradients, step, num_steps, smooth_details=DEEPDREAM_SMOOTH_DETAILS, 
    smooth_colors=DEEPDREAM_SMOOTH_COLORS,
    details_blur_base_value=DEEPDREAM_DETAILS_BLUR_BASE_VALUE, 
    details_blur_offset=DEEPDREAM_DETAILS_BLUR_OFFSET,
    colors_blur_base_value=DEEPDREAM_COLORS_BLUR_BASE_VALUE, 
    colors_blur_offset=DEEPDREAM_COLORS_BLUR_OFFSET):
  """Applies a gaussian blur to the input gradients.

  Used to generate smoother images and to parametrize color variations.
  Inspired by the DeepDream implementation by ElephantHunters:
  https://github.com/ElephantHunters/Deep-Dream-using-Tensorflow

  The blur gets stronger as the computation progresses: each sigma is
  (step / num_steps) * base_value + offset. Other schedules are possible.

  Details and colors are smoothed selectively through the per-axis sigma:
  details blur along the first two axes of the gradients (the image plane),
  colors blur along the third axis (the color channels). When both are
  enabled, the two blurred gradients are added together.

  Args:
    gradients: Gradients as a tensor exposing .numpy(), with three axes.
    step: Index of the current step.
    num_steps: Total number of steps; must not be 0.
    smooth_details: Whether to blur along the two image axes.
    smooth_colors: Whether to blur along the color axis.
    details_blur_base_value: Base value of the details sigma.
    details_blur_offset: Offset of the details sigma.
    colors_blur_base_value: Base value of the colors sigma.
    colors_blur_offset: Offset of the colors sigma.

  Returns:
    The smoothed gradients as a numpy array, or the unmodified gradients
    array if both kinds of smoothing are disabled.
  """
  # Fraction of the steps already done; drives the strength of both blurs.
  progress = float(step) / float(num_steps)
  details_sigma_value = progress * details_blur_base_value + details_blur_offset
  colors_sigma_value = progress * colors_blur_base_value + colors_blur_offset

  gradients_array = gradients.numpy()

  # Nothing to smooth: hands the gradients back untouched.
  if not (smooth_details or smooth_colors):
    return gradients_array

  smoothed = numpy.zeros_like(gradients_array)

  if smooth_details:
    # A sigma of 0 on the last axis leaves the color channels unmixed.
    smoothed += gaussian_filter(
        gradients_array,
        sigma=(details_sigma_value, details_sigma_value, 0.0))

  if smooth_colors:
    # Blurs across the color channels only, not across pixels.
    smoothed += gaussian_filter(
        gradients_array, sigma=(0.0, 0.0, colors_sigma_value))

  return smoothed

In [None]:
def DeprocessImage(image):
  """Turns a preprocessed image back into 8-bit pixel values.

  Args:
    image: Image with float values expected in the [-1.0, 1.0] range.

  Returns:
    A uint8 tensor in which -1.0 is mapped to 0 and 1.0 to 255.
  """
  # Shifts [-1, 1] to [0, 2], then rescales to [0, 255].
  pixel_values = 255 * (image + 1) / 2.0
  return tensorflow.cast(pixel_values, tensorflow.uint8)

## Gradients calculation tensorflow module

This is the tiled-gradients implementation presented in the [official TensorFlow DeepDream tutorial](https://www.tensorflow.org/tutorials/generative/deepdream#optional_scaling_up_with_tiles).

In [None]:
class TiledGradients(tensorflow.Module):
  """Deepdream Tensorflow module.

  Calculates the gradient of the loss (see CalculateLoss) with respect to the
  pixels of an image, one tile at a time.
  We use Tensorflow automatic differentiation and Gradient Tape:
  https://www.tensorflow.org/tutorials/customization/autodiff
  """
  def __init__(self, model):
      """Stores the model whose activations define the loss."""
      self.model = model

  # The signature is fixed to a float32 image with 3 channels and any
  # height/width, plus a scalar int32 tile size.
  @tensorflow.function(
      input_signature=(
          tensorflow.TensorSpec(
              shape=[None, None, 3],
              dtype=tensorflow.float32),
          tensorflow.TensorSpec(shape=[], dtype=tensorflow.int32),)
  )
  def __call__(self, image, tile_size=DEEPDREAM_TILE_SIZE):
    """Returns the gradients for an input image.

    We divide the input image into tiles to circumvent memory limitations
    when processing large images.

    Args:
      image: float32 tensor with three axes, the last one of size 3.
      tile_size: Edge length of the square tiles; also used as the bound of
        the random roll applied before tiling.

    Returns:
      A tensor shaped like `image` holding the gradients of the loss with
      respect to each pixel, divided by their standard deviation.
    """
    # Gets a random rolled image (see RandomRoll function).
    shift_down, shift_right, image_rolled = RandomRoll(image, tile_size)
    
    # Initializes gradients.
    gradients = tensorflow.zeros_like(image_rolled)

    # Calculates the coordinates of the tiles.
    # Note: despite the names, coordinates_x holds offsets along axis 0 and
    # coordinates_y holds offsets along axis 1 of the image.
    # The last tile origin is dropped ([:-1]); if that leaves no tile at all
    # (image not larger than one tile), a single tile starting at 0 is used.
    coordinates_x = tensorflow.range(
        0, tensorflow.shape(image_rolled)[0], tile_size)[:-1]
    if not tensorflow.cast(len(coordinates_x), bool):
        coordinates_x = tensorflow.constant([0])
    coordinates_y = tensorflow.range(
        0, tensorflow.shape(image_rolled)[1], tile_size)[:-1]
    if not tensorflow.cast(len(coordinates_y), bool):
        coordinates_y = tensorflow.constant([0])

    # Calculates gradients per tile.
    for x in coordinates_x:
        for y in coordinates_y:
            with tensorflow.GradientTape() as tape:
                # image_rolled is a plain tensor, so it has to be watched
                # explicitly for the tape to record operations on it.
                tape.watch(image_rolled)
                image_tile = image_rolled[x:x + tile_size, y:y + tile_size]
                loss = CalculateLoss(image_tile, self.model)
            # The gradient is taken with respect to the whole rolled image;
            # the loss only depends on the current tile, so each tile
            # contributes to its own region of the accumulated gradients.
            gradients = gradients + tape.gradient(loss, image_rolled)

    # 'Unrolls' the gradients so that they correspond to the unrolled image.
    gradients = tensorflow.roll(
        tensorflow.roll(
            gradients, -shift_right, axis=1), -shift_down, axis=0)

    # Normalizes the gradients.
    # The small epsilon avoids a division by zero when all gradients are equal.
    gradients /= tensorflow.math.reduce_std(gradients) + 1e-8

    return gradients

## Deepdream algorithm

In [None]:
def RunDeepDream(
  image, model, 
  steps_per_octave=DEEPDREAM_STEPS_PER_OCTAVE, step_size=DEEPDREAM_STEP_SIZE, 
  octaves=DEEPDREAM_OCTAVES, octave_scale=DEEPDREAM_OCTAVE_SCALE,
  initial_image_blend_factor=DEEPDREAM_INITIAL_IMAGE_BLEND_FACTOR,
  preserve_size=DEEPDREAM_PRESERVE_SIZE):
  """Main DeepDream algorithm.

  We calculate gradients for different scales of the image and simply add them
  to the image pixels.

  Gradient smoothing is always done with the default arguments of
  SmoothGradients, and tiling with the default tile size of TiledGradients;
  neither can be overridden through this function.

  Args:
    image: Input image as an array of pixels.
    model: Tensorflow model to run DeepDream with.
    steps_per_octave: Number of times the gradients are calculated and added
      to the base image, per 'octave' (image scale).
    step_size: Overall 'effect' of each step on the result image. Lower values
      will result in more subtle effects.
    octaves: Sequence of integers used to compute the 'octaves' (scales) at
      which the gradients will be calculated. We use this to retrieve details
      at different scales given that the models always output features of the
      same size. Negative values are for smaller scales (larger features),
      positive values are for larger scales (smaller features).
    octave_scale: Scale multiplier per octave: at octave n the image is
      resized to its original size times octave_scale ** n.
    initial_image_blend_factor: Blending factor for the non-transformed image.
      Used to preserve details from the initial image throughout the process.
    preserve_size: If true, the result image will be resized to the original
      image size (otherwise the output image will be larger or smaller
      depending on the octaves used).

  Returns:
    Result image as a uint8 array of pixels.
  """
  # Instantiates the gradients calculation module.
  deepdream = TiledGradients(model)

  # Preprocesses the image (values scaled to [-1, 1], which is what
  # DeprocessImage reverts at the end).
  # TODO(davyrisso): change the preprocess_input depending on the model.
  initial_image_tensor = (
      tensorflow.keras.applications.inception_v3.preprocess_input(
          tensorflow.keras.preprocessing.image.img_to_array(image)))

  # Spatial size of the input; every octave size is derived from it.
  initial_shape = initial_image_tensor.shape[:-1]
  base_size = tensorflow.cast(
      tensorflow.convert_to_tensor(initial_shape), tensorflow.float32)

  # The working image starts as the preprocessed input. It is never modified
  # in place (every update below creates a new tensor), so the untouched
  # preprocessed image stays available for blending without a copy.
  image_tensor = initial_image_tensor

  # Applies DeepDream for each of the passed 'octaves' (scales).
  num_octaves = len(octaves)
  for octave_index, octave in enumerate(octaves):
    # Resizes the working image for this octave.
    new_size = base_size * (octave_scale**octave)
    octave_size = tensorflow.cast(new_size, tensorflow.int32)
    image_tensor = tensorflow.image.resize(image_tensor, octave_size)
    # Creates a version of the initial image at the same octave (so we can
    # blend its value).
    initial_image_tensor_resized = tensorflow.image.resize(
        initial_image_tensor, octave_size)

    # Octaves are reported 1-based so that the last one reads 'n/n'.
    print('Processing octave %d/%d: %d (%s)' % (
        octave_index + 1, num_octaves, octave, new_size))

    # Calculates and applies the gradients many times.
    for step in range(steps_per_octave):
      gradients = deepdream(image_tensor)

      gradients_transformed = SmoothGradients(
          gradients, step, steps_per_octave)

      # We re-adjust the step size in case the transformed gradients
      # are not normalized anymore.
      step_size_scaled = step_size / (
          numpy.std(gradients_transformed) + 1e-8)

      # Deepdream in a nutshell: we simply add the gradients to the result
      # image, which we can do because they have the same shape.
      # We also blend in the original image to preserve details, and clip
      # to keep the values in the preprocessed [-1, 1] range.
      image_tensor = tensorflow.clip_by_value((
          (image_tensor + gradients_transformed * step_size_scaled) *
          (1.0 - initial_image_blend_factor) +
          initial_image_tensor_resized * initial_image_blend_factor),
          -1, 1)

  # Optionally resizes the output image back to the input size.
  if preserve_size:
    image_tensor = tensorflow.image.resize(
        image_tensor, tensorflow.cast(initial_shape, tensorflow.int32))

  return DeprocessImage(image_tensor).numpy()

# Video Dream

In [None]:
def RunVideoDream(
        input_video_path, model, output_path,
        start_frame=VIDEODREAM_START_FRAME, end_frame=VIDEODREAM_END_FRAME,
        previous_frame_blend_factor=VIDEODREAM_PREVIOUS_FRAME_BLEND_FACTOR,
        initial_frame_blend_factor=VIDEODREAM_INITIAL_FRAME_BLEND_FACTOR,
        frame_scaling_factor=VIDEODREAM_FRAME_SCALING_FACTOR):
    """Applies DeepDream to a range of frames of a video.

    To keep the dream consistent over time, the previous dreamed frame is
    warped with the optical flow computed between the previous and the current
    frame, blended into the current frame, and the blend is used as the input
    of DeepDream.

    Each dreamed frame is saved as '<output_path>/frames/<index>.png' and
    appended to a video written to '<output_path>/video/<input file name>'.

    Args:
        input_video_path: Path of the video to process.
        model: Model passed to RunDeepDream.
        output_path: Directory in which 'frames' and 'video' are created.
        start_frame: Index of the first frame to process (0-based).
        end_frame: Index of the last frame to process (inclusive). None means
            up to the frame count reported by the video.
        previous_frame_blend_factor: Weight of the warped previous dreamed
            frame when blended with the current frame before dreaming.
        initial_frame_blend_factor: Weight of the original (scaled) frame when
            blended with the dreamed frame.
        frame_scaling_factor: Factor applied to the width and the height of
            the frames before processing.

    Raises:
        IOError: If the input video cannot be opened.
    """
    # Prepares output directories.
    frames_output_dir = os.path.join(output_path, 'frames')
    os.makedirs(frames_output_dir, exist_ok=True)
    video_output_dir = os.path.join(output_path, 'video')
    os.makedirs(video_output_dir, exist_ok=True)

    print('Opening %s' % input_video_path)
    video_capture = cv2.VideoCapture(input_video_path)
    video_writer = None
    try:
        if not video_capture.isOpened():
            raise IOError('Could not open video: %s' % input_video_path)

        frame_count = int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT))
        print('Found %d frames' % frame_count)

        if end_frame is None:
            end_frame = frame_count
        print('Will process frames %d to %d' % (start_frame, end_frame))

        video_filename = os.path.split(input_video_path)[-1]
        video_path = os.path.join(video_output_dir, video_filename)
        # Kept as a float: truncating e.g. 29.97 to 29 would change the
        # duration of the output video.
        video_fps = video_capture.get(cv2.CAP_PROP_FPS)

        frame_index = 0
        previous_frame_grayscale = None
        previous_frame_processed = None

        # Stops as soon as the last requested frame is done instead of
        # decoding the rest of the video for nothing.
        while frame_index <= end_frame:
            frame_start_time = datetime.datetime.now()
            retrieved, frame = video_capture.read()
            if not retrieved:
                print('Could not read video frame')
                break

            # Frames before the requested range are decoded and dropped.
            if frame_index < start_frame:
                frame_index += 1
                continue

            print('Frame original shape: %s' % (frame.shape,))
            frame = cv2.resize(
                src=frame, dsize=(
                    int(frame.shape[1] * frame_scaling_factor),
                    int(frame.shape[0] * frame_scaling_factor)))

            print(
                'Frame shape for computation: %s (%f)' % (
                    frame.shape, frame_scaling_factor))

            print('Processing frame %d / %d' % (frame_index, end_frame))
            frame_grayscale = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            if previous_frame_processed is None:
                # First processed frame: there is nothing to propagate yet.
                frame_initial = frame
            else:
                print('Calculating optical flow')
                optical_flow = cv2.optflow.createOptFlow_DeepFlow().calc(
                    previous_frame_grayscale, frame_grayscale, None)

                # Builds the sampling maps: every output pixel (x, y) reads
                # the previous dreamed frame at (x - flow_x, y - flow_y).
                coords_y, coords_x = numpy.indices(
                    (optical_flow.shape[0], optical_flow.shape[1]),
                    dtype=numpy.float32)
                remap_vectors_x = numpy.add(coords_x, -optical_flow[..., 0])
                remap_vectors_y = numpy.add(coords_y, -optical_flow[..., 1])

                print('Applying optical flow to previous transformed frame')
                previous_frame_processed_updated = cv2.remap(
                    src=previous_frame_processed,
                    map1=remap_vectors_x,
                    map2=remap_vectors_y,
                    interpolation=cv2.INTER_LANCZOS4)

                frame_initial = cv2.addWeighted(
                    src1=frame,
                    src2=previous_frame_processed_updated,
                    alpha=1.0 - previous_frame_blend_factor,
                    beta=previous_frame_blend_factor,
                    gamma=0)

            # NOTE(review): frames read with OpenCV are in BGR order and are
            # passed as-is to the model, while Keras ImageNet models are
            # presumably trained on RGB input. No conversion is added here so
            # that the visual result is unchanged -- confirm whether this is
            # intended.
            frame_processed = RunDeepDream(frame_initial, model)

            frame_processed = cv2.addWeighted(
                src1=frame_processed,
                src2=frame,
                alpha=1.0 - initial_frame_blend_factor,
                beta=initial_frame_blend_factor,
                gamma=0)

            frame_end_time = datetime.datetime.now()
            print('Done processing frame %d in %.1f seconds' % (
                frame_index, (
                    frame_end_time - frame_start_time).total_seconds()))

            frame_filename = '%05d.png' % frame_index
            frame_path = os.path.join(frames_output_dir, frame_filename)
            print('Saving frame at %s' % frame_path)
            cv2.imwrite(frame_path, frame_processed)

            # The writer is created from the first frame actually written:
            # its frame size has to match the (scaled) frames exactly,
            # otherwise OpenCV drops them.
            if video_writer is None:
                frame_height, frame_width = frame_processed.shape[:2]
                video_writer = cv2.VideoWriter(
                    filename=video_path,
                    fourcc=int(cv2.VideoWriter_fourcc(*'mp4v')),
                    fps=video_fps,
                    frameSize=(frame_width, frame_height))
            video_writer.write(frame_processed)

            previous_frame_grayscale = frame_grayscale
            previous_frame_processed = frame_processed

            frame_index += 1
    finally:
        # Releases the input and finalizes the output video, even when
        # processing is interrupted or fails.
        video_capture.release()
        if video_writer is not None:
            video_writer.release()


# Run

In [None]:
# Runs the whole pipeline with the values of the 'Params & Constants' cell.
# For each processed frame this performs DEEPDREAM_STEPS_PER_OCTAVE gradient
# steps for every octave in DEEPDREAM_OCTAVES.
main()

Config: {
  "input_video_path": "/content/drive/My Drive/Deepdream/samples/To_the_wonder_clip_1.mov",
  "base_output_path": "/content/drive/My Drive/Deepdream/output",
  "deepdream": {
    "base_model": "densenet201",
    "layers": [
      "conv4_block17_1_relu"
    ],
    "steps_per_octave": 100,
    "step_size": 0.01,
    "octaves": [
      -2,
      -1,
      0,
      1,
      2
    ],
    "octave_scale": 1.3,
    "tile_size": 512,
    "smooth_details": false,
    "details_blur_base_value": 2.5,
    "details_blur_offset": 0.125,
    "smooth_colors": true,
    "colors_blur_base_value": 2.5,
    "colors_blur_offset": 0.25,
    "initial_image_blend_factor": 0.0,
    "preserve_size": true
  },
  "videodream": {
    "start_frame": 0,
    "end_frame": 300,
    "frame_scale_factor": 0.5,
    "previous_frame_blend_factor": 0.15,
    "initial_frame_blend_factor": 0.05
  }
}
Running VideoDream on /content/drive/My Drive/Deepdream/samples/To_the_wonder_clip_1.mov
Model: densenet201, layer(s): 