In [1]:
import os
import datetime

from tqdm import tqdm

import tensorflow as tf
import matplotlib.pyplot as plt

from utils.OpticalFlowUtils import FlowReader
from utils.OpticalFlowUtils import FlowVisualiser

from utils.MPISintelUtils import MPISintelHandler

from assets.ml.src.POIFE_model import POIFE
from assets.ml.src.FlowConv import FlowConvNet
from assets.ml.src.CustomLosses import AE_Loss
from assets.ml.src.CustomLosses import EPE_Loss
from assets.ml.src.ResidualFlowNET import ResidualFlowNET
from assets.ml.src.CustomCallbacks import PredictOnEpochEndCallback
from assets.ml.src.ConvolutionalFlowRefiner import FlowRefinerConvNet

2024-02-20 18:52:14.810272: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


KeyboardInterrupt: 

In [2]:
# Cap how much memory TensorFlow may allocate on the first GPU (8.1GB).
gpus = tf.config.list_physical_devices('GPU')
if len(gpus) > 0:
    try:
        # Single logical device on top of the first physical GPU; the limit is expressed in MB.
        memoryCap = tf.config.LogicalDeviceConfiguration(memory_limit=1024*8.1)
        tf.config.set_logical_device_configuration(gpus[0], [memoryCap])
        logical_gpus = tf.config.list_logical_devices('GPU')
        print(f"{len(gpus)} Physical GPUs, {len(logical_gpus)} Logical GPUs")
    except RuntimeError as e:
        # Virtual devices must be set before GPUs have been initialized
        print(f"Runtime Error: {e}")

1 Physical GPUs, 1 Logical GPUs


In [3]:
# Project helper objects, created once up front (constructed in the same left-to-right order).
# `dataHandler` is the one the data-loading cells call `loadData` on.
reader, visualiser, dataHandler = FlowReader(), FlowVisualiser(), MPISintelHandler()

In [4]:
# Locations of the MPI-Sintel dataset, relative to the notebook's working directory.
SINTEL_PATH = os.path.join("..", "Data", "MPI-Sintel")
SINTEL_TRAINING_PATH = os.path.join(SINTEL_PATH, "training")

# Ground-truth flow and the "clean" image pass both live directly under the training folder.
SINTEL_TRAINING_FLOW_PATH, SINTEL_TRAINING_IMG_PATH = (
    os.path.join(SINTEL_TRAINING_PATH, subdir) for subdir in ("flow", "clean")
)

In [5]:
# Echo the resolved dataset locations, one per line, so a wrong working directory is spotted early.
print(
    f"SINTEL PATH: {SINTEL_PATH}",
    f"SINTEL TRAINING PATH: {SINTEL_TRAINING_PATH}",
    f"SINTEL TRAINING FLOW PATH: {SINTEL_TRAINING_FLOW_PATH}",
    f"SINTEL TRAINING IMG PATH: {SINTEL_TRAINING_IMG_PATH}",
    sep="\n",
)

SINTEL PATH: ..\Data\MPI-Sintel
SINTEL TRAINING PATH: ..\Data\MPI-Sintel\training
SINTEL TRAINING FLOW PATH: ..\Data\MPI-Sintel\training\flow
SINTEL TRAINING IMG PATH: ..\Data\MPI-Sintel\training\clean


In [None]:
# Read the pre-organised training images and flows back from disk.
dataDir = os.path.join(".", "assets", "data")

dataFilepath = os.path.join(dataDir, "trainImages.data")
trainImgsArray = dataHandler.loadData(dataFilepath)

dataFilepath = os.path.join(dataDir, "trainFlows.data")
trainFlowsArray = dataHandler.loadData(dataFilepath)

# Images: cast to float32, scale by 1/255 and reshape to (-1, 2, 436, 1024, 1)
# (presumably sample, frame, height, width, channel - TODO confirm against the data file).
trainImgsArray = tf.reshape(
    trainImgsArray.astype('float32') / 255.0,
    shape=(-1, 2, 436, 1024, 1),
)
# Flows: only cast to float32, values are left unscaled.
trainFlowsArray = trainFlowsArray.astype('float32')

In [None]:
# Show both shapes side by side so images and flows can be checked against each other.
print(
    f"Train images shape: {trainImgsArray.shape}",
    f"Train flows shape: {trainFlowsArray.shape}",
    sep="\n",
)

In [6]:
# Load the split data
def _loadSplit(filename, isImages):
    """Load one split file from ./assets/data as float32.

    Image splits (``isImages`` true) are additionally scaled by 1/255 and
    reshaped to (-1, 2, 436, 1024, 1); flow splits are only cast.
    """
    array = dataHandler.loadData(os.path.join(".", "assets", "data", filename))
    array = array.astype('float32')
    if not isImages:
        return array
    return tf.reshape(array / 255.0, shape=(-1, 2, 436, 1024, 1))


# Inputs (image pairs), loaded in the order train, test, val.
train_X = _loadSplit("trainX.data", True)
test_X = _loadSplit("testX.data", True)
val_X = _loadSplit("valX.data", True)

# Targets (flow fields), same order.
train_y = _loadSplit("trainy.data", False)
test_y = _loadSplit("testy.data", False)
val_y = _loadSplit("valy.data", False)

In [7]:
# Summarise the three splits; inputs and targets should report matching sample counts.
totalX = sum(split.shape[0] for split in (train_X, test_X, val_X))
totaly = sum(split.shape[0] for split in (train_y, test_y, val_y))

print(f"train_X: {train_X.shape} test_X: {test_X.shape} val_X: {val_X.shape} -> Total Samples: {totalX}")
print(f"train_y: {train_y.shape} test_y: {test_y.shape} val_y: {val_y.shape} -> Total Samples: {totaly}")

train_X: (510, 2, 436, 1024, 1) test_X: (297, 2, 436, 1024, 1) val_X: (234, 2, 436, 1024, 1) -> Total Samples: 1041
train_y: (510, 436, 1024, 2) test_y: (297, 436, 1024, 2) val_y: (234, 436, 1024, 2) -> Total Samples: 1041


In [8]:
# Load the TensorBoard notebook extension so the %tensorboard magic can be used to follow training progress
%load_ext tensorboard

In [None]:
# --- Logging and checkpointing set-up for the FlowConvNET run ---

# Timestamped log directory so every run gets its own TensorBoard folder.
log_dir = os.path.join(".", "assets", "logs", "fits", f"FlowConvNET_{datetime.datetime.now().strftime('%Y%m%d-%H%M%S')}")
tensorboard_callback = tf.keras.callbacks.TensorBoard(
    log_dir=log_dir,
    histogram_freq=1,
    write_images=True,
    profile_batch='10, 110',
)

# Weights directory; exist_ok=True replaces the exists()/makedirs() pair, which can
# fail if the directory appears between the check and the creation.
modelDir = os.path.join(".", "assets", "ml", "models", "FlowConvNet")
os.makedirs(modelDir, exist_ok=True)

# Keep only the weights of the epoch with the best validation loss.
modelPath = os.path.join(modelDir, "FlowConvNET.h5")
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=modelPath,
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=True,
)

# Multiply the learning rate by 0.85 after 50 epochs on a plateau, never going below 1e-6.
learning_rate_callback = tf.keras.callbacks.ReduceLROnPlateau(factor=0.85, patience=50, min_lr=1e-6)
_callbacks = [tensorboard_callback, model_checkpoint_callback, learning_rate_callback]

# NOTE(review): the fit cell is handed `_callbacks`; this CallbackList is not referenced in the visible cells.
callbacks = tf.keras.callbacks.CallbackList(_callbacks, add_history=False)

# Default summary writer for this run, writing under <log_dir>/metrics.
fileWriter = tf.summary.create_file_writer(os.path.join(log_dir, "metrics"))
fileWriter.set_as_default()

In [9]:
# Start TensorBoard inside the notebook on port 8074, reading the runs logged under ./assets/logs
%tensorboard --logdir ./assets/logs --port=8074

In [None]:
# Model trained in this section.
flowConvNet = FlowConvNet()
# NOTE(review): this loss instance is created but not passed to compile() below.
angularError = AE_Loss()

# Adam optimiser; training minimises EPE_Loss and additionally reports MSE.
optimizer = tf.keras.optimizers.Adam(
    learning_rate=1e-3,
    beta_1=0.9,
    beta_2=0.999,
)
flowConvNet.compile(
    optimizer=optimizer,
    loss=EPE_Loss(),
    metrics=['mse'],
)

# Build for the (None, 2, 436, 1024, 1) input used by the data cells, then print the layer summary.
flowConvNet.build(input_shape=(None, 2, 436, 1024, 1))
flowConvNet.summary()

In [None]:
# Train FlowConvNet on the training split, one sample per step.
# Validation uses the dedicated val_X / val_y split: `val_loss` drives both the
# best-weights checkpoint and the learning-rate schedule, so validating on the
# test split would leak it into model selection.
hist = flowConvNet.fit(
    x=train_X,
    y=train_y,
    epochs=2500,
    batch_size=1,
    verbose=True,
    validation_data=(val_X, val_y),
    callbacks=_callbacks,
)

In [10]:
def visualiseFits(fits, validationData = True):
    """Plot the loss (EPE) and MSE curves of a training run on a 2x2 grid.

    The left column shows ``fits.history['loss']`` and the right column
    ``fits.history['mse']``. Each metric is drawn once on a linear and once on
    a log-scaled y axis. Validation curves are added before the legend is
    built so that they appear in it.

    Args:
        fits: object exposing a ``history`` mapping with ``'loss'`` and
            ``'mse'`` sequences, plus ``'val_loss'`` and ``'val_mse'`` when
            ``validationData`` is true (e.g. the value returned by ``fit``).
        validationData: when true, overlay the validation curves.

    Raises:
        KeyError: if a required key is missing from ``fits.history``.
    """
    # (row, column, history key, metric name, y-axis label, log-scaled y axis)
    panels = (
        (0, 0, 'loss', "EPE", "EPE (log Scaled)", True),
        (1, 0, 'loss', "EPE", "EPE", False),
        (0, 1, 'mse', "MSE", "MSE", False),
        (1, 1, 'mse', "MSE", "MSE (log scaled)", True),
    )

    _, axs = plt.subplots(2, 2, figsize = (40, 20))

    for row, col, key, metric, ylabel, logScale in panels:
        ax = axs[row][col]
        ax.plot(fits.history[key], label = f"Conventional NN Train {metric}")
        if validationData:
            ax.plot(fits.history[f"val_{key}"], label = f"Conventional NN Train {metric} validation")

        # The legend must be created after every curve is on the axes.
        ax.legend()
        ax.set_xlabel("Epochs")
        ax.set_ylabel(ylabel)
        if logScale:
            ax.set_yscale('log')
        ax.grid(True)

    plt.show()

In [None]:
# Plot the loss/MSE curves recorded in `hist` by the FlowConvNet fit (validation curves are on by default).
visualiseFits(hist)

In [None]:
# --- Logging and checkpointing set-up for the FlowRefinerNET run ---

# Timestamped log directory so every run gets its own TensorBoard folder.
log_dir = os.path.join(".", "assets", "logs", "fits", f"FlowRefinerNET_{datetime.datetime.now().strftime('%Y%m%d-%H%M%S')}")
tensorboard_callback = tf.keras.callbacks.TensorBoard(
    log_dir=log_dir,
    histogram_freq=1,
    write_images=True,
    profile_batch='10, 110',
)

# Weights directory; exist_ok=True replaces the exists()/makedirs() pair, which can
# fail if the directory appears between the check and the creation.
modelDir = os.path.join(".", "assets", "ml", "models", "FlowRefinerNet")
os.makedirs(modelDir, exist_ok=True)

# Keep only the weights of the epoch with the best validation loss.
modelPath = os.path.join(modelDir, "FlowRefinerNET.h5")
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=modelPath,
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=True,
)

# Multiply the learning rate by 0.85 after 50 epochs on a plateau, never going below 1e-6.
learning_rate_callback = tf.keras.callbacks.ReduceLROnPlateau(factor=0.85, patience=50, min_lr=1e-6)
_callbacks = [tensorboard_callback, model_checkpoint_callback, learning_rate_callback]

# NOTE(review): the fit cell is handed `_callbacks`; this CallbackList is not referenced in the visible cells.
callbacks = tf.keras.callbacks.CallbackList(_callbacks, add_history=False)

# Default summary writer for this run, writing under <log_dir>/metrics.
fileWriter = tf.summary.create_file_writer(os.path.join(log_dir, "metrics"))
fileWriter.set_as_default()

In [None]:
# Model trained in this section.
flowRefinerNET = FlowRefinerConvNet()
# NOTE(review): this loss instance is created but not passed to compile() below.
angularError = AE_Loss()

# Adam optimiser; training minimises EPE_Loss and additionally reports MSE.
optimizer = tf.keras.optimizers.Adam(
    learning_rate=1e-3,
    beta_1=0.9,
    beta_2=0.999,
)
flowRefinerNET.compile(
    optimizer=optimizer,
    loss=EPE_Loss(),
    metrics=['mse'],
)

# Build for the (None, 2, 436, 1024, 1) input used by the data cells, then print the layer summary.
flowRefinerNET.build(input_shape=(None, 2, 436, 1024, 1))
flowRefinerNET.summary()

In [None]:
# Train FlowRefinerNET on the training split, one sample per step.
# Validation uses the dedicated val_X / val_y split: `val_loss` drives both the
# best-weights checkpoint and the learning-rate schedule, so validating on the
# test split would leak it into model selection.
hist = flowRefinerNET.fit(
    x=train_X,
    y=train_y,
    epochs=2500,
    batch_size=1,
    verbose=True,
    validation_data=(val_X, val_y),
    callbacks=_callbacks,
)

In [None]:
# Plot the loss/MSE curves recorded in `hist` by the FlowRefinerNET fit (validation curves are on by default).
visualiseFits(hist)

In [None]:
# --- Logging and checkpointing set-up for the ResidualFlowNET run ---

# Timestamped log directory so every run gets its own TensorBoard folder.
log_dir = os.path.join(".", "assets", "logs", "fits", f"ResidualFlowNET_{datetime.datetime.now().strftime('%Y%m%d-%H%M%S')}")
tensorboard_callback = tf.keras.callbacks.TensorBoard(
    log_dir=log_dir,
    histogram_freq=1,
    write_images=True,
    profile_batch='10, 110',
)

# Weights directory; exist_ok=True replaces the exists()/makedirs() pair, which can
# fail if the directory appears between the check and the creation.
modelDir = os.path.join(".", "assets", "ml", "models", "ResidualFlowNET")
os.makedirs(modelDir, exist_ok=True)

# Keep only the weights of the epoch with the best validation loss.
modelPath = os.path.join(modelDir, "ResidualFlowNET.h5")
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=modelPath,
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=True,
)

# Multiply the learning rate by 0.85 after 50 epochs on a plateau, never going below 1e-6.
learning_rate_callback = tf.keras.callbacks.ReduceLROnPlateau(factor=0.85, patience=50, min_lr=1e-6)
_callbacks = [tensorboard_callback, model_checkpoint_callback, learning_rate_callback]

# NOTE(review): the fit cell is handed `_callbacks`; this CallbackList is not referenced in the visible cells.
callbacks = tf.keras.callbacks.CallbackList(_callbacks, add_history=False)

# Default summary writer for this run, writing under <log_dir>/metrics.
fileWriter = tf.summary.create_file_writer(os.path.join(log_dir, "metrics"))
fileWriter.set_as_default()

In [None]:
# Model trained in this section, configured with a single block.
residualFlowNET = ResidualFlowNET(numBlocks=1)
# NOTE(review): this loss instance is created but not passed to compile() below.
angularError = AE_Loss()

# Adam optimiser; training minimises EPE_Loss and additionally reports MSE.
optimizer = tf.keras.optimizers.Adam(
    learning_rate=1e-3,
    beta_1=0.9,
    beta_2=0.999,
)
residualFlowNET.compile(
    optimizer=optimizer,
    loss=EPE_Loss(),
    metrics=['mse'],
)

# Build for the (None, 2, 436, 1024, 1) input used by the data cells, then print the layer summary.
residualFlowNET.build(input_shape=(None, 2, 436, 1024, 1))
residualFlowNET.summary()

In [None]:
# Train ResidualFlowNET on the training split, one sample per step.
# Validation uses the dedicated val_X / val_y split: `val_loss` drives both the
# best-weights checkpoint and the learning-rate schedule, so validating on the
# test split would leak it into model selection.
hist = residualFlowNET.fit(
    x=train_X,
    y=train_y,
    epochs=2500,
    batch_size=1,
    verbose=True,
    validation_data=(val_X, val_y),
    callbacks=_callbacks,
)

In [None]:
# Plot the loss/MSE curves recorded in `hist` by the ResidualFlowNET fit (validation curves are on by default).
visualiseFits(hist)

In [11]:
# --- Logging and checkpointing set-up for the POIFE run ---

# Timestamped log directory so every run gets its own TensorBoard folder.
log_dir = os.path.join(".", "assets", "logs", "fits", f"POIFE_{datetime.datetime.now().strftime('%Y%m%d-%H%M%S')}")
tensorboard_callback = tf.keras.callbacks.TensorBoard(
    log_dir=log_dir,
    histogram_freq=1,
    write_images=True,
    profile_batch='10, 110',
)

# Weights directory; exist_ok=True replaces the exists()/makedirs() pair, which can
# fail if the directory appears between the check and the creation.
modelDir = os.path.join(".", "assets", "ml", "models", "POIFE")
os.makedirs(modelDir, exist_ok=True)

# Keep only the weights of the epoch with the best validation loss.
modelPath = os.path.join(modelDir, "POIFE.h5")
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=modelPath,
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=True,
)

# Multiply the learning rate by 0.85 after 50 epochs on a plateau, never going below 1e-6.
learning_rate_callback = tf.keras.callbacks.ReduceLROnPlateau(factor=0.85, patience=50, min_lr=1e-6)
_callbacks = [tensorboard_callback, model_checkpoint_callback, learning_rate_callback]

# NOTE(review): the fit cell is handed `_callbacks`; this CallbackList is not referenced in the visible cells.
callbacks = tf.keras.callbacks.CallbackList(_callbacks, add_history=False)

# Default summary writer for this run, writing under <log_dir>/metrics.
fileWriter = tf.summary.create_file_writer(os.path.join(log_dir, "metrics"))
fileWriter.set_as_default()

In [12]:
# Model trained in this section.
poife = POIFE()
# NOTE(review): this loss instance is created but not passed to compile() below.
angularError = AE_Loss()

# Adam optimiser; training minimises EPE_Loss and additionally reports MSE.
optimizer = tf.keras.optimizers.Adam(
    learning_rate=1e-3,
    beta_1=0.9,
    beta_2=0.999,
)
poife.compile(
    optimizer=optimizer,
    loss=EPE_Loss(),
    metrics=['mse'],
)

# Build for the (None, 2, 436, 1024, 1) input used by the data cells, then print the layer summary.
poife.build(input_shape=(None, 2, 436, 1024, 1))
poife.summary()

Input Shape: (None, 436, 1024, 2)
ResBlockA Shape: (None, 218, 512, 32)
MaxPoolA Shape: (None, 109, 256, 32)
ResBlockB Shape: (None, 109, 256, 64)
MaxPoolB Shape: (None, 54, 128, 64)
ResBlockC Shape: (None, 54, 128, 128)
MaxPoolC Shape: (None, 27, 64, 128)
ResBlockD Shape: (None, 27, 64, 256)
MaxPoolD Shape: (None, 13, 32, 256)
ResBlockE Shape: (None, 13, 32, 512)
MaxPoolE Shape: (None, 6, 16, 512)
DecoderA Shape: (None, 6, 16, 256)
UpSampleA Shape: (None, 12, 32, 256)
After Padding Shape: (None, 13, 32, 256)
DecoderB Shape: (None, 13, 32, 128)
UpSampleB Shape: (None, 26, 64, 128)
After Padding Shape: (None, 27, 64, 128)
DecoderC Shape: (None, 27, 64, 64)
UpSampleC Shape: (None, 54, 128, 64)
DecoderD Shape: (None, 54, 128, 32)
UpSampleD Shape: (None, 108, 256, 32)
After Padding Shape: (None, 109, 256, 32)
DecoderE Shape: (None, 109, 256, 16)
UpSampleE Shape: (None, 218, 512, 16)
DecoderF Shape: (None, 218, 512, 8)
UpSampleF Shape: (None, 436, 1024, 8)
Input Shape: (None, 436, 1024, 2)


In [13]:
# Train POIFE on the training split, one sample per step.
# Validation uses the dedicated val_X / val_y split: `val_loss` drives both the
# best-weights checkpoint and the learning-rate schedule, so validating on the
# test split would leak it into model selection.
hist = poife.fit(
    x=train_X,
    y=train_y,
    epochs=2500,
    batch_size=1,
    verbose=True,
    validation_data=(val_X, val_y),
    callbacks=_callbacks,
)

Epoch 1/2500
Epoch 2/2500
Epoch 3/2500
Epoch 4/2500
Epoch 5/2500
Epoch 6/2500
Epoch 7/2500
Epoch 8/2500
Epoch 9/2500
Epoch 10/2500
Epoch 11/2500
Epoch 12/2500
Epoch 13/2500
Epoch 14/2500
Epoch 15/2500
Epoch 16/2500
Epoch 17/2500
Epoch 18/2500
Epoch 19/2500
Epoch 20/2500
Epoch 21/2500
Epoch 22/2500
Epoch 23/2500
Epoch 24/2500
Epoch 25/2500
Epoch 26/2500
Epoch 27/2500
Epoch 28/2500
Epoch 29/2500
Epoch 30/2500
Epoch 31/2500
Epoch 32/2500
Epoch 33/2500
Epoch 34/2500
Epoch 35/2500
Epoch 36/2500
Epoch 37/2500
Epoch 38/2500
Epoch 39/2500
Epoch 40/2500
Epoch 41/2500
Epoch 42/2500
Epoch 43/2500
Epoch 44/2500
Epoch 45/2500
Epoch 46/2500
Epoch 47/2500
Epoch 48/2500
Epoch 49/2500
Epoch 50/2500
Epoch 51/2500
Epoch 52/2500
Epoch 53/2500
Epoch 54/2500
Epoch 55/2500
Epoch 56/2500
Epoch 57/2500
Epoch 58/2500
Epoch 59/2500
Epoch 60/2500
Epoch 61/2500
Epoch 62/2500
Epoch 63/2500
Epoch 64/2500
Epoch 65/2500
Epoch 66/2500
Epoch 67/2500
Epoch 68/2500
Epoch 69/2500
Epoch 70/2500
Epoch 71/2500
Epoch 72/2500
E

KeyboardInterrupt: 

In [None]:
# Plot the loss/MSE curves recorded in `hist` by the POIFE fit (validation curves are on by default).
visualiseFits(hist)