**Code for Paper "The reconstruction of flows from spatiotemporal data by autoencoders"**

Facundo Fainstein (1,2), Josefina Catoni (1), Coen Elemans (3) and Gabriel B. Mindlin (1,2,4)* 

(1) Universidad de Buenos Aires, Facultad de Ciencias Exactas y Naturales, Departamento de Física, Ciudad Universitaria, 1428 Buenos Aires, Argentina.

(2) CONICET - Universidad de Buenos Aires, Instituto de Física Interdisciplinaria y Aplicada (INFINA), Ciudad Universitaria, 1428 Buenos Aires, Argentina.

(3) Department of Biology, University of Southern Denmark, 5230 Odense M, Denmark.

(4) Universidad Rey Juan Carlos, Departamento de Matemática Aplicada, Madrid, Spain. 

*Gabriel B. Mindlin (corresponding author)
Email: gabo@df.uba.ar


**Train autoencoder with different latent space dimensions** 

Do 100 fittings of an autoencoder (the number of fittings is set by `N` in the last cell, which is 1 by default). The architecture is: 9900-64-32-16-NUM_MODOS-16-32-64-9900. 

Save the results in .txt files. 

To fit networks with different latent space dimensions, change the number of units in the middle layer (`NUM_MODOS`).

In [None]:
# Python libraries
import numpy as np
import matplotlib.pyplot as plt
import glob
from numpy import loadtxt
from skimage import io
import os
from pathlib import Path

#Keras
import keras
from keras import layers
from keras import regularizers
from keras.models import Sequential, load_model
from keras.layers.core import Dense, Dropout, Activation
from keras import backend as K

#Uncomment the following lines if running in Google Colab

# from google.colab import drive
#Mount drive to load images
#drive.mount('/content/gdrive', force_remount=True)  

**Load data**

In [None]:
# Load the movie frames and build the mean-subtracted pixel array

# Folder that contains the movie frames
root_dir = '/home/.../'

# Collect the names of all .jpg frames in that folder, in sorted order
lista_files = sorted(glob.glob(root_dir + '*.jpg'))

# Read every frame as a grayscale image; the first axis of `data` indexes frames
data = np.array([io.imread(frame_path, as_gray=True) for frame_path in lista_files])
print(data.shape)

# Select the pixels of each image to analyze (rows 120-229, columns 180-269)
x_der = data[:, 120:230, 180:270].astype('float32')

# Temporal mean value of each pixel (average over the frame axis)
xmean_der = x_der.mean(axis=0)

# Subtract the mean from every frame
x_der -= xmean_der
print(type(x_der))
print(x_der.shape)

**Define a function that fits the network and saves the results**

In [None]:
#Customized callback to save the encoding and the test MSE at each epoch of the training procedure

class CustomCallback(keras.callbacks.Callback):
    """Save the test-set encoding and reconstruction error at the end of every epoch.

    After each training epoch two text files are written into ``model_path``:

    * ``ls_test_epoch_<epoch>.txt``: output of the latent layer for ``x_test``.
    * ``mse_test_epoch_<epoch>.txt``: squared reconstruction error of
      ``x_test``, summed over the second axis (one value per test sample).
      Note that despite the file name this is a sum, not a mean.

    Parameters
    ----------
    model : keras model whose layers are inspected and used for prediction.
    x_test : array with the test samples (one sample per row).
    model_path : folder in which the per-epoch files are saved.
    latent_layer_index : index into ``model.layers`` of the layer whose output
        is saved as the encoding. The default (4) is the middle layer of the
        network built in ``train_save`` (input layer followed by three dense
        encoder layers).
    """

    def __init__(self, model, x_test, model_path, latent_layer_index=4):
        super().__init__()
        self.model = model
        self.x_test = x_test
        self.model_path = model_path
        self.latent_layer_index = latent_layer_index

    def on_epoch_end(self, epoch, logs=None):
        # Backend function mapping the network input to the latent layer output
        get_latent_layer_output = K.function(
            [self.model.layers[0].input],
            [self.model.layers[self.latent_layer_index].output])
        layer_output_test = get_latent_layer_output([self.x_test])[0]

        # Squared reconstruction error of each test sample
        y_pred = self.model.predict(self.x_test)
        rec_error_test = np.sum((self.x_test - y_pred) ** 2, axis=1)

        # Use the path stored on the instance, not a global variable that
        # happens to be called `model_path` in the calling scope.
        np.savetxt(f"{self.model_path}/ls_test_epoch_{epoch}.txt", layer_output_test)
        np.savetxt(f"{self.model_path}/mse_test_epoch_{epoch}.txt", rec_error_test)

#Function that creates and trains the network, and saves the results

def train_save(X, model_path, NUM_MODOS, epochs=200, batch_size=16, n_train=300):
    """Create and train the autoencoder, and save the results in ``model_path``.

    The network is fully connected with layer sizes
    ``n_pixels-64-32-16-NUM_MODOS-16-32-64-n_pixels`` (ReLU activations, linear
    middle and output layers), trained with the Adam optimizer and MSE loss.

    Parameters
    ----------
    X : 2-D array, ``[time, rows*columns pixels]``.
    model_path : path to the folder in which results will be saved (it must
        already exist).
    NUM_MODOS : number of units in the middle (latent) layer.
    epochs : number of training epochs.
    batch_size : batch size used during training.
    n_train : the first ``n_train`` rows of ``X`` form the training set and the
        remaining rows form the test set.

    Returns
    -------
    (train_mse, val_mse) : arrays with the training and test loss per epoch.

    Raises
    ------
    ValueError
        If ``n_train`` does not leave both a non-empty training set and a
        non-empty test set.

    Side effects
    ------------
    Writes ``train_mse.txt`` and ``val_mse.txt`` into ``model_path``. Of the
    per-epoch files written by ``CustomCallback``, only those of the epoch with
    minimum training loss are kept.
    """
    n_samples = X.shape[0]
    if not 0 < n_train < n_samples:
        raise ValueError(
            "n_train must satisfy 0 < n_train < number of samples "
            "(got n_train={}, samples={})".format(n_train, n_samples))

    # Separate train and test data sets
    X_train, X_test = X[:n_train], X[n_train:]

    # Define the network
    numero_pixeles = X.shape[1]
    input_img = keras.Input(shape=(numero_pixeles,))
    encoded = layers.Dense(64, activation='relu')(input_img)
    encoded = layers.Dense(32, activation='relu')(encoded)
    encoded = layers.Dense(16, activation='relu')(encoded)
    encoded = layers.Dense(NUM_MODOS, activation='linear')(encoded)
    decoded = layers.Dense(16, activation='relu')(encoded)
    decoded = layers.Dense(32, activation='relu')(decoded)
    decoded = layers.Dense(64, activation='relu')(decoded)
    decoded = layers.Dense(numero_pixeles, activation='linear')(decoded)

    # Create the autoencoder
    autoencoder = keras.Model(input_img, decoded)

    # Compile the model
    autoencoder.compile(loss='mse', metrics=['mean_absolute_error'], optimizer='adam')

    # Train the model and save metrics in history; the callback stores the
    # encoding and the reconstruction error of the test set after each epoch
    history = autoencoder.fit(
        X_train, X_train,
        batch_size=batch_size, epochs=epochs,
        validation_data=(X_test, X_test),
        verbose=0,
        callbacks=[CustomCallback(autoencoder, X_test, model_path)])

    # Get results from trained net
    train_mse = np.array(history.history['loss'])
    val_mse = np.array(history.history['val_loss'])
    optimal_epoch = np.argmin(train_mse)

    # Delete results of every epoch except the one with minimum mse for the
    # train set. The number of epochs is taken from the recorded history so it
    # always matches the files that were actually written.
    for epoch in range(len(train_mse)):
        if epoch == optimal_epoch:
            continue
        os.remove(model_path + '/ls_test_epoch_' + str(epoch) + ".txt")
        os.remove(model_path + '/mse_test_epoch_' + str(epoch) + ".txt")

    # Save the loss curves
    np.savetxt(model_path + '/train_mse.txt', train_mse)
    np.savetxt(model_path + '/val_mse.txt', val_mse)

    return train_mse, val_mse

**Fit N times**

In [None]:
# Flatten every frame into a row vector: X = [time, rows*columns pixels]
n_frames, n_rows, n_cols = x_der.shape
X = x_der.reshape(n_frames, n_rows * n_cols)

# Euclidean distance between consecutive frames, from frame 300 onwards
frame_steps = np.diff(X[300:], axis=0)
d_frames_test = np.sqrt((frame_steps ** 2).sum(axis=1))

# Number of units in the middle layer
NUM_MODOS = 4

# Number of trainings
N = 1
# Folder under which one sub-folder per training is created
path = '/home/.../'

for k in range(N):
    print(f'iteracion {k} de {N}')

    # Folder in which the results of this training are saved
    directory = f'dim_LS_{NUM_MODOS}_it{k}'
    model_path = os.path.join(path, directory)
    Path(model_path).mkdir(parents=True, exist_ok=True)

    # Train and save
    train_mse, val_mse = train_save(X, model_path=model_path, NUM_MODOS=NUM_MODOS)
  