# Artwork Sequence Prediction Evaluation

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import os
from multivariate_fretech_distance import frechet_distance
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Root of the k-fold experiment tree and the fold evaluated in this notebook.
BASE_PATH = '/root/work/artwork_sequence/kfold'
folder = 'folder_3'

# Data files of the selected fold.
DATASET_PATH = os.path.join(BASE_PATH, folder)

# Configuration folder ('univariate' variant) of the same fold.
# Disabled alternatives for CONFIG_PATH:
#   '/root/work/artwork_sequence/train_test_configuration/multivariate'
#   '/root/work/artwork_sequence/train_test_configuration/univariate'
CONFIG_PATH = os.path.join(BASE_PATH, 'univariate', folder)


In [None]:
# Index of the configuration to evaluate; it is also reused as the window size.
window_index = 4

# Dataset files all live directly under DATASET_PATH.
museum_sequence_path = {
    key: os.path.join(DATASET_PATH, file_name)
    for key, file_name in (
        ('x_train', 'X_train.csv'),
        ('x_test', 'X_test.csv'),
        ('x_train_matrix', 'X_train_matrix.npy'),
        ('x_test_matrix', 'X_test_matrix.npy'),
    )
}
# Trained weights are stored per configuration under CONFIG_PATH.
museum_sequence_path['weights_folder'] = os.path.join(
    CONFIG_PATH, f'config_{window_index}/trained_model_weights'
)
museum_sequence_path

## Load data

In [None]:
# The first CSV column is used as the row index.
df_x_train, df_x_test = (
    pd.read_csv(museum_sequence_path[key], index_col=0)
    for key in ('x_train', 'x_test')
)
# Feature matrices stored as .npy files.
x_train_matrix, x_test_matrix = (
    np.load(museum_sequence_path[key])
    for key in ('x_train_matrix', 'x_test_matrix')
)
df_x_train.head()

In [None]:
# Dimensions of the training feature matrix.
np.shape(x_train_matrix)

## Reset Tensorflow session

In [None]:
# Reset the global Keras state before the models are built.
tf.keras.backend.clear_session()

## Configure data to fit the model input

Because the **Prediction feature model** splits the data into training and validation datasets itself, all the data must be provided as a single block.

**Define timeline**

In [None]:
# One consecutive index per row of the train and test matrices.
timeline = np.arange(len(x_train_matrix) + len(x_test_matrix))
np.shape(timeline)

**Define configuration to deal with the windowed dataset**

In [None]:
# Train rows followed by test rows, stacked into a single block.
X = np.concatenate([x_train_matrix, x_test_matrix], axis=0)

# Row index of the first test sample inside X.
split_time = len(x_train_matrix)

# The window length is taken from the selected configuration index.
window_size = window_index

# Number of feature columns per artwork
n_features = X.shape[1]

# Number of features to take into account
n_influence_features = 5

batch_size, shuffle_buffer_size = 128, 300

In [None]:
# Dimensions of the stacked train + test matrix.
np.shape(X)

## Create and Load models

**Get models**

In [None]:
from Sequence_prediction_factory import Sequence_prediction_multivariate, Sequence_prediction_univariate

In [None]:
# Univariate prediction wrapper: it receives the full stacked series, the
# train/validation split index, the batching settings and the configuration
# folder.
model_univariate = Sequence_prediction_univariate(
    X=X, 
    shuffle_buffer_size=shuffle_buffer_size, 
    split_time=split_time, 
    train_batch_size=batch_size, 
    val_batch_size=batch_size,
    CONFIG_PATH=CONFIG_PATH)

In [None]:
# Multivariate prediction wrapper: same data and batching settings, plus the
# window size and the number of influence features.
# NOTE(review): unlike the univariate wrapper, no CONFIG_PATH is passed
# here - confirm this is intended.
model_multivariate = Sequence_prediction_multivariate(
    X=X, 
    shuffle_buffer_size=shuffle_buffer_size, 
    split_time=split_time, 
    train_batch_size=batch_size, 
    val_batch_size=batch_size, 
    window_size=window_size, 
    n_influence_features=n_influence_features)

In [None]:
# Select the wrapper used for the rest of the evaluation (univariate here).
model_prediction = model_univariate

In [None]:
# Number of feature columns in the stacked matrix.
n_features = X.shape[1]

# Set the window length on the wrapper, then fetch and define the RNN model.
model_prediction.set_window_size(window_size)
model = model_prediction.get_model()
model.define_model(
    conv_filter=20,
    lstm_filter=40,
    dense_filter=20,
    prediction_length=1,
)
model.get_model().summary()

## Evaluate model

In [None]:
from utils_plot import plot_series, plot_train_history, plot_prediction

In [None]:
def model_forecast(model, series, window_size, batch_size):
    """Run ``model.predict`` over every sliding window of ``series``.

    Args:
        model: Object exposing ``predict``; it is fed a batched
            ``tf.data.Dataset`` of windows.
        series: Sequence of observations, sliced along its first axis. A
            1-D series gets a trailing axis appended first.
        window_size: Number of consecutive steps in each window.
        batch_size: Number of windows per batch handed to ``predict``.

    Returns:
        Whatever ``model.predict`` returns for the windowed dataset. Windows
        are generated in order, one step apart.
    """
    if len(series.shape) == 1:
        series = tf.expand_dims(series, axis=-1)
    ds = tf.data.Dataset.from_tensor_slices(series)
    # drop_remainder=True keeps only complete windows.
    ds = ds.window(window_size, shift=1, drop_remainder=True)
    # Each window is a nested dataset; batching it yields a single tensor.
    ds = ds.flat_map(lambda w: w.batch(window_size))
    ds = ds.batch(batch_size)
    return model.predict(ds)

**Predict validation dataset**

In [None]:
# Empty results table: one row per evaluated feature is added later.
df_evaluation = pd.DataFrame(
    {column: [] for column in ('feature', 'forecast', 'x_valid', 'mae')}
)
df_evaluation

In [None]:
# Collect one result row per feature and build the frame once at the end.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; building
# the frame from scratch also keeps this cell idempotent when it is re-run.
evaluation_rows = []

for feature in range(n_features):

    # Load weights for feature i
    model.set_index(feature)
    model.load_weights(museum_sequence_path)

    # Columns used for the prediction: the target feature goes first,
    # followed by the indexes reported by the model.
    x_influence_features = model.get_indexes_features()
    x_influence_features = np.insert(arr=x_influence_features, obj=0, values=int(feature))
    x_feature = X[:, x_influence_features.astype(int)]

    # Predict feature i
    rnn_forecast = model_forecast(model.get_model(), x_feature, window_size, batch_size)
    # NOTE(review): the window starting at split_time-window_size+1 already
    # contains X[split_time]. If the model was trained to predict the step
    # that follows each window, this slice should be
    # [split_time-window_size:-1] - confirm against the training code.
    rnn_forecast = rnn_forecast[split_time-window_size+1:, -1]

    # Get validation dataset (target feature is column 0 of x_feature)
    x_valid = x_feature[split_time:, 0]

    # Compute MAE
    mae = tf.keras.metrics.mean_absolute_error(x_valid, rnn_forecast).numpy().mean()

    evaluation_rows.append({'feature': feature,
                            'forecast': rnn_forecast,
                            'x_valid': x_valid,
                            'mae': mae})

df_evaluation = pd.DataFrame(evaluation_rows,
                             columns=['feature', 'forecast', 'x_valid', 'mae'])

df_evaluation.head()

### Prepare data to compute Fréchet distance

First, we get the values for real and predicted images

In [None]:
# Stack the stored per-feature series as rows, then transpose; assuming each
# stored series is 1-D, rows become time steps and columns become features.
forecast_feature_matrix = np.transpose(np.vstack(df_evaluation['forecast'].tolist()))
real_feature_matrix = np.transpose(np.vstack(df_evaluation['x_valid'].tolist()))

In [None]:
# Dimensions of the forecast feature matrix.
np.shape(forecast_feature_matrix)

Then, we compute the mean and covariance

In [None]:
# Column-wise mean and covariance of the forecast values; rowvar=False treats
# each column as a variable and each row as an observation.
mu_forecast_feature = np.mean(forecast_feature_matrix, axis=0)
# Fixed typo: the matrix name was misspelled 'forescast_feature_matrix',
# which raised a NameError.
sigma_forecast_feature = np.cov(forecast_feature_matrix, rowvar=False)

# Same statistics for the real (validation) values.
mu_real_feature = np.mean(real_feature_matrix, axis=0)
sigma_real_feature = np.cov(real_feature_matrix, rowvar=False)

At this point, we visualize what the pairwise multivariate distributions of the inception features look like

In [None]:
# Feature columns compared in the pair plot.
indices = [2, 4, 5]

# Draw 1000 samples from each Gaussian restricted to the selected features
# (forecast first, then real).
forecast_dist = np.random.multivariate_normal(
    mean=mu_forecast_feature[indices],
    cov=sigma_forecast_feature[np.ix_(indices, indices)],
    size=1000,
)
real_dist = np.random.multivariate_normal(
    mean=mu_real_feature[indices],
    cov=sigma_real_feature[np.ix_(indices, indices)],
    size=1000,
)

# Label each sample with its origin so the plot can colour by it.
df_forecast = pd.DataFrame(forecast_dist, columns=indices).assign(is_real="no")
df_real = pd.DataFrame(real_dist, columns=indices).assign(is_real="yes")
df = pd.concat([df_forecast, df_real])
sns.pairplot(df, hue='is_real', plot_kws={'alpha': 0.1})

In [None]:
# Distance between the forecast (x) and real (y) feature statistics.
frechet_distance(
    mu_x=mu_forecast_feature,
    mu_y=mu_real_feature,
    sigma_x=sigma_forecast_feature,
    sigma_y=sigma_real_feature,
)