# MAE evaluation

In [None]:
import numpy as np
import pandas as pd
import os
from sklearn.metrics import mean_absolute_error
import gc

In [None]:
# Folder holding the dataset files (the trailing slash is part of the value).
BASE_PATH = '/root/work/datasets/artwork_sequence/'
# Folder holding the train/test split files and the per-window configuration folders.
CONFIG_PATH = '/root/work/artwork_sequence/train_test_configuration'
# Root folder under which the per-window result folders are placed.
RESULT_PATH = '/root/work/artwork_sequence/predicted_tours'

## Load data

**Load all metadata**

In [None]:
# Resolve both dataset files under BASE_PATH first, then load them:
# the metadata table with pandas and the code matrix with numpy.
metadata_csv_file = os.path.join(BASE_PATH, 'all_metadata.csv')
code_matrix_file = os.path.join(BASE_PATH, 'all_code_matrix.npy')

df_all_metadata = pd.read_csv(metadata_csv_file)
all_data_matrix = np.load(code_matrix_file)

In [None]:
# Show the shape of the metadata table and of the code matrix, one per line.
print(df_all_metadata.shape, all_data_matrix.shape, sep='\n')

**Load tours**

In [None]:
# Window configuration to evaluate; it selects the 'config_<n>' folders below.
window_index = 5
config_name = 'config_' + str(window_index)

# Locations of the train/test split files plus the weights and results
# folders that belong to the selected window configuration.
museum_sequence_path = {
    'x_train': os.path.join(CONFIG_PATH, 'X_train.csv'),
    'x_test': os.path.join(CONFIG_PATH, 'X_test.csv'),
    'x_train_matrix': os.path.join(CONFIG_PATH, 'X_train_matrix.npy'),
    'x_test_matrix': os.path.join(CONFIG_PATH, 'X_test_matrix.npy'),
    'weights_folder': os.path.join(CONFIG_PATH, config_name + '/trained_model_weights'),
    'results_folder': os.path.join(RESULT_PATH, config_name),
}
museum_sequence_path

In [None]:
# Train/test tables (first CSV column used as the index), loaded in the
# order train, test.
df_x_train, df_x_test = (
    pd.read_csv(museum_sequence_path[split_key], index_col=0)
    for split_key in ('x_train', 'x_test')
)

# Matching train/test matrices, loaded in the same order.
x_train_matrix, x_test_matrix = (
    np.load(museum_sequence_path[matrix_key])
    for matrix_key in ('x_train_matrix', 'x_test_matrix')
)

df_x_train.head()

In [None]:
# Values of the 'tour_path' column of the test table; each entry is later
# used as a tour folder path.
tour_list = df_x_test['tour_path'].values

## Load model

In [None]:
from Sequence_generator_based_previous_most_similar import Sequence_generator_based_previous_most_similar
from Sequence_geneartion_rnn import Sequence_generator_rnn
from Generator_model_factory import Generator_model_factory, Generator_model_most_similar, Generator_model_rnn

## Compute MAE

**Define the window size to work with**

In [None]:
# The window size used by the models equals the window_index chosen when the
# file paths were defined.
window_size = window_index

**Define models**

In [None]:
# Generator for the "most similar" model, built from the full code matrix,
# its metadata table and the selected window size.
most_similar_kwargs = {
    'window_size': window_size,
    'df_all_metadata': df_all_metadata,
    'all_data_matrix': all_data_matrix,
}
gen_model_most_sim = Generator_model_most_similar(**most_similar_kwargs)

In [None]:
# split_time is the number of rows of the training matrix.
split_time = x_train_matrix.shape[0]

# Generator for the RNN model; it receives the test matrix as X together
# with the path dictionary (which includes the weights folder).
rnn_kwargs = {
    'X': x_test_matrix,
    'all_data_matrix': all_data_matrix,
    'df_all_metadata': df_all_metadata,
    'museum_sequence_path': museum_sequence_path,
    'window_size': window_size,
    'split_time': split_time,
    'batch_size': 128,
    'shuffle_buffer_size': 300,
}
gen_model_rnn = Generator_model_rnn(**rnn_kwargs)

In [None]:
# Model generators to evaluate, in evaluation order.
gen_models = [gen_model_most_sim, gen_model_rnn]


**Define MAE dataframe**

In [None]:
def get_mae_dataframe():
    """Return an empty DataFrame with the columns 'model', 'tour' and 'mae'."""
    empty_columns = {column: [] for column in ('model', 'tour', 'mae')}
    return pd.DataFrame(empty_columns)

In [None]:
# Iterate over all models using the same window size. For every model:
# predict each test tour, save the predicted tour as CSV, and write the
# per-tour MAE values to 'mae.csv' in the model's results folder.
for gen_model in gen_models:
    # Row offset of the current tour inside x_test_matrix
    index = 0

    # Get sequence generator model
    model = gen_model.get_model()

    # Results folder of this model. It is created up front so the CSV writes
    # below do not fail when the folder does not exist yet.
    target_path = os.path.join(museum_sequence_path['results_folder'], str(gen_model))
    os.makedirs(target_path, exist_ok=True)

    # One record per evaluated tour. Rows are collected in a list because
    # DataFrame.append was removed in pandas 2.0 and copied the whole frame
    # on every call.
    mae_rows = []
    for tour in tour_list:
        # The tour length is the number of entries in the tour's 'images' folder
        images_path = os.path.join(tour, 'images')
        tour_length = len(os.listdir(images_path))
        X_tour = x_test_matrix[index: index + tour_length]

        metadata_path = os.path.join(tour, 'metadata.csv')
        df_X_tour = pd.read_csv(metadata_path)

        # Advance the offset before any early exit so the next tour is
        # sliced correctly even when this one is skipped.
        index += tour_length

        # Define model
        model.set_tour(X_tour=X_tour, df_X_tour=df_X_tour)

        df_predicted_tour = model.predict_tour()

        # Compute MAE
        forecast_matrix = model.get_predicted_tour_matrix()
        # In case the window size was bigger than the tour size
        if forecast_matrix.shape[0] == 0:
            continue

        # The first window_size rows of the tour are left out of the comparison.
        mae = np.mean(mean_absolute_error(
            X_tour[window_size:, :].T,
            forecast_matrix.T,
            multioutput='raw_values'))

        # Save tour
        tour_name = os.path.basename(tour)
        df_predicted_tour.to_csv(
            os.path.join(target_path, 'predicted_' + tour_name + '.csv'),
            index=False)

        # Save MAE
        mae_rows.append({
            'model': model.get_name(),
            'tour': tour_name,
            'mae': mae,
        })

    # Build the frame once, keeping the column layout of get_mae_dataframe()
    # even when no tour produced a forecast.
    df_mae = pd.DataFrame(mae_rows, columns=get_mae_dataframe().columns)
    df_mae.to_csv(os.path.join(target_path, 'mae.csv'), index=False)

    # Clear memory
    model.del_data()
    del model
    del df_mae
    gc.collect()

In [None]:
# df_mae is deleted at the end of every iteration of the evaluation loop, so
# it no longer exists here. The per-model results are read back from the
# 'mae.csv' files written by that loop and the mean MAE is reported per model.
mean_mae_by_model = {}
for gen_model in gen_models:
    mae_csv_path = os.path.join(
        museum_sequence_path['results_folder'], str(gen_model), 'mae.csv')
    mean_mae_by_model[str(gen_model)] = pd.read_csv(mae_csv_path)['mae'].mean()

pd.Series(mean_mae_by_model, name='mean_mae')