# MAE evaluation

In [1]:
import numpy as np
import pandas as pd
import os
from sklearn.metrics import mean_absolute_error

In [2]:
# Root folder of the artwork-sequence datasets (metadata CSV and code matrix are read from here).
BASE_PATH = "/root/work/datasets/artwork_sequence/"

# Folder holding the train/test split files and the per-configuration model weights.
CONFIG_PATH = "/root/work/artwork_sequence/train_test_configuration"

# Folder where the predicted tours are written as CSV files.
# Alternative output folder, kept for reference:
#   '/root/work/artwork_sequence/predicted_tours/generated_sequence_rnn'
RESULT_PATH = "/root/work/artwork_sequence/predicted_tours/generated_sequence_based_previous_most_similar"

## Load data

**Load all metadata**

In [3]:
# Resolve both input files under BASE_PATH first, then load them.
metadata_csv_path = os.path.join(BASE_PATH, 'all_metadata.csv')
code_matrix_path = os.path.join(BASE_PATH, 'all_code_matrix.npy')

df_all_metadata = pd.read_csv(metadata_csv_path)
all_data_matrix = np.load(code_matrix_path)

In [4]:
# Sanity check: print the shape of the metadata table, then of the code matrix.
for loaded_object in (df_all_metadata, all_data_matrix):
    print(loaded_object.shape)

(633, 6)
(633, 300)


**Load tours**

In [5]:
window_index = 3

# The weights live in a sub-folder named after the selected configuration index.
weights_subfolder = 'config_{}/trained_model_weights'.format(window_index)

# Locations of the train/test split files and of the trained weights, all under CONFIG_PATH.
museum_sequence_path = dict(
    x_train=os.path.join(CONFIG_PATH, 'X_train.csv'),
    x_test=os.path.join(CONFIG_PATH, 'X_test.csv'),
    x_train_matrix=os.path.join(CONFIG_PATH, 'X_train_matrix.npy'),
    x_test_matrix=os.path.join(CONFIG_PATH, 'X_test_matrix.npy'),
    weights_folder=os.path.join(CONFIG_PATH, weights_subfolder),
)
museum_sequence_path

{'weights_folder': '/root/work/artwork_sequence/train_test_configuration/config_3/trained_model_weights',
 'x_test': '/root/work/artwork_sequence/train_test_configuration/X_test.csv',
 'x_test_matrix': '/root/work/artwork_sequence/train_test_configuration/X_test_matrix.npy',
 'x_train': '/root/work/artwork_sequence/train_test_configuration/X_train.csv',
 'x_train_matrix': '/root/work/artwork_sequence/train_test_configuration/X_train_matrix.npy'}

In [6]:
# Tour listings (CSV, first column used as the index): train first, then test.
df_x_train, df_x_test = [
    pd.read_csv(museum_sequence_path[split_key], index_col=0)
    for split_key in ('x_train', 'x_test')
]

# Matching matrices stored as .npy files: train first, then test.
x_train_matrix, x_test_matrix = [
    np.load(museum_sequence_path[matrix_key])
    for matrix_key in ('x_train_matrix', 'x_test_matrix')
]

df_x_train.head()

Unnamed: 0,tour_path
20,/root/work/datasets/artwork_sequence/rijksmuse...
7,/root/work/datasets/artwork_sequence/rijksmuse...
40,/root/work/datasets/artwork_sequence/prado_cra...
0,/root/work/datasets/artwork_sequence/rijksmuse...
23,/root/work/datasets/artwork_sequence/prado_cra...


In [7]:
# Paths of the test tours, in the row order of df_x_test.
tour_list = df_x_test.loc[:, 'tour_path'].values

## Load model

In [9]:
from Sequence_generator_based_previous_most_similar import Sequence_generator_based_previous_most_similar
from Sequence_geneartion_rnn import Sequence_generator_rnn
from Generator_model_factory import Generator_model_factory, Generator_model_most_similar, Generator_model_rnn

## Compute MAE

**Define models**

In [None]:
# Keyword arguments for the most-similar generator, collected up front for readability.
most_similar_kwargs = dict(
    all_data_matrix=all_data_matrix,
    df_all_metadata=df_all_metadata,
    window_size=3,
)
gen_model_most_sim = Generator_model_most_similar(**most_similar_kwargs)

In [12]:
# Number of rows in the training matrix, passed to the RNN generator as `split_time`.
split_time = x_train_matrix.shape[0]

# Keyword arguments for the RNN generator, collected up front for readability.
rnn_kwargs = dict(
    X=x_test_matrix,
    all_data_matrix=all_data_matrix,
    batch_size=128,
    df_all_metadata=df_all_metadata,
    museum_sequence_path=museum_sequence_path,
    shuffle_buffer_size=300,
    split_time=split_time,
    window_size=window_index,
)
gen_model_rnn = Generator_model_rnn(**rnn_kwargs)

In [None]:
# Registry of the available generator models, keyed by a short identifier.
gen_models = dict(
    gen_model_most_sim=gen_model_most_sim,
    gen_model_rnn=gen_model_rnn,
)

In [10]:
# Empty results table: one row per evaluated tour is added later.
mae_columns = ('model', 'tour', 'mae')
df_mae = pd.DataFrame({column_name: [] for column_name in mae_columns})
df_mae

Unnamed: 0,mae,model,tour


In [None]:
# Generator under evaluation. The loop below used `gen_model` although no cell
# defined it, so a fresh-kernel "Run All" raised NameError.
# NOTE(review): 'gen_model_most_sim' is selected because RESULT_PATH points at the
# most-similar output folder; change both together to evaluate the RNN generator.
gen_model = gen_models['gen_model_most_sim']

index = 0
window_size = 3

# One record per tour; the DataFrame is built once after the loop.
# (DataFrame.append was removed in pandas 2.0 and was quadratic when used in a loop.)
mae_records = []

for tour in tour_list:
    # Get tour: its rows are the next `tour_length` rows of x_test_matrix,
    # where `tour_length` is the number of entries in the tour's 'images' folder.
    images_path = os.path.join(tour, 'images')
    tour_length = len(os.listdir(images_path))
    X_tour = x_test_matrix[index: index + tour_length]

    metadata_path = os.path.join(tour, 'metadata.csv')
    df_X_tour = pd.read_csv(metadata_path)

    index += tour_length

    # Define model
    gen_model.set_tour(X_tour=X_tour, df_X_tour=df_X_tour)
    model = gen_model.get_model()

    df_predicted_tour = model.predict_tour()

    # Compute MAE against the tour with its first `window_size` rows dropped
    # (presumably the forecast starts after the first window - TODO confirm).
    forecast_matrix = model.get_predicted_tour_matrix()
    mae = np.mean(
        mean_absolute_error(
            X_tour[window_size:, :].T,
            forecast_matrix.T,
            multioutput='raw_values',
        )
    )

    # Save tour. basename(normpath(...)) also works when the path ends with a slash,
    # where split('/')[-1] would return an empty name.
    tour_name = os.path.basename(os.path.normpath(tour))
    df_predicted_tour.to_csv(os.path.join(RESULT_PATH, 'predicted_' + tour_name + '.csv'), index=False)

    # Save MAE
    mae_records.append(
        {'model': model.get_name(),
         'tour': tour_name,
         'mae': mae}
    )

# Rebuilt from scratch on every run, so re-executing this cell does not duplicate rows.
df_mae = pd.DataFrame(mae_records, columns=['model', 'tour', 'mae'])
df_mae



In [None]:
# Average MAE over all rows of df_mae.
df_mae.loc[:, 'mae'].mean()