# MAE evaluation

In [None]:
import numpy as np
import pandas as pd
import os
from sklearn.metrics import mean_absolute_error
import gc
from multivariate_fretech_distance import compute_fretech_distance

In [None]:
# Root folder of the dataset-wide artwork files.
BASE_PATH = '/root/work/datasets/artwork_sequence/'

# Root folder of the train/test configuration; the model-specific folders
# below are all direct children of it.
CONFIG_BASE_PATH = '/root/work/artwork_sequence/train_test_configuration'


def _config_subdir(name):
    """Return the path of the sub-folder `name` under CONFIG_BASE_PATH."""
    return os.path.join(CONFIG_BASE_PATH, name)


MULTI_CONFIG_PATH = _config_subdir('multivariate')
UNI_CONFIG_PATH = _config_subdir('univariate')
UNI_PREDICT_MULTI_CONFIG_PATH = _config_subdir('univariate_predict_multiple')
EMB_CONFIG_PATH = _config_subdir('word_embeddings')
CODE_EMB_CONFIG_PATH = _config_subdir('code_embedding')
ARTIST_CODE_EMB_CONFIG_PATH = _config_subdir('artist_code_embedding')

# Root folder under which the evaluation results are written.
RESULT_PATH = '/root/work/artwork_sequence/predicted_tours'

In [None]:
# (key, root folder, file name) for every file this notebook reads. The order
# is kept so the displayed dict lists its entries in the same sequence.
_sequence_files = (
    ('x_train', CONFIG_BASE_PATH, 'X_train.csv'),
    ('x_test', CONFIG_BASE_PATH, 'X_test.csv'),
    ('x_train_matrix', CONFIG_BASE_PATH, 'X_train_matrix.npy'),
    ('x_test_matrix', CONFIG_BASE_PATH, 'X_test_matrix.npy'),
    ('embedding_train_matrix', CONFIG_BASE_PATH, 'embedding_train_matrix.npy'),
    ('embedding_test_matrix', CONFIG_BASE_PATH, 'embedding_test_matrix.npy'),
    ('artist_code_train_matrix', CONFIG_BASE_PATH, 'artist_code_train_matrix.npy'),
    ('artist_code_test_matrix', CONFIG_BASE_PATH, 'artist_code_test_matrix.npy'),
    ('all_metadata', BASE_PATH, 'all_metadata.csv'),
    ('all_data_matrix', BASE_PATH, 'all_code_matrix.npy'),
    ('all_embedding_matrix', BASE_PATH, 'all_embedding_matrix.npy'),
    ('all_artists', CONFIG_BASE_PATH, 'all_artists.csv'),
    ('all_artists_code_matrix', CONFIG_BASE_PATH, 'all_artists_code_matrix.npy'),
)

# Map each logical name to the full path of its file.
museum_sequence_path = {
    key: os.path.join(root, file_name)
    for key, root, file_name in _sequence_files
}
museum_sequence_path

## Load data

**Load the data of all artworks**

In [None]:
# Metadata table (CSV) plus the code and embedding matrices (.npy) of the
# whole collection. Presumably all three are row-aligned; the shapes are
# printed in the next cell so this can be checked.
df_all_metadata = pd.read_csv(museum_sequence_path['all_metadata'])
all_data_matrix = np.load(museum_sequence_path['all_data_matrix'])
all_embedding_matrix = np.load(museum_sequence_path['all_embedding_matrix'])

# Artist table and artist code matrix; get_artist_code pairs a row of the
# table with a row of the matrix.
df_all_artists = pd.read_csv(museum_sequence_path['all_artists'])
all_artists_code_matrix = np.load(museum_sequence_path['all_artists_code_matrix'])

In [None]:
# Print the shape of every object loaded above, in loading order.
for loaded in (df_all_metadata, all_data_matrix, all_embedding_matrix,
               df_all_artists, all_artists_code_matrix):
    print(loaded.shape)

**Expand the artist code matrix to one row per artwork, aligned with all_metadata**

In [None]:
def get_artist_code(author, df_all_artists, all_artists_code_matrix):
    """Return the code vector of `author`.

    The matrix row is chosen by the *position* of the first row of
    `df_all_artists` whose 'author' value equals `author`. Using the position
    rather than the index label keeps the lookup correct when the frame does
    not carry the default 0..n-1 index (for example after filtering rows).

    Parameters
    ----------
    author
        Value compared against the 'author' column.
    df_all_artists : pandas.DataFrame
        Table with an 'author' column; row i is taken to describe row i of
        `all_artists_code_matrix`.
    all_artists_code_matrix : numpy.ndarray
        2-D array indexed as ``[row, :]``.

    Returns
    -------
    numpy.ndarray
        ``all_artists_code_matrix[position, :]`` for the first matching row.

    Raises
    ------
    IndexError
        If no row of `df_all_artists` matches `author`.
    """
    # Positions (not labels) of the rows whose author matches.
    matches = (df_all_artists['author'] == author).values.nonzero()[0]
    if matches.size == 0:
        raise IndexError(
            'author {!r} not found in df_all_artists'.format(author))
    return all_artists_code_matrix[matches[0], :]

In [None]:
# Look up the artist code of every artwork and keep it as a metadata column.
df_all_metadata['artist code'] = df_all_metadata['author'].apply(
    lambda author: get_artist_code(author, df_all_artists, all_artists_code_matrix)
)

# Stack the per-artwork codes so the matrix has one row per metadata row.
# NOTE: this rebinds all_artists_code_matrix, replacing the per-artist matrix
# loaded earlier. Reload the data before re-running this cell, otherwise
# get_artist_code would index the already expanded matrix.
all_artists_code_list = list(df_all_metadata['artist code'].values)
all_artists_code_matrix = np.vstack(all_artists_code_list)
all_artists_code_matrix.shape

**Load tours dataset**

In [None]:
# Train / test tour tables; the first CSV column is used as the index.
df_x_train = pd.read_csv(museum_sequence_path['x_train'], index_col=0)
df_x_test = pd.read_csv(museum_sequence_path['x_test'], index_col=0)

# Code matrices of the train / test tours.
x_train_matrix = np.load(museum_sequence_path['x_train_matrix'])
x_test_matrix = np.load(museum_sequence_path['x_test_matrix'])

# Embedding matrices of the train / test tours.
embedding_train_matrix = np.load(museum_sequence_path['embedding_train_matrix'])
embedding_test_matrix = np.load(museum_sequence_path['embedding_test_matrix'])

# Artist code matrices of the train / test tours.
artist_code_train_matrix = np.load(museum_sequence_path['artist_code_train_matrix'])
artist_code_test_matrix = np.load(museum_sequence_path['artist_code_test_matrix'])

In [None]:
# Print the shape of every tour object loaded above, train before test.
for loaded in (df_x_train, df_x_test,
               x_train_matrix, x_test_matrix,
               embedding_train_matrix, embedding_test_matrix,
               artist_code_train_matrix, artist_code_test_matrix):
    print(loaded.shape)

In [None]:
# Values of the test table's 'tour_path' column; the evaluation loop treats
# each one as a tour folder containing 'images' and 'metadata.csv'.
tour_list = df_x_test['tour_path'].values

## Load model

In [None]:
from Generator_model_factory import Generator_model_factory, Generator_model_most_similar, Generator_model_rnn, Generator_model_rnn_multivariate, Generator_model_rnn_embedding, Generator_model_rnn_code_embeding

## Compute MAE

**Define the window size to work with**

In [None]:
# Window size shared by all generators. During evaluation the first
# `window_size` rows of each true tour are skipped before comparing it with
# the forecast. It also selects the 'config_<window_size>' result folder.
window_size = 3

**Define models**

In [None]:
# Generator built on Generator_model_most_similar from the test code matrix.
# Unlike the RNN generators it takes no CONFIG_PATH
# (presumably it needs no trained weights — TODO confirm).
gen_model_most_sim = Generator_model_most_similar(
        X=x_test_matrix,
        all_data_matrix=all_data_matrix, 
        df_all_metadata=df_all_metadata, 
        window_size=window_size)

In [None]:
# Number of rows in the training matrix; passed as split_time to every RNN generator.
split_time = x_train_matrix.shape[0]
# RNN generator on the code matrix only, configured from UNI_CONFIG_PATH.
gen_model_rnn = Generator_model_rnn(
    X=x_test_matrix, 
    all_data_matrix=all_data_matrix, 
    batch_size=128, 
    df_all_metadata=df_all_metadata, 
    CONFIG_PATH=UNI_CONFIG_PATH, 
    shuffle_buffer_size=300, 
    split_time=split_time, 
    window_size=window_size,
    conv_filter=20, 
    lstm_filter=40, 
    dense_filter=16, 
    prediction_length=1)

In [None]:
# Multivariate RNN generator configured from MULTI_CONFIG_PATH.
# It is built here but is not part of the active gen_models list.
gen_model_rnn_multi = Generator_model_rnn_multivariate(
    X=x_test_matrix,
    all_data_matrix=all_data_matrix,
    batch_size=128,
    df_all_metadata=df_all_metadata,
    CONFIG_PATH=MULTI_CONFIG_PATH,
    shuffle_buffer_size=300,
    split_time=split_time, window_size=window_size,
    n_influence_features=10)

In [None]:
# RNN generator that receives the test embeddings separately (X_embedding),
# configured from EMB_CONFIG_PATH.
# It is built here but is not part of the active gen_models list.
gen_model_rnn_embedding = Generator_model_rnn_embedding(
    X=x_test_matrix,
    all_data_matrix=all_data_matrix,
    batch_size=128,
    df_all_metadata=df_all_metadata,
    CONFIG_PATH=EMB_CONFIG_PATH,
    shuffle_buffer_size=300,
    split_time=split_time, window_size=window_size,
    X_embedding=embedding_test_matrix,
    conv_filter=20, 
    lstm_filter=40, 
    dense_filter=20, 
    prediction_length=1)

In [None]:
# Concatenate code and embedding features column-wise, once for the whole
# collection and once for the test tours.
all_data_code_embedding_matrix = np.hstack((all_data_matrix, all_embedding_matrix))
x_embedding_test_matrix = np.hstack((x_test_matrix, embedding_test_matrix))

# Same generator class as gen_model_rnn, fed with the code + embedding
# features and configured from CODE_EMB_CONFIG_PATH. The explicit name is
# passed on to the generator.
gen_model_rnn_code_embedding = Generator_model_rnn(
    X=x_embedding_test_matrix, 
    all_data_matrix=all_data_code_embedding_matrix, 
    batch_size=128, 
    df_all_metadata=df_all_metadata, 
    CONFIG_PATH=CODE_EMB_CONFIG_PATH, 
    shuffle_buffer_size=300, 
    split_time=split_time, 
    window_size=window_size,
    conv_filter=20, 
    lstm_filter=40, 
    dense_filter=20, 
    prediction_length=1,
    name= 'generated_sequence_rnn_code_embedding')

In [None]:
# Reduce each test row's artist code to its mean and append it as one extra
# feature column. The mean gets its own name so that artist_code_test_matrix
# keeps the loaded matrix and this cell can be re-run without reloading.
artist_code_test_mean = np.mean(artist_code_test_matrix, axis=1)
x_artist_code_embedding_test_matrix = np.hstack(
    (x_embedding_test_matrix, artist_code_test_mean.reshape((-1, 1))))

# Same reduction for the whole collection (one artist code row per artwork).
all_artists_code_mean_matrix = np.mean(all_artists_code_matrix, axis=1)
all_data_artist_code_embedding_matrix = np.hstack(
    (all_data_code_embedding_matrix, all_artists_code_mean_matrix.reshape((-1, 1))))

# RNN generator on code + embedding + mean artist code features, configured
# from ARTIST_CODE_EMB_CONFIG_PATH.
gen_model_rnn_artist_code_embedding = Generator_model_rnn(
    X=x_artist_code_embedding_test_matrix,
    all_data_matrix=all_data_artist_code_embedding_matrix,
    batch_size=128,
    df_all_metadata=df_all_metadata,
    CONFIG_PATH=ARTIST_CODE_EMB_CONFIG_PATH,
    shuffle_buffer_size=300,
    split_time=split_time,
    window_size=window_size,
    conv_filter=20,
    lstm_filter=40,
    dense_filter=20,
    prediction_length=1,
    name= 'generated_sequence_rnn_artist_code_embedding')

In [None]:
# Generators evaluated by the loop below. The commented lines are
# alternative selections kept for quick switching.
#gen_models = [gen_model_most_sim, gen_model_rnn, gen_model_rnn_multi]
gen_models = [gen_model_most_sim, gen_model_rnn, gen_model_rnn_code_embedding, gen_model_rnn_artist_code_embedding]
#gen_models = [gen_model_rnn_artist_code_embedding]

**Define MAE dataframe**

In [None]:
def get_mae_dataframe():
    """Return a new, empty results table.

    The columns are, in order, 'model', 'tour', 'mae' and 'fretech_dist'.
    """
    column_names = ('model', 'tour', 'mae', 'fretech_dist')
    return pd.DataFrame({name: [] for name in column_names})

**Define Result path**

In [None]:
def get_result_path(window_size):
    """Return the result folder for `window_size`: RESULT_PATH/config_<window_size>."""
    config_dir = 'config_' + str(window_size)
    return os.path.join(RESULT_PATH, config_dir)
    

In [None]:
# Evaluate every generator model with the same window size: predict each test
# tour, score the forecast against the true tour (MAE and Frechet distance)
# and store the predictions and the score table in the model's result folder.

# Column order of the score table, taken from the shared empty template.
mae_columns = list(get_mae_dataframe().columns)

for gen_model in gen_models:
    # Row offset of the current tour inside the stacked test matrices
    index = 0

    # Get sequence generator model
    model = gen_model.get_model()

    # One score row per evaluated tour. They are turned into a DataFrame once
    # at the end, because DataFrame.append no longer exists in pandas >= 2.0.
    mae_rows = []

    # Folder where this model's predictions and scores are saved
    target_path = os.path.join(get_result_path(window_size), str(gen_model))
    os.makedirs(target_path, exist_ok=True)

    for tour in tour_list:
        # The tour length is the number of entries in the tour's images folder
        images_path = os.path.join(tour, 'images')
        tour_length = len(os.listdir(images_path))
        # Get code for artworks tour
        X_tour = gen_model.get_tour_data(index, tour_length)
        # Get embedding for artworks tour
        X_embedding_tour = embedding_test_matrix[index: index + tour_length]

        metadata_path = os.path.join(tour, 'metadata.csv')
        df_X_tour = pd.read_csv(metadata_path)

        # Advance the offset even when the tour is skipped below, so the
        # next tour reads its own rows
        index += tour_length

        # Hand the tour to the model and predict it
        model.set_tour(X_tour=X_tour, df_X_tour=df_X_tour, X_embedding_tour=X_embedding_tour)
        df_predicted_tour = model.predict_tour()

        forecast_matrix = model.get_predicted_tour_matrix()
        X_true = model.get_tour()

        # Empty forecast: the window size was bigger than the tour size
        if forecast_matrix.shape[0] == 0:
            continue

        print(forecast_matrix.T.shape)

        # The first window_size rows of the true tour have no forecast
        X_eval = X_true[window_size:, :]
        mae = np.mean(mean_absolute_error(X_eval.T, forecast_matrix.T, multioutput='raw_values'))
        fretech_dist = compute_fretech_distance(forecast_matrix, X_eval)

        # Save tour
        tour_name = tour.split('/')[-1]
        df_predicted_tour.to_csv(os.path.join(target_path, 'predicted_' + tour_name + '.csv'), index=False)

        # Save MAE
        mae_rows.append(
            {'model': model.get_name(),
             'tour': tour_name,
             'mae': mae,
             'fretech_dist': fretech_dist})

    df_mae = pd.DataFrame(mae_rows, columns=mae_columns)
    df_mae.to_csv(os.path.join(target_path, 'mae.csv'), index=False)

    # Clear memory before the next model is loaded
    model.del_data()
    del model
    del df_mae

In [None]:
#df_mae['mae'].mean()