# Curatornet MAE Evaluation

In [None]:
import numpy as np
import pandas as pd
import os
import tensorflow as tf
from sklearn.metrics import mean_absolute_error
import gc

In [None]:
# Root folders: raw datasets on one side, train/test configurations and results on the other.
DATASET_PATH = '/root/work/datasets/'
BASE_PATH = '/root/work/datasets/artwork_sequence/'
CONFIG_BASE_PATH = '/root/work/artwork_sequence/train_test_configuration'
RESULT_PATH = '/root/work/artwork_sequence/predicted_tours'

# Per-model configuration folders, both located under CONFIG_BASE_PATH.
CURATORNET_CONFIG_PATH = os.path.join(CONFIG_BASE_PATH, 'curatornet')
CURATORNET_SEQUENCE_CONFIG_PATH = os.path.join(CONFIG_BASE_PATH, 'curatornet_sequence')

# Folder with the CuratorNet CSV files, located under DATASET_PATH.
CURATORNET_DATA_PATH = os.path.join(DATASET_PATH, 'curatornet_data')

In [None]:
# Every entry is keyed by the stem of the file it points to, so the mapping is
# derived from the stems: .npy matrices live under CONFIG_BASE_PATH, .csv
# metadata under CURATORNET_DATA_PATH.
museum_sequence_path = {
    **{
        stem: os.path.join(CONFIG_BASE_PATH, stem + '.npy')
        for stem in (
            'curatornet_code_train_matrix',
            'curatornet_code_test_matrix',
            'curatornet_sequence_train_matrix',
            'curatornet_sequence_test_matrix',
        )
    },
    **{
        stem: os.path.join(CURATORNET_DATA_PATH, stem + '.csv')
        for stem in (
            'curatornet_purchase_data_train',
            'curatornet_purchase_data_test',
            'curatornet_sequence_purchase_data_train',
            'curatornet_sequence_purchase_data_test',
        )
    },
}
# Display the resolved paths as the cell output.
museum_sequence_path

In [None]:
def get_trained_weights_path(CONFIG_PATH, window_size):
    """Locate the trained-weights folder for a given window size.

    Args:
        CONFIG_PATH: Base configuration folder of the model.
        window_size: Window size; selects the ``config_<window_size>`` subfolder.

    Returns:
        A dict with the single key ``'weights_folder'`` whose value is
        ``CONFIG_PATH`` joined with ``config_<window_size>/trained_model_weights``.
    """
    relative_folder = 'config_{}/trained_model_weights'.format(window_size)
    return {'weights_folder': os.path.join(CONFIG_PATH, relative_folder)}


## Load data

In [None]:
# Metadata tables (CSV) of the purchase and sequence datasets, train then test.
(
    df_curatornet_purchase_data_train,
    df_curatornet_purchase_data_test,
    df_curatornet_sequence_data_train,
    df_curatornet_sequence_data_test,
) = [
    pd.read_csv(museum_sequence_path[key])
    for key in (
        'curatornet_purchase_data_train',
        'curatornet_purchase_data_test',
        'curatornet_sequence_purchase_data_train',
        'curatornet_sequence_purchase_data_test',
    )
]

# Code matrices (.npy) of the purchase and sequence datasets, train then test.
(
    curatornet_code_train_matrix,
    curatornet_code_test_matrix,
    curatornet_sequence_train_matrix,
    curatornet_sequence_test_matrix,
) = [
    np.load(museum_sequence_path[key])
    for key in (
        'curatornet_code_train_matrix',
        'curatornet_code_test_matrix',
        'curatornet_sequence_train_matrix',
        'curatornet_sequence_test_matrix',
    )
]


In [None]:
# Print the shape of every loaded table and matrix, one per line, in loading order.
print(
    df_curatornet_purchase_data_train.shape,
    df_curatornet_purchase_data_test.shape,
    df_curatornet_sequence_data_train.shape,
    df_curatornet_sequence_data_test.shape,
    curatornet_code_train_matrix.shape,
    curatornet_code_test_matrix.shape,
    curatornet_sequence_train_matrix.shape,
    curatornet_sequence_test_matrix.shape,
    sep='\n',
)

**Define configurations**

In [None]:
# Two interchangeable configurations with the same four keys: metadata tables
# and code matrices for train and test.
# NOTE: the 'medatada' spelling is kept on purpose; the cells that read these
# dicts look the tables up under exactly these keys.
curatornet_user_purchases_data = dict(
    train_medatada=df_curatornet_purchase_data_train,
    test_medatada=df_curatornet_purchase_data_test,
    matrix_train=curatornet_code_train_matrix,
    matrix_test=curatornet_code_test_matrix,
)

curatornet_sequence_data = dict(
    train_medatada=df_curatornet_sequence_data_train,
    test_medatada=df_curatornet_sequence_data_test,
    matrix_train=curatornet_sequence_train_matrix,
    matrix_test=curatornet_sequence_test_matrix,
)

In [None]:
# Active dataset for the cells that read `curator_config`;
# `curatornet_user_purchases_data` exposes the same keys and can be used instead.
curator_config = curatornet_sequence_data

## Load model

In [None]:
from Sequence_prediction_factory import Sequence_prediction_multivariate, Sequence_prediction_univariate

## Compute MAE

In [None]:
# Unpack the active configuration into the short names used by the cells below.
code_train_matrix, code_test_matrix = (
    curator_config[key] for key in ('matrix_train', 'matrix_test')
)
df_train, df_test = (
    curator_config[key] for key in ('train_medatada', 'test_medatada')
)

In [None]:
# Stack the train rows on top of the test rows and show the resulting shape.
all_data_matrix = np.concatenate((code_train_matrix, code_test_matrix), axis=0)
all_data_matrix.shape

In [None]:
# Append the test metadata below the train metadata with a fresh 0..n-1 index,
# then show the resulting shape.
all_metadata = pd.concat((df_train, df_test), ignore_index=True)
all_metadata.shape

**Define window size**

In [None]:
# Number of consecutive rows in each window fed to the model.
window_size = 3

# Matrix handed to the prediction factory, and the row count of the training matrix.
X = curatornet_code_test_matrix
split_time = curatornet_code_train_matrix.shape[0]

# Number of features (columns) in each artwork code.
n_features = X.shape[1]

# Dataset pipeline settings passed to the prediction factory.
shuffle_buffer_size = 300
batch_size = 128

In [None]:
# Row count of the training matrix (same value as assigned in the window-size cell).
split_time = curatornet_code_train_matrix.shape[0]
# Build the univariate sequence-prediction helper with the purchase configuration folder.
# NOTE(review): X is the *test* matrix while split_time is the *train* row count —
# confirm how Sequence_prediction_univariate uses split_time.
model_prediction =Sequence_prediction_univariate(
    X=X, 
    shuffle_buffer_size=shuffle_buffer_size, 
    split_time=split_time, 
    train_batch_size=batch_size, 
    val_batch_size=batch_size,
    CONFIG_PATH=CURATORNET_CONFIG_PATH)

In [None]:
# Get and define the RNN model for the chosen window size.
model_prediction.set_window_size(window_size)
model = model_prediction.get_model()
# Same hyper-parameters as the ones get_user_prediction passes to define_model.
model.define_model(conv_filter=20, lstm_filter=40, dense_filter=20, prediction_length=1)
# Print the summary of the underlying model object.
model.get_model().summary()

**Look up the user's previous purchases**

In [None]:
def get_user_purchases_matrix(user_id, window_size, df, code_matrix):
    """Return the code rows of a user's latest purchases, padded to ``window_size``.

    Args:
        user_id: Value matched against the ``user_id_hash`` column of ``df``.
        window_size: Number of rows the returned matrix must have.
        df: Purchase metadata with a ``user_id_hash`` column. Its index values
            are used directly as row positions into ``code_matrix``.
        code_matrix: 2-D array indexed by the index values of ``df``.

    Returns:
        The rows of ``code_matrix`` for the user's last ``window_size``
        purchases, in ``df`` order. When the user has fewer purchases, the
        last one is repeated at the end until ``window_size`` rows are reached.

    Raises:
        IndexError: If padding is required but ``user_id`` has no row in ``df``.
    """
    # Look for the user's purchases.
    user_rows = df[df['user_id_hash'] == user_id].index

    # More purchases than the window size: keep only the most recent ones.
    purchase_count = len(user_rows)
    if purchase_count > window_size:
        user_rows = user_rows[purchase_count - window_size:]

    user_purchase_matrix = code_matrix[list(user_rows), :]

    # Fewer purchases than the window size: repeat the last purchase.
    missing_rows = window_size - user_purchase_matrix.shape[0]
    if missing_rows > 0:
        if user_purchase_matrix.shape[0] == 0:
            # Nothing to repeat; fail with an explicit message instead of an
            # opaque out-of-bounds error from the padding step.
            raise IndexError(
                'no purchases found for user {!r}; cannot build a window of '
                '{} rows'.format(user_id, window_size)
            )
        padding = np.repeat(user_purchase_matrix[-1:, :], missing_rows, axis=0)
        user_purchase_matrix = np.concatenate([user_purchase_matrix, padding])

    return user_purchase_matrix
    

### Predict tour

**Helper functions**

In [None]:
def model_forecast(model, series, window_size, batch_size):
    """Run ``model.predict`` over every sliding window of ``series``.

    Args:
        model: Object exposing ``predict``; it receives a batched
            ``tf.data.Dataset`` of windows.
        series: Input values. A 1-D input gets a trailing axis of size 1.
        window_size: Number of consecutive elements per window. Windows advance
            by one element and incomplete trailing windows are dropped.
        batch_size: Number of windows per batch handed to ``predict``.

    Returns:
        Whatever ``model.predict`` returns for the windowed dataset.
    """
    is_one_dimensional = len(series.shape) == 1
    if is_one_dimensional:
        series = tf.expand_dims(series, axis=-1)

    windows = (
        tf.data.Dataset.from_tensor_slices(series)
        .window(window_size, shift=1, drop_remainder=True)
        # Each window is itself a dataset; collapse it into a single tensor.
        .flat_map(lambda window: window.batch(window_size))
        .map(lambda window: window[:])
        .batch(batch_size)
    )
    return model.predict(windows)

In [None]:
def get_dataframe_evaluation():
    """Create the empty table that collects per-feature evaluation rows.

    Returns:
        An empty ``pandas.DataFrame`` with the columns ``feature``,
        ``forecast``, ``x_valid`` and ``mae``, in that order.
    """
    column_names = ('feature', 'forecast', 'x_valid', 'mae')
    return pd.DataFrame({name: [] for name in column_names})

In [None]:
def get_user_prediction(model_prediction, X, X_valid, window_size, batch_size, weights_path):
    """Forecast each feature of the next code from ``X`` and score it against ``X_valid``.

    For every feature column of ``X`` the weights stored for that feature are
    loaded, the feature is forecast from its own column plus the columns
    returned by ``model.get_indexes_features()``, and the forecast is compared
    with the matching value of ``X_valid``.

    Args:
        model_prediction: Prediction helper whose ``get_model()`` returns the
            model wrapper to configure and query.
        X: 2-D seed matrix; its number of columns is the number of features.
        X_valid: 2-D array whose first row holds the expected value per feature.
        window_size: Window size used for forecasting and for locating weights.
        batch_size: Batch size handed to ``model_forecast``.
        weights_path: Base configuration folder containing the trained weights.

    Returns:
        A ``pandas.DataFrame`` with one row per feature and the columns
        ``feature``, ``forecast``, ``x_valid`` and ``mae``.
    """
    model = model_prediction.get_model()
    model.define_model(conv_filter=20, lstm_filter=40, dense_filter=20, prediction_length=1)

    # Rows are collected in a list and turned into a DataFrame once:
    # DataFrame.append no longer exists in pandas >= 2.0 and copied the whole
    # frame on every call.
    evaluation_rows = []

    # Use the width of the seed that was passed in rather than a notebook global.
    for feature in range(X.shape[1]):

        # Load weights for feature i
        model.set_index(feature)
        model.load_weights(get_trained_weights_path(weights_path, window_size))

        # Features taken into account for the prediction: the target feature
        # first, followed by the ones the model reports.
        x_influence_features = model.get_indexes_features()
        x_influence_features = np.insert(arr=x_influence_features, obj=0, values=int(feature))
        x_feature = X[:, x_influence_features.astype(int)]

        # Predict feature i
        rnn_forecast = model_forecast(model.get_model(), x_feature, window_size, batch_size)

        # Expected value of feature i
        x_valid = X_valid[0, feature]

        # Compute MAE
        mae = tf.keras.metrics.mean_absolute_error(x_valid, rnn_forecast).numpy().mean()

        evaluation_rows.append({
            'feature': feature,
            'forecast': rnn_forecast[0][0],
            'x_valid': x_valid,
            'mae': mae,
        })

    # Reuse the column layout defined by get_dataframe_evaluation so both stay in sync.
    return pd.DataFrame(evaluation_rows, columns=get_dataframe_evaluation().columns)
    

In [None]:
# Per-user evaluation tables, keyed by user_id_hash.
# NOTE(review): a user appearing in several of the evaluated rows keeps only
# the table of the last such row.
user_purchases_prediction_dict = {}

# Only the rows returned by DataFrame.head() are evaluated.
for index, row in df_curatornet_purchase_data_test.head().iterrows():
    user_id = row['user_id_hash']

    # Seed: the user's latest purchases in the purchase test set.
    # NOTE(review): these are the latest purchases in the whole table and may
    # include row `index` itself — confirm the seed is meant to overlap the target.
    user_purchases_matrix = get_user_purchases_matrix(
        user_id,
        window_size,
        df_curatornet_purchase_data_test,
        curatornet_code_test_matrix,
    )
    X = user_purchases_matrix

    # Target: read from the same matrix the seed comes from. `code_test_matrix`
    # follows `curator_config`, which may point at a different dataset than the
    # purchase table being iterated here.
    X_valid = curatornet_code_test_matrix[index, :].reshape((1, -1))

    # Compute evaluation
    df_evaluation = get_user_prediction(model_prediction, X, X_valid, window_size, batch_size, CURATORNET_CONFIG_PATH)

    user_purchases_prediction_dict[user_id] = df_evaluation

In [None]:
from sklearn.metrics.pairwise import cosine_similarity, cosine_distances,euclidean_distances

# Evaluate one user of the active test set: seed the model with the
# `window_size` purchases that precede the user's last one, forecast the next
# code and compare it with that last purchase.
for user_id, df_group in df_test.groupby('user_id_hash'):
    indexes = list(df_group.index)

    # The seed needs `window_size` purchases plus one more to validate against;
    # with fewer rows the seed would be shorter than the forecasting window.
    if len(indexes) < window_size + 1:
        continue

    print(user_id)

    # Seed: the `window_size` purchases right before the last one (the slice
    # length follows `window_size` instead of assuming a window of 3).
    X = code_test_matrix[indexes[-(window_size + 1):-1]]
    print(X.shape)
    # Target: the user's last purchase.
    X_valid = code_test_matrix[indexes[-1], :].reshape((1, -1))

    # Compute evaluation
    df_evaluation = get_user_prediction(model_prediction, X, X_valid, window_size, batch_size, CURATORNET_SEQUENCE_CONFIG_PATH)

    # Rebuild the predicted code from the per-feature forecasts.
    forescast = df_evaluation['forecast']
    feature_list = list(forescast.values)
    forecast_matrix = np.stack(feature_list)
    forecast_matrix = forecast_matrix.T

    # Cosine similarity between the predicted code and the expected one.
    sim_matrix = cosine_similarity(forecast_matrix.reshape((1, -1)), X_valid.reshape((1, -1)))
    print(sim_matrix)

    # Only the first user with enough history is evaluated.
    break

## Find the most similar artworks

In [None]:
# Average of the per-feature MAE values in the most recently computed df_evaluation.
df_evaluation['mae'].mean()

**Reconstruct predicted code**

In [None]:
# Per-feature forecasts of the most recently computed df_evaluation
# (the variable name keeps its original spelling because later cells read it).
forescast = df_evaluation['forecast']
# Preview the first forecasts.
forescast.head()

In [None]:
# Stack the per-feature forecasts into one array, transpose it, and show its shape.
feature_list = list(forescast.values)
forecast_matrix = np.transpose(np.stack(feature_list))
forecast_matrix.shape

**Compute cosine similarity**

In [None]:
from sklearn.metrics.pairwise import cosine_similarity, cosine_distances,euclidean_distances

In [None]:
# Compute the cosine similarity between the predicted code (as a single row)
# and every row of curatornet_code_test_matrix; the result has one row.
# NOTE(review): the forecast above may have been produced from `code_test_matrix`
# (the dataset selected by `curator_config`) — confirm this is the matrix to rank against.
sim_matrix = cosine_similarity(forecast_matrix.reshape((1, -1)), curatornet_code_test_matrix)

In [None]:
# Row positions ordered by increasing similarity to the predicted code.
sort_index = np.argsort(sim_matrix.reshape((-1,)))
# The 20 most similar rows, the most similar one last.
sort_index[-20:]

In [None]:
# Similarity between the predicted code and row 0 of the compared matrix.
sim_matrix[0][0]

In [None]:
# Highest similarity found, i.e. the score of the best-matching row.
sim_matrix[0][sort_index[-1]]