# Artwork sequence prediction

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import os

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  from ._conv import register_converters as _register_converters
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
# Root folder that holds every train/test configuration artefact.
BASE_PATH = '/root/work/artwork_sequence/train_test_configuration'

# One sub-folder per model family: each *_CONFIG_PATH is BASE_PATH/<sub-folder>.
(
    MULTI_CONFIG_PATH,
    UNI_CONFIG_PATH,
    UNI_PREDICT_MULTI_CONFIG_PATH,
    EMB_CONFIG_PATH,
    CODE_EMB_CONFIG_PATH,
    ARTIST_CODE_EMB_CONFIG_PATH,
) = (
    os.path.join(BASE_PATH, subdir)
    for subdir in (
        'multivariate',
        'univariate',
        'univariate_predict_multiple',
        'word_embeddings',
        'code_embedding',
        'artist_code_embedding',
    )
)


In [3]:
# Logical dataset name -> absolute path; every file sits directly under BASE_PATH.
museum_sequence_path = {
    name: os.path.join(BASE_PATH, file_name)
    for name, file_name in (
        # Tour sequences (CSV)
        ('x_train', 'X_train.csv'),
        ('x_test', 'X_test.csv'),
        ('x_tour', 'style_tours.csv'),
        # Code matrices
        ('x_style_tour_matrix', 'style_tours_matrix.npy'),
        ('x_train_matrix', 'X_train_matrix.npy'),
        ('x_test_matrix', 'X_test_matrix.npy'),
        # Embedding matrices
        ('embedding_train_matrix', 'embedding_train_matrix.npy'),
        ('embedding_test_matrix', 'embedding_test_matrix.npy'),
        ('embedding_style_tour_matrix', 'embedding_style_tours_matrix.npy'),
        # Artist-code matrices
        ('artist_code_train_matrix', 'artist_code_train_matrix.npy'),
        ('artist_code_test_matrix', 'artist_code_test_matrix.npy'),
        ('artist_style_tour_matrix', 'style_tours_artist_code_matrix.npy'),
    )
}
museum_sequence_path

{'artist_code_test_matrix': '/root/work/artwork_sequence/train_test_configuration/artist_code_test_matrix.npy',
 'artist_code_train_matrix': '/root/work/artwork_sequence/train_test_configuration/artist_code_train_matrix.npy',
 'artist_style_tour_matrix': '/root/work/artwork_sequence/train_test_configuration/style_tours_artist_code_matrix.npy',
 'embedding_style_tour_matrix': '/root/work/artwork_sequence/train_test_configuration/embedding_style_tours_matrix.npy',
 'embedding_test_matrix': '/root/work/artwork_sequence/train_test_configuration/embedding_test_matrix.npy',
 'embedding_train_matrix': '/root/work/artwork_sequence/train_test_configuration/embedding_train_matrix.npy',
 'x_style_tour_matrix': '/root/work/artwork_sequence/train_test_configuration/style_tours_matrix.npy',
 'x_test': '/root/work/artwork_sequence/train_test_configuration/X_test.csv',
 'x_test_matrix': '/root/work/artwork_sequence/train_test_configuration/X_test_matrix.npy',
 'x_tour': '/root/work/artwork_sequence/tr

## Load data

In [4]:
# Tour sequences as data frames; the first CSV column is used as the index.
df_x_train, df_x_test, df_style_tours = (
    pd.read_csv(museum_sequence_path[key], index_col=0)
    for key in ('x_train', 'x_test', 'x_tour')
)

# Code matrices (train / test / style tours).
x_train_matrix, x_test_matrix, x_style_tour_matrix = (
    np.load(museum_sequence_path[key])
    for key in ('x_train_matrix', 'x_test_matrix', 'x_style_tour_matrix')
)

# Embedding matrices (train / test / style tours).
embedding_train_matrix, embedding_test_matrix, embedding_style_tour_matrix = (
    np.load(museum_sequence_path[key])
    for key in ('embedding_train_matrix', 'embedding_test_matrix', 'embedding_style_tour_matrix')
)

# Artist-code matrices (train / test / style tours).
artist_code_train_matrix, artist_code_test_matrix, artist_style_tour_matrix = (
    np.load(museum_sequence_path[key])
    for key in ('artist_code_train_matrix', 'artist_code_test_matrix', 'artist_style_tour_matrix')
)

**Combine code data with embedding data**

In [5]:
# Place each embedding matrix to the right of its code matrix (np.hstack).
x_embedding_train_matrix, x_embedding_test_matrix, x_embedding_style_tour_matrix = (
    np.hstack(code_and_embedding)
    for code_and_embedding in (
        (x_train_matrix, embedding_train_matrix),
        (x_test_matrix, embedding_test_matrix),
        (x_style_tour_matrix, embedding_style_tour_matrix),
    )
)

**Reduce artist code matrix**

In [6]:
# Reduce every artist-code matrix along axis 1 by taking the mean.
# NOTE(review): the input names are rebound to the reduced arrays, so this cell
# is presumably not safe to run twice without re-running the load cell - confirm.
artist_code_train_matrix, artist_code_test_matrix, artist_style_tour_matrix = (
    np.mean(artist_matrix, axis=1)
    for artist_matrix in (
        artist_code_train_matrix,
        artist_code_test_matrix,
        artist_style_tour_matrix,
    )
)

In [7]:
# Append the reduced artist code as one extra column (reshaped to (-1, 1))
# to the right of each code + embedding matrix.
x_artist_embedding_train_matrix, x_artist_embedding_test_matrix, x_artist_embedding_style_matrix = (
    np.hstack((code_embedding, artist_code.reshape(-1, 1)))
    for code_embedding, artist_code in (
        (x_embedding_train_matrix, artist_code_train_matrix),
        (x_embedding_test_matrix, artist_code_test_matrix),
        (x_embedding_style_tour_matrix, artist_style_tour_matrix),
    )
)

## Reset Tensorflow session

In [8]:
# Reset the global Keras state before any model is created in this notebook.
tf.keras.backend.clear_session()

## Config data to fit with the model input

Because the **Prediction feature model** splits the data into training and validation datasets itself, all the data must be passed to it as a single block.

**Define timeline**

In [9]:
# Integer timeline with one step per row of the train and test code matrices.
# NOTE(review): the name `time` is rebound by `import time` in the training
# cell further down, so this array is no longer reachable after that cell runs.
time = np.arange(len(x_train_matrix) + len(x_test_matrix))
time.shape

(859,)

**Define configuration to deal with the windowed dataset**

In [10]:
def get_tours_conf():
    """Build the model input configuration from the train and test tours.

    Each returned matrix is the train matrix followed by the test matrix
    (``np.concatenate`` on the default axis). All inputs are read from the
    notebook's global namespace.

    Returns
    -------
    tuple
        ``(split_time, X, X_embeddings, X_code_embeddings,
        X_artist_code_embeddings)`` where ``split_time`` is the number of
        rows of ``x_train_matrix``, i.e. the position at which the test rows
        start in every concatenated matrix.
    """
    train_test_pairs = (
        # code matrix
        (x_train_matrix, x_test_matrix),
        # embedding matrix
        (embedding_train_matrix, embedding_test_matrix),
        # code and embedding matrix
        (x_embedding_train_matrix, x_embedding_test_matrix),
        # code, embedding and artist matrix
        (x_artist_embedding_train_matrix, x_artist_embedding_test_matrix),
    )
    stacked = tuple(np.concatenate(pair) for pair in train_test_pairs)

    return (x_train_matrix.shape[0],) + stacked

In [11]:
def get_style_tour_conf(split_time=20000):
    """Build the model input configuration from the style tours.

    The matrices are returned as loaded/combined earlier in the notebook;
    nothing is copied or concatenated here. All matrices are read from the
    notebook's global namespace.

    Parameters
    ----------
    split_time : int, optional
        Value returned unchanged as the first tuple element. The notebook
        forwards it to the models as ``split_time`` (presumably the row at
        which the validation part starts - confirm against
        ``Sequence_prediction_factory``). Defaults to 20000, the value that
        was previously hard-coded.

    Returns
    -------
    tuple
        ``(split_time, X, X_embeddings, X_code_embeddings,
        X_artist_code_embeddings)``, in the same order as ``get_tours_conf``.
    """
    return (
        split_time,
        # code matrix
        x_style_tour_matrix,
        # embedding matrix
        embedding_style_tour_matrix,
        # code and embedding matrix
        x_embedding_style_tour_matrix,
        # code, embedding and artist matrix
        x_artist_embedding_style_matrix,
    )

In [12]:
# Active data source: the style tours.
# To use the train/test tours instead, call get_tours_conf() here.
(split_time,
 X,
 X_embeddings,
 X_code_embeddings,
 X_artist_code_embeddings) = get_style_tour_conf()

# Number of features to take into account (passed to the multivariate model)
n_influence_features = 10

# Dataset settings passed to every model below
batch_size, shuffle_buffer_size = 128, 300

In [13]:
# Sanity check: shape of the code matrix
X.shape

(27000, 300)

In [14]:
# Sanity check: shape of the embedding matrix
X_embeddings.shape

(27000, 100)

In [15]:
# Sanity check: shape of the code + embedding + artist matrix
X_artist_code_embeddings.shape

(27000, 401)

## Create models

**Define model**

In [16]:
from Sequence_prediction_factory import Sequence_prediction_multivariate, Sequence_prediction_univariate, Sequence_prediction_embeddings

In [17]:
# Univariate model over the code matrix X; its configuration lives in the
# 'univariate_predict_multiple' folder.
model_univariate = Sequence_prediction_univariate(
    CONFIG_PATH=UNI_PREDICT_MULTI_CONFIG_PATH,
    X=X,
    split_time=split_time,
    shuffle_buffer_size=shuffle_buffer_size,
    train_batch_size=batch_size,
    val_batch_size=batch_size,
)

In [18]:
# Multivariate model over the code matrix X, limited to
# n_influence_features features.
model_multivariate = Sequence_prediction_multivariate(
    CONFIG_PATH=MULTI_CONFIG_PATH,
    X=X,
    n_influence_features=n_influence_features,
    split_time=split_time,
    shuffle_buffer_size=shuffle_buffer_size,
    train_batch_size=batch_size,
    val_batch_size=batch_size,
)

In [19]:
# Embedding model: receives the code matrix X and the embedding matrix
# as two separate inputs.
model_embeddings = Sequence_prediction_embeddings(
    CONFIG_PATH=EMB_CONFIG_PATH,
    X=X,
    X_embeddings=X_embeddings,
    split_time=split_time,
    shuffle_buffer_size=shuffle_buffer_size,
    train_batch_size=batch_size,
    val_batch_size=batch_size,
)

In [20]:
# Univariate model fed with the combined code + embedding matrix.
model_code_embedding = Sequence_prediction_univariate(
    CONFIG_PATH=CODE_EMB_CONFIG_PATH,
    X=X_code_embeddings,
    split_time=split_time,
    shuffle_buffer_size=shuffle_buffer_size,
    train_batch_size=batch_size,
    val_batch_size=batch_size,
)

In [21]:
# Univariate model fed with the combined code + embedding + artist matrix.
# It uses its own configuration folder (ARTIST_CODE_EMB_CONFIG_PATH): sharing
# UNI_PREDICT_MULTI_CONFIG_PATH with model_univariate would make both models
# read and write their configuration in the same directory.
model_artist_code_embedding = Sequence_prediction_univariate(
    X=X_artist_code_embeddings, 
    shuffle_buffer_size=shuffle_buffer_size, 
    split_time=split_time, 
    train_batch_size=batch_size, 
    val_batch_size=batch_size,
    CONFIG_PATH=ARTIST_CODE_EMB_CONFIG_PATH)

In [22]:
# Select which of the models defined above the training cell below will use.
model_prediction = model_artist_code_embedding

In [None]:
from IPython.display import clear_output
import time

# Train one model per feature of `model_prediction`, always starting from the
# same untrained weights, and report the elapsed wall-clock time.

# Index selected on the model whenever the untrained weights are saved/loaded.
default_index = 0
# Length of the history (window size); range(3, 4) yields only window_size = 3.
for window_size in range(3,4):
    
    # Define the model for this window size.
    model_prediction.set_window_size(window_size)
    # Reset the Keras state before the new model is built.
    tf.keras.backend.clear_session()
    model = model_prediction.get_model()
    model.define_model(conv_filter=20, lstm_filter=40, dense_filter=20, prediction_length=15)
    # Snapshot the freshly defined (untrained) weights once, so that every
    # feature below can be trained from the same starting point.
    model.set_index(default_index)
    model.save_weights(model_prediction.get_untrained_weights_path())

    start_time = time.time()
    
    for i in range(model_prediction.get_x_features()):
        # Replace the previous iteration's output instead of appending to it.
        clear_output(wait=True)
        print("---------- Feature %s -------------" % (i))
        
        # Restore the untrained weights before training the next feature.
        model.set_index(default_index)
        model.load_weights(model_prediction.get_untrained_weights_path())
        
        # Train the model with feature i.
        model.set_index(i)
        model.train_model(epochs=8, lr=1e-6)

        # Save the trained weights.
        # NOTE(review): whether get_trained_weights_path() depends on the index
        # set above is not visible here; if it does not, every iteration
        # overwrites the previous feature's weights - confirm.
        model.save_weights(model_prediction.get_trained_weights_path())
        

    # Elapsed time for all features of this window size.
    print("--- %s seconds ---" % (time.time() - start_time))

---------- Feature 48 -------------
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8