# Artwork sequence prediction

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import os

In [None]:
# Root directory of the k-fold data (one `folder_<k>` sub-directory per fold).
BASE_PATH = '/root/work/artwork_sequence/kfold'

# Directory below BASE_PATH reserved for the univariate model.
UNI_CONFIG_PATH = os.path.join(BASE_PATH, 'univariate')

## Define K-fold folder paths

In [None]:
def get_museum_sequence_path(base_path, k):
    """Return the data file paths for fold ``k``.

    Args:
        base_path: Directory that contains the ``folder_<k>`` sub-directories.
        k: Fold identifier; it is appended to ``'folder_'`` to name the
            fold directory.

    Returns:
        dict: Maps ``'x_train'``, ``'x_test'``, ``'x_train_matrix'`` and
        ``'x_test_matrix'`` to the corresponding file path inside the fold
        directory.
    """
    # Build from the `base_path` argument (not the module-level BASE_PATH) so
    # the function honours whatever root the caller passes in.
    fold_dir = os.path.join(base_path, 'folder_' + str(k))
    return {
        'x_train': os.path.join(fold_dir, 'X_train.csv'),
        'x_test': os.path.join(fold_dir, 'X_test.csv'),

        'x_train_matrix': os.path.join(fold_dir, 'X_train_matrix.npy'),
        'x_test_matrix': os.path.join(fold_dir, 'X_test_matrix.npy'),
    }


In [None]:
def get_weights_model_path(path, k):
    """Return the weights directory for fold ``k``, creating it if needed.

    Args:
        path: Parent directory under which the fold directory lives.
        k: Fold identifier; it is appended to ``'folder_'`` to name the
            directory.

    Returns:
        str: Path of the ``folder_<k>`` directory inside ``path``.

    Raises:
        OSError: If the directory cannot be created (for example, a regular
            file already exists at that path).
    """
    weights_folder = os.path.join(path, 'folder_' + str(k))
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(weights_folder, exist_ok=True)
    return weights_folder

**Test function**

In [None]:
# Fold identifier used by the cells below.
k = 9

In [None]:
# Resolve the data file paths for fold k. The bare name on the last line makes
# the notebook display the resulting dict.
museum_sequence_path = get_museum_sequence_path(BASE_PATH, k)
museum_sequence_path

In [None]:
# Derive the fold directory from the fixed 'univariate' root rather than from
# the current value of UNI_CONFIG_PATH, so re-running this cell does not nest
# folder_<k> inside a previously computed folder_<k>.
UNI_CONFIG_PATH = get_weights_model_path(os.path.join(BASE_PATH, 'univariate'), k)

**Load data**

In [None]:
# Fold CSV files, read with their first column as the index.
df_x_train, df_x_test = (
    pd.read_csv(museum_sequence_path[csv_key], index_col=0)
    for csv_key in ('x_train', 'x_test')
)

# Matching .npy matrices for the same fold.
x_train_matrix, x_test_matrix = (
    np.load(museum_sequence_path[npy_key])
    for npy_key in ('x_train_matrix', 'x_test_matrix')
)


## Reset Tensorflow session

In [None]:
# Clear the global Keras state so the models built below start from a fresh session.
tf.keras.backend.clear_session()

## Config data to fit with the model input

Because the **Prediction feature model** splits the data into training and validation datasets itself, all the data must be given to it as a single block.

**Define timeline**

In [None]:
# One time step per row of the train matrix followed by the test matrix.
time = np.arange(sum(m.shape[0] for m in (x_train_matrix, x_test_matrix)))
time.shape

**Define configuration to deal with the windowed dataset**

In [None]:
def get_tours_conf():
    """Return the split index and the stacked train + test matrix.

    Reads the notebook-level ``x_train_matrix`` and ``x_test_matrix``.

    Returns:
        tuple: ``(split_time, X)``, where ``split_time`` is the number of rows
        in ``x_train_matrix`` and ``X`` is ``x_train_matrix`` followed by
        ``x_test_matrix``, concatenated along the first axis.
    """
    # Rows before this index come from the training matrix.
    n_train_rows = x_train_matrix.shape[0]

    # Code matrix: train rows first, then test rows.
    stacked = np.concatenate((x_train_matrix, x_test_matrix))

    return (n_train_rows, stacked)

In [None]:
# Split index and the full (train + test) matrix.
split_time, X = get_tours_conf()

# Number of features to take into account.
n_influence_features = 10

# Batch size and shuffle buffer size handed to the model below.
batch_size, shuffle_buffer_size = 128, 300

In [None]:
# Display the shape of the stacked matrix as a quick sanity check.
X.shape

## Create models

**Define model**

In [None]:
from Sequence_prediction_factory import Sequence_prediction_multivariate, Sequence_prediction_univariate, Sequence_prediction_embeddings

In [None]:
# Univariate predictor over the full matrix X. The same batch size is used
# for the training and validation batches.
model_univariate = Sequence_prediction_univariate(
    X=X,
    split_time=split_time,
    train_batch_size=batch_size,
    val_batch_size=batch_size,
    shuffle_buffer_size=shuffle_buffer_size,
    CONFIG_PATH=UNI_CONFIG_PATH,
)

In [None]:
# Generic name used by the training loop below; points at the univariate model.
model_prediction = model_univariate

In [None]:
from IPython.display import clear_output
# Import only the timer: a plain `import time` would rebind the notebook-level
# `time` variable (the timeline array created earlier) to the module.
# perf_counter is also monotonic, which suits elapsed-time measurement.
from time import perf_counter

# Index used while saving/restoring the shared untrained weights.
default_index = 0

# Length of the history (window size) to train with.
for window_size in range(4, 5):

    # Define the model with the window size.
    model_prediction.set_window_size(window_size)
    tf.keras.backend.clear_session()
    model = model_prediction.get_model()
    model.define_model(conv_filter=20, lstm_filter=40, dense_filter=20, prediction_length=1)
    model.set_index(default_index)
    # Save the untrained weights once so they can be reloaded before each
    # feature is trained.
    model.save_weights(model_prediction.get_untrained_weights_path())

    start_time = perf_counter()

    for i in range(model_prediction.get_x_features()):
        clear_output(wait=True)
        print("---------- Feature %s -------------" % (i))

        # Load the untrained weights for the next training run.
        model.set_index(default_index)
        model.load_weights(model_prediction.get_untrained_weights_path())

        # Train the model with feature i.
        model.set_index(i)
        model.train_model(epochs=8, lr=1e-6)

        # Save the trained weights.
        model.save_weights(model_prediction.get_trained_weights_path())

    print("--- %s seconds ---" % (perf_counter() - start_time))