In [None]:
import tensorflow as tf
import numpy as np
import os
import random
import pandas as pd
import seaborn as sns
from datetime import datetime
import matplotlib.pyplot as plt
plt.rc('font', size=16)
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import KFold 
import warnings
# Silence every Python warning for the rest of the session
warnings.filterwarnings('ignore')
# Only let TensorFlow's logger emit messages at ERROR level or above
tf.get_logger().setLevel('ERROR')

# Short aliases for the Keras namespaces used in the cells below
tfk = tf.keras
tfkl = tf.keras.layers
# Record the TensorFlow version this notebook was run with
print(tf.__version__)

2.7.0


In [None]:
# Fix the seed of every random number generator used by the notebook
# so that repeated runs are reproducible
seed = 42
# NOTE: Python reads PYTHONHASHSEED at interpreter start-up, so this
# assignment reaches processes launched from here, not the running kernel
os.environ['PYTHONHASHSEED'] = str(seed)
for seed_rng in (random.seed,
                 np.random.seed,
                 tf.random.set_seed,
                 tf.compat.v1.set_random_seed):
    seed_rng(seed)

In [None]:
# Load the dataset from the CSV file in the working directory,
# print its (rows, columns) shape and preview the first rows
dataset = pd.read_csv('Training.csv')
print(dataset.shape)
dataset.head()

(68528, 7)


Unnamed: 0,Sponginess,Wonder level,Crunchiness,Loudness on impact,Meme creativity,Soap slipperiness,Hype root
0,7.97698,4.33494,10.67282,1.76692,3.2244,51.68146,3.65434
1,8.07824,4.44616,10.5616,1.70716,3.32566,51.563598,3.47672
2,8.02844,4.22372,10.5616,1.64906,3.1746,50.86308,3.47672
3,8.02844,4.22372,10.5616,1.70716,3.1746,45.841581,3.47672
4,7.87572,4.44616,10.45038,1.70716,3.27586,47.126421,3.47672


In [None]:
def build_sequences(df, target_labels, window=200, stride=20, telescope=100):
    """Slice a time series into overlapping (input window, future target) pairs.

    The series is left-padded with zero rows until its length is a multiple of
    ``window``. A window of ``window`` consecutive rows is then taken every
    ``stride`` rows and paired with the ``telescope`` rows that immediately
    follow it.

    Parameters
    ----------
    df : pandas.DataFrame
        Time series with one row per time step; every column is used as an
        input feature.
    target_labels : label or list-like of labels
        Column(s) of ``df`` to be predicted.
    window : int
        Number of time steps in each input sequence.
    stride : int
        Offset between the starts of consecutive windows; must divide
        ``window``.
    telescope : int
        Number of time steps in each target sequence.

    Returns
    -------
    dataset : numpy.ndarray
        Input sequences, shape ``(n_sequences, window, n_features)``.
    labels : numpy.ndarray
        Target sequences, shape ``(n_sequences, telescope, n_targets)``; the
        last axis is absent when ``target_labels`` selects a single column.

    Raises
    ------
    AssertionError
        If ``window`` is not a multiple of ``stride``.
    """
    # Sanity check to avoid runtime errors
    assert window % stride == 0
    inputs = df.copy().values
    targets = df[target_labels].copy().values

    remainder = len(df) % window
    if remainder != 0:
        # Left-pad with zeros so the padded length is a multiple of the window.
        # shape[1:] keeps this valid for 1-D targets (a single label column).
        padding_len = window - remainder
        inputs = np.concatenate(
            (np.zeros((padding_len,) + inputs.shape[1:], dtype='float64'), inputs))
        targets = np.concatenate(
            (np.zeros((padding_len,) + targets.shape[1:], dtype='float64'), targets))
        assert len(inputs) % window == 0

    # The last admissible start leaves exactly `window` input rows followed by
    # `telescope` target rows; the +1 makes that start inclusive so the most
    # recent samples are not silently dropped.
    last_start = len(inputs) - window - telescope
    starts = range(0, last_start + 1, stride)

    dataset = np.array([inputs[start:start + window] for start in starts])
    labels = np.array([targets[start + window:start + window + telescope]
                       for start in starts])
    return dataset, labels

In [None]:
# The window length and stride were selected through cross-validation (CV);
# the model was then retrained on the entire dataset with these values.
# NOTE(review): the fit call below still passes validation_split=.1, so part
# of the data is held out for validation - confirm this is intended.
window = 400
stride = 20

In [None]:
# Every column of the dataset is a forecasting target (multivariate output)
target_labels = dataset.columns
# Number of future time steps in each target sequence
telescope = 50

In [None]:
# Cut the series into input windows X and the target sequences y that
# immediately follow each window, then display both array shapes
X, y = build_sequences(dataset, target_labels, window, stride, telescope)
X.shape, y.shape

((3418, 400, 7), (3418, 50, 7))

In [None]:
# Per-sample shapes (leading sample axis dropped); read as globals by
# sequence2sequence() when it builds the network
input_shape = X.shape[1:]
output_shape = y.shape[1:]
# Training hyperparameters passed to model.fit
batch_size = 64
epochs = 200  # upper bound; the EarlyStopping callback may end training sooner

In [None]:
# Factory for the encoder-decoder forecasting network
def sequence2sequence():
    """Build and compile an LSTM encoder-decoder model.

    Reads the notebook-level ``input_shape``, ``output_shape`` and
    ``telescope``. The encoder LSTM consumes the input window; its final
    output is repeated ``telescope`` times to form the decoder input, and its
    final hidden/cell states initialise the decoder LSTM. A time-distributed
    dense layer maps every decoder step to ``output_shape[-1]`` values.

    Returns
    -------
    tf.keras.Model
        Model compiled with mean squared error loss, the Adam optimizer and
        the ``mae`` metric.
    """
    # ENCODER: keep the last output together with the final (h, c) states
    encoder_input = tfk.Input(shape=input_shape, name='encoder_input')
    encoder_lstm = tfkl.LSTM(units=256, return_state=True, name='encoder_lstm')
    encoder_last, state_h, state_c = encoder_lstm(encoder_input)

    # DECODER: feed the encoder summary at every one of the `telescope` steps
    # and start from the encoder's final states
    decoder_inputs = tfkl.RepeatVector(telescope)(encoder_last)
    decoder_layer = tfkl.LSTM(
        units=256,
        return_sequences=True,
        return_state=True,
        name='decoder_lstm',
    )
    decoder_sequence, _, _ = decoder_layer(
        decoder_inputs, initial_state=[state_h, state_c])

    # Project each decoder step onto the target features
    per_step_projection = tfkl.TimeDistributed(tfkl.Dense(output_shape[-1]))
    decoder_out = per_step_projection(decoder_sequence)

    # MODEL
    model = tfk.Model(inputs=encoder_input, outputs=decoder_out)
    model.compile(
        loss=tfk.losses.MeanSquaredError(),
        optimizer=tfk.optimizers.Adam(),
        metrics=['mae'],
    )
    return model

In [None]:
# Build a fresh network and print its layer-by-layer summary
model = sequence2sequence()
model.summary()

# Both callbacks monitor the validation MAE (lower is better):
# stop after 10 epochs without improvement and restore the best weights;
# halve the learning rate after 5 such epochs, down to a floor of 1e-5
early_stopping = tfk.callbacks.EarlyStopping(
    monitor='val_mae', mode='min', patience=10, restore_best_weights=True)
reduce_lr = tfk.callbacks.ReduceLROnPlateau(
    monitor='val_mae', mode='min', patience=5, factor=0.5, min_lr=1e-5)

# Keras takes the validation fraction from the last samples of X / y,
# before any shuffling
training_run = model.fit(
    x=X,
    y=y,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=.1,
    callbacks=[early_stopping, reduce_lr],
)
# Keep only the dictionary of per-epoch losses and metrics
history = training_run.history

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 encoder_input (InputLayer)     [(None, 400, 7)]     0           []                               
                                                                                                  
 encoder_lstm (LSTM)            [(None, 256),        270336      ['encoder_input[0][0]']          
                                 (None, 256),                                                     
                                 (None, 256)]                                                     
                                                                                                  
 repeat_vector (RepeatVector)   (None, 50, 256)      0           ['encoder_lstm[0][0]']           
                                                                                              

In [None]:
# Derive the export directory from the windowing parameters so its name
# cannot drift from the configuration the model was actually trained with
model.save(f'Seq2Seq_w{window}_s{stride}')

