In [None]:
import os
import json
import zipfile
from pprint import pprint
import numpy as np
import tensorflow.compat.v1 as tf
import keras as k
from Utilities import cplot_loss

def percentage_error(actual, predicted):
    """Element-wise relative error of `predicted` with respect to `actual`.

    Where ``actual`` is non-zero the result is
    ``(actual - predicted) / actual``. Where ``actual`` is exactly zero the
    relative error is undefined, so the fallback ``predicted / mean(actual)``
    is used instead (the mean is taken over the whole ``actual`` array).

    Args:
        actual: Array-like of ground-truth values, any shape.
        predicted: Array-like of predictions, broadcastable to the shape of
            ``actual``.

    Returns:
        A float ndarray with the same shape as ``actual``.

    Raises:
        ValueError: If ``predicted`` cannot be broadcast to ``actual``'s shape.
    """
    actual = np.asarray(actual, dtype=float)
    predicted = np.broadcast_to(np.asarray(predicted, dtype=float), actual.shape)

    nonzero = actual != 0
    res = np.empty(actual.shape, dtype=float)

    # Divide only where the denominator is non-zero; the remaining slots are
    # filled by the fallback below, so they are never read uninitialised.
    np.divide(actual - predicted, actual, out=res, where=nonzero)

    zero = ~nonzero
    if zero.any():
        # The mean is loop-invariant: compute it once rather than once per
        # zero entry.
        res[zero] = predicted[zero] / np.mean(actual)
    return res

def mean_absolute_percentage_error(y_true, y_pred):
    """Mean absolute percentage error, expressed in percent.

    Both inputs are converted to ndarrays, passed through
    `percentage_error`, and the mean of the absolute values is scaled by 100.
    """
    actual = np.asarray(y_true)
    predicted = np.asarray(y_pred)
    abs_rel_err = np.abs(percentage_error(actual, predicted))
    return np.mean(abs_rel_err) * 100


def mape_keras(y_true, y_pred, threshold=0.1):
    """Keras-backend MAPE metric with a floor on the denominator.

    The absolute target is clipped from below at `threshold` before dividing,
    so targets at or near zero do not blow up the ratio. The result is the
    mean over the last axis, scaled to percent.
    """
    backend = k.backend
    denom = backend.clip(backend.abs(y_true), threshold, None)
    rel_err = backend.abs((y_true - y_pred) / denom)
    return 100.0 * backend.mean(rel_err, axis=-1)

def mae(y_true, y_pred):
    """Mean absolute error between predictions and targets (NumPy)."""
    abs_err = np.abs(y_pred - y_true)
    return np.mean(abs_err)

def rmse(y_true, y_pred):
    """Root-mean-square error between predictions and targets (NumPy)."""
    sq_err = np.square(y_pred - y_true)
    mean_sq_err = np.mean(sq_err)
    return np.sqrt(mean_sq_err)

def mape(y_true, y_pred, threshold=0.1):
    """NumPy MAPE (in percent) with the denominator floored at `threshold`.

    The error is averaged over the last axis first, and the resulting values
    are then averaged again to give a single number.
    """
    denom = np.clip(np.abs(y_true), threshold, None)
    rel_err = np.abs((y_true - y_pred) / denom)
    per_row = np.mean(rel_err, axis=-1)
    return 100.0 * per_row.mean()

def transform(idxs):
    """Split a 2-D index array into a list with one entry per column.

    Entry ``i`` of the result is ``idxs[:, i]``.
    """
    columns = []
    for col in range(idxs.shape[1]):
        columns.append(idxs[:, col])
    return columns

def set_session(device_count=None, seed=0):
    """Build a TF1-style `ConfigProto` with `allow_growth` GPU options.

    Args:
        device_count: Forwarded to `tf.ConfigProto(device_count=...)`.
        seed: Accepted but not read anywhere in this function.

    Returns:
        None.

    NOTE(review): the config object is constructed and then discarded. No
    session is created or registered, and `seed` is unused, so calling this
    has no lasting effect visible here. Confirm whether session creation and
    RNG seeding were removed on purpose.
    """
    config = tf.ConfigProto(
        gpu_options=tf.GPUOptions(allow_growth=True),
        device_count=device_count,
    )

def get_metrics(model, x, y, batch_size=1024):
    """Predict with `model` on `x` and score the predictions against `y`.

    Args:
        model: Object exposing ``predict(x, batch_size=..., verbose=...)``
            whose result supports ``.flatten()``.
        x: Model inputs, passed to ``model.predict`` unchanged.
        y: Ground-truth values compared against the flattened predictions.
        batch_size: Batch size forwarded to ``model.predict``.

    Returns:
        A 4-tuple ``(info, rmse, mape, mae)``. ``info`` is a dict with keys
        ``"rmse"``, ``"mape"`` and ``"mae"`` holding plain Python floats; the
        remaining three items are the same metrics as returned by the metric
        functions.
    """
    yp = model.predict(x, batch_size=batch_size, verbose=1).flatten()

    # Score once and reuse: each metric is a full pass over the data, so
    # computing it separately for the dict and for the tuple doubles the work.
    rmse_val = rmse(y, yp)
    mape_val = mape(y, yp)
    mae_val = mae(y, yp)

    info = {
        "rmse": float(rmse_val),
        "mape": float(mape_val),
        "mae": float(mae_val),
    }
    return info, rmse_val, mape_val, mae_val

def create_costco(shape, rank, nc):
    """Assemble the embedding + two-convolution Keras model.

    Args:
        shape: Sequence of mode sizes; one integer input and one embedding
            table (``input_dim=shape[i]``) is created per entry.
        rank: Embedding width (``output_dim``) and height of the second
            convolution kernel.
        nc: Number of filters in both convolutions and units in the hidden
            dense layer.

    Returns:
        An uncompiled ``k.Model`` taking ``len(shape)`` int32 inputs of shape
        ``(1,)`` and producing a single ReLU-activated output.
    """
    n_modes = len(shape)

    # One scalar index input per mode.
    inputs = []
    for _ in range(n_modes):
        inputs.append(k.Input(shape=(1,), dtype="int32"))

    # One embedding table per mode, applied to the matching input.
    embeds = []
    for mode_size, mode_input in zip(shape, inputs):
        embedding = k.layers.Embedding(output_dim=rank, input_dim=mode_size)
        embeds.append(embedding(mode_input))

    stacked = k.layers.Concatenate(axis=1)(embeds)
    # NOTE(review): Reshape regroups the concatenated values into
    # (rank, n_modes, 1) without transposing; confirm this layout is intended.
    grid = k.layers.Reshape(target_shape=(rank, n_modes, 1))(stacked)

    # First convolution: kernel covers all modes in one row.
    across_modes = k.layers.Conv2D(
        nc,
        kernel_size=(1, n_modes),
        activation="relu",
        padding="valid",
    )(grid)
    # Second convolution: kernel covers the full `rank` extent.
    across_rank = k.layers.Conv2D(
        nc,
        kernel_size=(rank, 1),
        activation="relu",
        padding="valid",
    )(across_modes)

    flat = k.layers.Flatten()(across_rank)
    hidden = k.layers.Dense(nc, activation="relu")(flat)
    outputs = k.layers.Dense(1, activation="relu")(hidden)

    return k.Model(inputs=inputs, outputs=outputs)



# Per-run metric histories; one value is appended per repetition below.
train_rmse_list = []
train_mae_list = []
train_mape_list = []
test_rmse_list = []
test_mae_list = []
test_mape_list = []

# Hyperparameters are identical for every repetition, so set them once.
lr = 1e-4
epochs = 50
batch_size = 256
seed = 3
verbose = 1
r = 20
rank = r
nc = rank  # Conv2D filters / Dense units in create_costco are tied to the rank

for i in range(10):
    # The same `seed` value is passed on every repetition.
    set_session(device_count={"GPU": 0}, seed=seed)
    optim = k.optimizers.Adam(lr=lr)

    model = create_costco(shape, rank, nc)
    model.compile(optim, loss=["mse"], metrics=["mae", mape_keras])
    hists = model.fit(
        x=transform(tr_idxs),
        y=tr_vals,
        verbose=verbose,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=0.1,
        callbacks=[
            k.callbacks.EarlyStopping(
                monitor="val_mean_absolute_error",
                patience=10,
                restore_best_weights=True,
            )
        ],
    )

    # Unpack into dedicated names. Binding the results to `rmse`/`mape`/`mae`
    # would overwrite the module-level metric functions that get_metrics()
    # calls, making the very next call fail with "object is not callable".
    tr_info, tr_rmse, tr_mape, tr_mae = get_metrics(
        model, transform(tr_idxs), tr_vals
    )
    te_info, te_rmse, te_mape, te_mae = get_metrics(
        model, transform(te_idxs), te_vals
    )

    # Record this repetition so results can be aggregated after the loop.
    train_rmse_list.append(tr_rmse)
    train_mae_list.append(tr_mae)
    train_mape_list.append(tr_mape)
    test_rmse_list.append(te_rmse)
    test_mae_list.append(te_mae)
    test_mape_list.append(te_mape)

    pprint({'train': tr_info, 'test': te_info})

In [18]:
# Mean of the entries of `x` after converting each one to float.
# NOTE(review): `x` is not defined in the cells shown here; verify its source.
np.mean([float(value) for value in x])

1.83