#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------
/*
* Factorization Machines for binary classification.
*/
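/*
* For reference, a factorization machine (Rendle, 2010) with factorization
* dimensionality k scores an example x of d features as
*
*   y(x) = w0 + sum_{i=1..d} W[i]*x[i]
*             + sum_{i=1..d} sum_{j=i+1..d} dot(V[i,], V[j,]) * x[i]*x[j]
*
* where w0 is the global bias, W holds the per-feature weights, and row i
* of V is the k-dimensional factor vector for feature i, so the inner
* products dot(V[i,], V[j,]) model all pairwise feature interactions. For
* binary classification this score is passed through a sigmoid to obtain a
* probability, as done in train() and predict() below.
*/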
# Imports
source("nn/optim/adam.dml") as adam
source("nn/layers/fm.dml") as fm
source("nn/layers/log_loss.dml") as log_loss
source("nn/layers/sigmoid.dml") as sigmoid
source("nn/layers/l2_reg.dml") as l2_reg
source("nn/layers/cross_entropy_loss.dml") as cross_entropy_loss
train = function(matrix[double] X, matrix[double] y, matrix[double] X_val, matrix[double] y_val, int epochs)
return (matrix[double] w0, matrix[double] W, matrix[double] V, double loss) {
/*
* Trains the FM model for binary classification.
*
* Inputs:
* - X : n training examples with d features, of shape (n, d).
* - y : binary labels (0 or 1) for the n training examples, of shape (n, 1).
* - X_val : validation examples, of shape (n_val, d).
* - y_val : binary labels for the validation examples, of shape (n_val, 1).
* - epochs : number of passes over the training data.
*
* Outputs:
* - w0, W, V : trained model parameters.
* - loss : training loss, computed with log_loss (an L2 penalty with
*   lambda = 5e-04 is added when the loss is reported).
*
* Propagation through the layers:
* setup:    fm::init -> adam::init
* forward:  fm::forward -> sigmoid::forward -> log_loss::forward
* backward: log_loss::backward -> sigmoid::backward -> fm::backward -> adam::update
*/
n = nrow(X);
d = ncol(X);
k = 2; # factorization dimensionality; only k = 2 is supported for now.
# 1.initialize fm core
[w0, W, V] = fm::init(d, k);
# 2.initialize adam optimizer
## Default values for some parameters
lr = 0.001;
beta1 = 0.9; # [0, 1)
beta2 = 0.999; # [0, 1)
epsilon = 0.00000001;
t = 0;
# [mX, vX] = adam::init(X); # to optimize input.
[mw0, vw0] = adam::init(w0);
[mW, vW] = adam::init(W);
[mV, vV] = adam::init(V);
# Regularization
lambda = 5e-04
# Optimize
print("Starting optimization")
batch_size = 10
N = n;
iters = ceil(N / batch_size) # iterations per epoch, covering all N examples
for (e in 1:epochs) {
for (i in 1:iters) {
# Get the next batch
beg = ((i-1) * batch_size) %% N + 1
end = min(N, beg + batch_size - 1)
X_batch = X[beg:end,]
y_batch = y[beg:end,]
# 3.Send inputs through fm::forward
y_res = fm::forward(X_batch, w0, W, V);
# 4.Send the above result through sigmoid::forward
sfy = sigmoid::forward(y_res);
# 5.Send the above result through log_loss::forward
loss = log_loss::forward(sfy, y_batch);
# Compute loss & accuracy for training & validation data every 100 iterations.
if (i %% 100 == 0) {
# Compute training loss & accuracy
loss_data = loss; # data loss, already computed in step 5 above
loss_reg_w0 = l2_reg::forward(w0, lambda);
loss_reg_W = l2_reg::forward(W, lambda);
loss_reg_V = l2_reg::forward(V, lambda);
accuracy = mean((sfy<0.5) == (y_batch<0.5));
loss = loss_data + loss_reg_w0 + loss_reg_W + loss_reg_V;
# Compute validation loss & accuracy
probs_val = predict(X_val, w0, W, V)
loss_val = log_loss::forward(probs_val, y_val)
accuracy_val = mean((probs_val<0.5) == (y_val<0.5))
# Output results
print("Epoch: " + e + ", Iter: " + i + ", Train Loss: " + loss + ", Train Accuracy: "
+ accuracy + ", Val Loss: " + loss_val + ", Val Accuracy: " + accuracy_val)
}
# 6.Send the result of sigmoid::forward and the correct labels y to log_loss::backward
dsfy = log_loss::backward(sfy, y_batch);
# 7.Send the above result through sigmoid::backward
dy = sigmoid::backward(dsfy, y_res);
# 8.Send the above result through fm::backward
[dw0, dW, dV] = fm::backward(dy, X_batch, w0, W, V);
# Add the L2 regularization gradients, so the updates match the
# regularized loss reported above.
dw0 = dw0 + l2_reg::backward(w0, lambda);
dW = dW + l2_reg::backward(W, lambda);
dV = dV + l2_reg::backward(V, lambda);
# 9. update the timestep
t = (e - 1) * iters + i - 1; # 0-based timestep, increases monotonically across epochs
# 10.Call adam::update for all parameters
# In case we want to optimize the inputs (X) as well, as in deep dream:
#[X, mX, vX] = adam::update(X, dX, lr, beta1, beta2, epsilon, t, mX, vX);
[w0, mw0, vw0] = adam::update(w0, dw0, lr, beta1, beta2, epsilon, t, mw0, vw0);
[W, mW, vW] = adam::update(W, dW, lr, beta1, beta2, epsilon, t, mW, vW);
[V, mV, vV] = adam::update(V, dV, lr, beta1, beta2, epsilon, t, mV, vV);
}
}
}
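/*
* Note on steps 6-8 above: with score s = fm::forward(...), probability
* p = sigmoid(s), and log loss L = mean(-y*log(p) - (1-y)*log(1-p)), the
* chain rule gives
*
*   dL/dp = (p - y) / (p * (1 - p))   (log_loss::backward)
*   dp/ds = p * (1 - p)               (sigmoid::backward)
*
* so the gradient that reaches fm::backward simplifies to (p - y) / n,
* the familiar "probability minus label" residual averaged over the batch.
*/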
predict = function(matrix[double] X, matrix[double] w0, matrix[double] W, matrix[double] V)
return (matrix[double] out) {
/*
* Computes predictions for the given inputs.
*
* Inputs:
* - X : n examples with d features, of shape (n, d).
* - w0, W, V : trained model parameters.
*
* Outputs:
* - out : predicted probabilities of the positive class, of shape (n, 1).
*/
# 1.Send inputs through fm::forward
y = fm::forward(X, w0, W, V);
# 2.Send the above result through sigmoid::forward to obtain probabilities
out = sigmoid::forward(y);
}
eval = function(matrix[double] probs, matrix[double] y)
return (double loss, double accuracy) {
/*
* Computes log loss and accuracy for the given predictions.
*
* Inputs:
* - probs : predicted probabilities, of shape (n, 1).
* - y : binary labels (0 or 1), of shape (n, 1).
*
* Outputs:
* - loss : mean log loss.
* - accuracy : fraction of predictions matching the labels after
*   thresholding at 0.5.
*/
# 1. compute log loss
loss = log_loss::forward(probs, y);
# 2. compute accuracy
accuracy = mean((probs < 0.5) == (y < 0.5));
}
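/*
* Example usage (a minimal sketch; the file names and the choice of 10
* epochs are hypothetical placeholders):
*
*   X = read("X_train.csv", format="csv")
*   y = read("y_train.csv", format="csv")  # binary labels in {0, 1}
*   X_val = read("X_val.csv", format="csv")
*   y_val = read("y_val.csv", format="csv")
*
*   [w0, W, V, loss] = train(X, y, X_val, y_val, 10)
*   probs = predict(X_val, w0, W, V)
*   [loss_val, accuracy_val] = eval(probs, y_val)
*   print("Val loss: " + loss_val + ", Val accuracy: " + accuracy_val)
*/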