In [1]:
def softmax_loss_naive(W, X, y, reg):
  """
  Softmax (cross-entropy) loss and gradient, naive implementation with loops.

  Inputs:
  - W: C x D array of weights, one row per class
  - X: D x N array of data. Data are D-dimensional columns
  - y: 1-dimensional array of length N with integer labels 0...C-1
  - reg: (float) regularization strength

  Returns:
  a tuple of:
  - loss as single float
  - gradient with respect to weights W, a floating-point array with the same
    shape as W
  """
  num_classes = W.shape[0]
  num_train = X.shape[1]

  loss = 0.0
  # The gradient is real-valued even when W holds integers. np.zeros_like(W)
  # would inherit an integer dtype and make the in-place accumulation below
  # fail, so only keep W's dtype when it is already inexact (float/complex).
  grad_dtype = W.dtype if np.issubdtype(W.dtype, np.inexact) else np.float64
  dW = np.zeros(W.shape, dtype=grad_dtype)

  for i in range(num_train):
    x_i = X[:, i]

    # Scores for sample i, one entry per class.
    scores = W.dot(x_i)

    # Shift so the largest score is 0. This leaves the softmax unchanged but
    # keeps exp() from overflowing
    # (see http://cs231n.github.io/linear-classify/#softmax).
    scores = scores - np.max(scores)

    # Exponentiate once and reuse for both the loss and the gradient.
    exp_scores = np.exp(scores)
    sum_exp = 0.0
    for e in exp_scores:
      sum_exp += e

    # L_i = -f_{y_i} + log(sum_j exp(f_j)); averaged after the loop.
    loss += -scores[y[i]] + np.log(sum_exp)

    # Contribution of sample i to the gradient of class row j:
    #   x_i * (p(class = j | x_i) - 1{j == y_i})
    for j in range(num_classes):
      p = exp_scores[j] / sum_exp
      dW[j, :] += (p - (j == y[i])) * x_i

  # Average over the batch.
  loss /= num_train
  dW /= num_train

  # L2 regularization: 0.5 * reg * ||W||^2, whose gradient is reg * W.
  loss += 0.5 * reg * np.sum(W * W)
  dW += reg * W

  return loss, dW

In [2]:
def svm_loss_naive(W, X, y, reg):
    """
    Structured SVM (multiclass hinge) loss, naive implementation with loops.

    Inputs have dimension D, there are C classes, and we operate on minibatches
    of N examples.

    Inputs:
    - W: A numpy array of shape (D, C) containing weights.
    - X: A numpy array of shape (N, D) containing a minibatch of data.
    - y: A numpy array of shape (N,) containing training labels; y[i] = c means
      that X[i] has label c, where 0 <= c < C.
    - reg: (float) regularization strength

    Returns a tuple of:
    - loss as single float
    - gradient with respect to weights W; an array of same shape as W
    """
    dW = np.zeros(W.shape)    # initialize the gradient as zero

    num_classes = W.shape[1]
    num_train = X.shape[0]
    loss = 0.0

    # Accumulate the loss and gradient sample by sample.
    # `range` (not the Python 2-only `xrange`) keeps this runnable on Python 3.
    for i in range(num_train):
        scores = X[i].dot(W)     # shape (C,)
        correct_class_score = scores[y[i]]
        for j in range(num_classes):
            if j == y[i]:
                continue
            # Hinge margin with delta = 1: max(0, s_j - s_{y_i} + 1)
            margin = scores[j] - correct_class_score + 1
            # Only classes that violate the margin add loss and gradient.
            if margin > 0:
                loss += margin
                # Each violating class pushes the correct-class column by -x_i ...
                dW[:, y[i]] += -X[i, :]
                # ... and its own column by +x_i.
                dW[:, j] += X[i, :]

    # Average over the minibatch.
    loss /= num_train
    dW /= num_train

    # L2 regularization: 0.5 * reg * ||W||^2, whose gradient is reg * W.
    loss += 0.5 * reg * np.sum(W * W)
    dW += reg * W

    return loss, dW
 
 
#
def svm_loss_vectorized(W, X, y, reg):
    """
    Multiclass SVM (hinge) loss and gradient computed without Python loops.

    Takes the same arguments as svm_loss_naive -- weights W of shape (D, C),
    data X of shape (N, D), integer labels y of shape (N,) and the
    regularization strength reg -- and returns the same (loss, dW) tuple.
    """
    n_samples = X.shape[0]
    rows = np.arange(n_samples)

    # Class scores for every sample: (N, C).
    class_scores = X.dot(W)
    # Score of each sample's true class, kept as an (N, 1) column so that it
    # broadcasts across that sample's row.
    true_scores = np.reshape(class_scores[rows, y], (n_samples, 1))

    # Hinge margins with delta = 1; the true class never contributes.
    hinge = class_scores - true_scores + 1.0
    hinge[rows, y] = 0.0
    hinge[hinge <= 0] = 0.0

    # Mean data loss plus the L2 penalty 0.5 * reg * ||W||^2.
    data_term = np.sum(hinge) / n_samples
    penalty = 0.5 * reg * np.sum(W * W)
    loss = data_term + penalty

    # Gradient coefficients: +1 for every violating class, and minus the
    # number of violations in the true-class slot of each row.
    coeff = hinge
    coeff[coeff > 0] = 1.0
    violations = np.sum(coeff, axis=1)
    coeff[rows, y] = -violations

    dW = np.zeros(W.shape)
    dW += X.T.dot(coeff) / n_samples + reg * W    # (D, C)
    return loss, dW

In [3]:
import tensorflow as tf
from tensorflow.python.ops import array_ops


def sv_softmax_loss(t=1.0, s=1):
    """Build a Support Vector Guided Softmax (SV-Softmax) loss function.

    Reference: "Support Vector Guided Softmax Loss for Face Recognition",
    https://arxiv.org/pdf/1812.11317.pdf

    Arguments:
        t -- factor used in the re-weighting term h; with t = 1 the exponent
             is 0, h is 1 everywhere and the loss reduces to a scaled softmax
             cross-entropy. Converted with float().
        s -- scale applied to the logits before exponentiation. Converted
             with float().

    Returns:
        A function loss(y_true, logits) that returns a scalar loss tensor.
    """
    t = float(t)
    s = float(s)

    def sv_softmax_loss_fixed(y_true, logits):
        """SV-Softmax loss computed from raw logits.

        Arguments:
            y_true {tensor} -- ground truth labels, shape of
                [batch_size, num_cls]. Presumably one-hot: the true-class
                logit is extracted as sum(y_true * logits) -- confirm against
                the caller.
            logits {tensor} -- model's raw output (not probabilities), shape
                of [batch_size, num_cls].

        Returns:
            [tensor] -- scalar loss, the mean over the batch.
        """
        epsilon = 1.e-9
        zeros = array_ops.zeros_like(logits, dtype=logits.dtype)
        ones = array_ops.ones_like(logits, dtype=logits.dtype)

        # Logit of the ground-truth class, kept as [batch_size, 1].
        logit_y = tf.reduce_sum(tf.multiply(y_true, logits), axis=-1, keepdims=True)
        # Indicator: 1 where a class logit exceeds the true-class logit,
        # 0 otherwise (always 0 at the true class itself).
        I_k = array_ops.where(logit_y >= logits, zeros, ones)

        # Re-weighting term h = exp(s * (t - 1) * (logit + 1) * I_k);
        # equals 1 wherever I_k is 0.
        h = tf.exp(s * tf.multiply(t - 1., tf.multiply(logits + 1., I_k)))

        # exp(s * logits) is needed for both numerator and denominator.
        exp_logits = tf.exp(s * logits)
        denominator = tf.reshape(
            tf.reduce_sum(tf.multiply(exp_logits, h), axis=-1, keepdims=True),
            [-1, 1]) + epsilon
        softmax = exp_logits / denominator

        # We add epsilon so the log below never sees an exact 0.
        softmax = tf.add(softmax, epsilon)
        # tf.math.log replaces tf.log, which is not available as a top-level
        # symbol in TensorFlow 2.x.
        ce = tf.multiply(y_true, -tf.math.log(softmax))
        ce = tf.reduce_sum(ce, axis=1)
        return tf.reduce_mean(ce)

    return sv_softmax_loss_fixed

ModuleNotFoundError: No module named 'tensorflow'