# Anchor-based multi-class classification

* p anchors per class
* k classes
* n features
* m examples

In [6]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

In [9]:
# Problem dimensions used for a quick shape experiment:
# p anchors per class, k classes, n features, m examples.
p, k, n, m = 5, 10, 300, 5000

# Random anchors of shape (k, p, n) and a random data matrix of shape (n, m).
A = np.random.randn(k, p, n)
X = np.random.randn(n, m)

# Contracting the last axis of A with the first axis of X gives (k, p, m).
A.dot(X).shape

(10, 5, 5000)

In [11]:
# Show the built-in documentation for tf.transpose (signature and `perm` semantics).
help(tf.transpose)

Help on function transpose in module tensorflow.python.ops.array_ops:

transpose(a, perm=None, name='transpose', conjugate=False)
    Transposes `a`. Permutes the dimensions according to `perm`.
    
    The returned tensor's dimension i will correspond to the input dimension
    `perm[i]`. If `perm` is not given, it is set to (n-1...0), where n is
    the rank of the input tensor. Hence by default, this operation performs a
    regular matrix transpose on 2-D input Tensors. If conjugate is True and
    `a.dtype` is either `complex64` or `complex128` then the values of `a`
    are conjugated and transposed.
    
    For example:
    
    ```python
    x = tf.constant([[1, 2, 3], [4, 5, 6]])
    tf.transpose(x)  # [[1, 4]
                     #  [2, 5]
                     #  [3, 6]]
    
    # Equivalently
    tf.transpose(x, perm=[1, 0])  # [[1, 4]
                                  #  [2, 5]
                                  #  [3, 6]]
    
    # If x is complex, setting conjugate=Tru

In [None]:
def random_mini_batches(X_train, Y_train, minibatch_size, seed):
    """Split a training set into randomly shuffled mini-batches.

    Examples are stored one per column, so the columns of ``X_train`` and
    ``Y_train`` are permuted with the same random order (keeping every example
    paired with its label) and then cut into consecutive slices.

    Args:
        X_train: 2-D array of shape (n, m): n features, m examples.
        Y_train: 2-D array with m columns, one label column per example.
        minibatch_size: positive number of examples per mini-batch. The last
            mini-batch is smaller when m is not a multiple of this value.
        seed: passed to ``np.random.seed`` so the shuffle is reproducible.

    Returns:
        A list of ``(X_batch, Y_batch)`` tuples covering every example once.

    Raises:
        ValueError: if ``minibatch_size`` is not positive (the slicing loop
            could never advance).
    """
    if minibatch_size <= 0:
        raise ValueError("minibatch_size must be a positive integer")

    np.random.seed(seed)
    m = X_train.shape[1]

    # Shuffle along the example axis (columns), not the feature axis, and
    # apply the same order to X and Y so labels stay aligned.
    order = np.random.permutation(m)
    X_shuffled = X_train[:, order]
    Y_shuffled = Y_train[:, order]

    return [
        (X_shuffled[:, start:start + minibatch_size],
         Y_shuffled[:, start:start + minibatch_size])
        for start in range(0, m, minibatch_size)
    ]

def one_hot():
    """Unimplemented placeholder (presumably for one-hot label encoding).

    Takes no arguments and returns None.
    """
    return None

def create_placeholders(n, k):
    """Create the graph inputs for a batch of examples and its labels.

    Both placeholders hold one example per column; the second dimension is
    left as None so that any batch size can be fed.

    Args:
        n: number of rows of the feature placeholder "X".
        k: number of rows of the label placeholder "Y".

    Returns:
        Tuple ``(X, Y)`` of float32 placeholders with shapes [n, None] and
        [k, None].
    """
    features = tf.placeholder(dtype=tf.float32, shape=[n, None], name="X")
    labels = tf.placeholder(dtype=tf.float32, shape=[k, None], name="Y")
    return features, labels

def initialize_params(p, k, n):
    """Create the trainable variables of the anchor-based classifier.

    Args:
        p: number of anchors per class.
        k: number of classes.
        n: number of features.

    Returns:
        Dict with the variables
        "A": anchors, shape [k, p, n], Xavier-initialized with seed 1;
        "w": shape [k, 1], initialized to zeros;
        "b": shape [k, 1], initialized to zeros.
    """
    anchors = tf.get_variable(
        "A",
        shape=[k, p, n],
        dtype=tf.float32,
        initializer=tf.contrib.layers.xavier_initializer(seed=1))
    weights = tf.get_variable(
        "w",
        shape=[k, 1],
        dtype=tf.float32,
        initializer=tf.zeros_initializer())
    bias = tf.get_variable(
        "b",
        shape=[k, 1],
        dtype=tf.float32,
        initializer=tf.zeros_initializer())

    return {"A": anchors, "w": weights, "b": bias}

def forward_propagate(X, params, epsilon = 1e-8):
    """Compute class probabilities from cosine distances to the class anchors.

    For every class, the cosine distance (1 - cosine similarity) between each
    example and each of the class's anchors is computed; the distances to the
    p anchors of a class are multiplied together, scaled and shifted per class
    by ``w`` and ``b``, and normalized with a softmax over the classes.

    Args:
        X: tensor of shape (n, m), one example per column.
        params: dict holding
            "A": anchors, shape (k, p, n);
            "w": per-class scale, shape (k, 1);
            "b": per-class bias, shape (k, 1).
        epsilon: lower bound applied to the product of norms so that an
            all-zero anchor or example does not cause a division by zero.

    Returns:
        H: tensor of shape (k, m); column j is the softmax distribution over
        the k classes for example j.
    """
    A = params["A"]
    w = params["w"]
    b = params["b"]

    # Vector norms are taken along the feature axis: axis 2 of A, axis 0 of X.
    norm_A = tf.expand_dims(tf.norm(A, axis = 2), 2)       # shape = (k, p, 1)
    norm_X = tf.reshape(tf.norm(X, axis = 0), [1, 1, -1])  # shape = (1, 1, m)
    norm = tf.maximum(norm_A * norm_X, epsilon)            # shape = (k, p, m)

    # Contract the feature axis explicitly; tensordot handles the rank-3 by
    # rank-2 product, which tf.matmul does not support in every TF version.
    dots = tf.tensordot(A, X, axes = [[2], [0]])           # shape = (k, p, m)

    sim = tf.divide(dots, norm) # shape = (k, p, m)
    dist = 1 - sim

    # Product over the anchor axis already yields (k, m); no reshape needed.
    D = tf.reduce_prod(dist, axis = 1) # shape = (k, m)

    # Per-class scale and bias (broadcast over examples). A matmul with w^T
    # would add the same value to every class and cancel in the softmax.
    logits = w * D + b # shape = (k, m)

    # Softmax over the class axis: transpose so classes are the last axis
    # (the default softmax axis), then transpose back to (k, m).
    H = tf.transpose(tf.nn.softmax(tf.transpose(logits))) # shape = (k, m)

    return H

def get_cost(Y, H, epsilon = 0.0001):
    """Average cross-entropy between labels Y and predictions H.

    Args:
        Y: label tensor, shape (k, m), one example per column.
        H: prediction tensor, shape (k, m).
        epsilon: floor applied to H before the logarithm so log(0) is never
            evaluated.

    Returns:
        Scalar tensor: the per-example cross-entropy summed over all examples
        and divided by the number of columns of Y.
    """
    num_examples = tf.cast(tf.shape(Y)[1], "float32")

    clipped = tf.maximum(H, epsilon)
    per_example = - tf.reduce_sum(Y * tf.log(clipped), axis = 0)
    total = tf.reduce_sum(per_example)

    return 1/num_examples * total

def get_A_reg(A, q=0, epsilon = 0.0001):
    """Repulsion-style regularizer over the anchors of each class.

    Computes the cosine distance between every pair of anchors belonging to
    the same class and sums ``q**2 / distance`` over all pairs and classes.

    Args:
        A: anchor tensor of shape (k, p, n).
        q: strength of the term; when 0 the regularizer is disabled.
        epsilon: floor applied to the pairwise distances to avoid dividing by
            zero.

    Returns:
        The Python int 0 when ``q == 0``; otherwise a scalar tensor.
    """
    if q == 0:
        return 0

    # No shape unpacking is needed (and a Tensor cannot be unpacked in graph
    # mode): expand_dims / transpose_b keep every shape implicit.
    norm_A = tf.expand_dims(tf.norm(A, axis = 2), 2)       # shape = (k, p, 1)
    norm = tf.matmul(norm_A, norm_A, transpose_b = True)   # shape = (k, p, p)

    gram = tf.matmul(A, A, transpose_b = True)             # shape = (k, p, p)
    sim = tf.divide(gram, norm)

    # NOTE: the diagonal (an anchor paired with itself) has similarity 1, so
    # its distance is clamped to epsilon and contributes q**2 / epsilon.
    dist = tf.maximum(1 - sim, epsilon)

    energy = tf.reduce_sum(q**2 / dist)

    return energy

def get_w_reg(w, c):
    """Unimplemented placeholder (presumably a regularizer on ``w``).

    Both arguments are currently ignored and None is returned.
    """
    return None

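# ##### -------------------------------------------------------- #####
# NOTE: the training loop below is a commented-out draft and is out of date
# with the helpers above: it calls initialize_params(k, n_x) and get_reg(...),
# whereas this file defines initialize_params(p, k, n) and get_A_reg(A, q).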

# def model(X_train, 
#           Y_train, 
#           X_test, 
#           Y_test, 
#           k = 4,
#           q = 0,
#           learning_rate = 0.0001,
#           num_epochs = 1500, 
#           minibatch_size = 32, 
#           print_cost = True):
    
#     # X_train.shape = (n_x, m)
#     # Y_train.shape = (1, m)
    
#     tf.reset_default_graph()
    
#     tf.set_random_seed(1)
#     seed = 2
    
#     (n_x, m) = X_train.shape
#     n_y = Y_train.shape[0]
#     costs = []
    
#     X, Y = create_placeholders(n_x, n_y)
#     params = initialize_params(k, n_x)
#     H = forward_propagate(X, params)
#     J = get_cost(Y, H) + get_reg(params["A"], q = q)
    
#     optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(J)

#     init = tf.global_variables_initializer()
    
#     with tf.Session() as sess:
        
#         sess.run(init)
        
#         for epoch in range(num_epochs):

#             epoch_cost = 0
#             num_minibatches = int(m / minibatch_size)
#             seed += 1
#             minibatches = random_mini_batches(X_train, Y_train, minibatch_size, seed)
            
#             for minibatch in minibatches:

#                 (minibatch_X, minibatch_Y) = minibatch
#                 _ , minibatch_cost = sess.run([optimizer, J], feed_dict={X: minibatch_X, Y: minibatch_Y})
#                 epoch_cost += minibatch_cost / num_minibatches
            
#             if print_cost == True and epoch % 200 == 0:
#                 print ("Cost after epoch %i: %f" % (epoch, epoch_cost))
#             if print_cost == True and epoch % 5 == 0:
#                 costs.append(epoch_cost)
                
#         plt.plot(np.squeeze(costs))
#         plt.ylabel('cost')
#         plt.xlabel('iterations (per tens)')
#         plt.title("Learning rate =" + str(learning_rate))
#         plt.show()

#         params = sess.run(params)
#         print("Training complete.")
        
#         pred = sess.run(H, feed_dict = {X: X_test})
        
#         return pred, params