In [3]:
import numpy as np

In [6]:
def compute_probabilities(X, theta, temp_parameter):
    """
    Computes, for each datapoint X[i], the probability that X[i] is labeled as j
    for j = 0, 1, ..., k-1

    Args:
        X - (n, d) NumPy array (n datapoints each with d features)
        theta - (k, d) NumPy array, where row j represents the parameters of our model for label j
        temp_parameter - the temperature parameter of softmax function (scalar)
    Returns:
        H - (k, n) NumPy array, where each entry H[j][i] is the probability that X[i] is labeled as j
    """
    #YOUR CODE HERE
    theta_X = np.dot(theta, X.T)/temp_parameter
    c = np.max(theta_X, axis=0)
    term1 = 1/np.sum(np.exp(theta_X - c), axis=0)
    term2 = np.exp(theta_X - c)
    probs = term1*term2
    return probs



In [11]:
def compute_cost_function(X, Y, theta, lambda_factor, temp_parameter):
    """
    Computes the total cost over every datapoint.

    Args:
        X - (n, d) NumPy array (n datapoints each with d features)
        Y - (n, ) NumPy array containing the labels (a number from 0-9) for each
            data point
        theta - (k, d) NumPy array, where row j represents the parameters of our
                model for label j
        lambda_factor - the regularization constant (scalar)
        temp_parameter - the temperature parameter of softmax function (scalar)

    Returns
        c - the cost value (scalar)
    """
    n = X.shape[0]
    k = theta.shape[0]
    reg = lambda_factor/2 * np.linalg.norm(theta)

    log_probs = np.log(compute_probabilities(X, theta, temp_parameter))

    cost = 0
    for i in range(n):
        for j in range(k):
            if Y[i] == j:
                cost += log_probs[j]

    return -np.sum(cost)/len(Y)**2 + reg

In [12]:
X = np.array([[1.,        0.8952439,  0.19803336, 0.96064058, 0.53690055],
              [1.,        0.03260465, 0.36628576, 0.80935996, 0.69563339],
              [1.,        0.12633139, 0.87646967, 0.56129179, 0.71368294]])

Y = np.array([[3, 2, 0]]).T

theta = np.array([[0.82876051, 0.55741146, 0.22405162, 0.11129421, 0.59229072],
                  [0.67248567, 0.57445413, 0.72155054, 0.77835744, 0.0576578 ],
                  [0.22500609, 0.11383772, 0.53580948, 0.69958244, 0.10515939],
                  [0.93356274, 0.1011183 , 0.98104938, 0.66789743, 0.25662077]])

lambda_factor = 0.01
temp_parameter = 0.6

print(compute_cost_function(X, Y, theta, lambda_factor, temp_parameter))

1.68915877604923
