Calculate the first iteration of gradient descent for a small logistic-regression example; the resulting values serve as reference numbers when building unit tests.

In [117]:
import numpy as np

In [118]:
# Design matrix: one row per data point (3 rows), one column per feature (2 columns)
feature_matrix = np.array([
    [0, 1],
    [2, 2],
    [3, 0],
])
print("Input with 2 features: \n", feature_matrix)

Input with 2 features: 
 [[0 1]
 [2 2]
 [3 0]]


In [119]:
# Target vector: the expected output (0 or 1) for each of the 3 rows of feature_matrix
target_matrix = np.asarray([1, 0, 0])
print("Target Output for each row: ", target_matrix)

Target Output for each row:  [1 0 0]


In [120]:
# Starting point for the descent: one coefficient per feature, each set to 1.0
beta = np.ones(shape=(2,))
print("Initial Coefficients for each feature: ", beta)

Initial Coefficients for each feature:  [ 1.  1.]


In [121]:
# Logistic (sigmoidal) hypothesis function; the basis of our classification model.
# NOTE: the exponent here is +x, so hyp(x) == 1/(1+e**x), which DEcreases with x.
# Callers therefore pass the NEGATED linear predictor (sum of coefficients times
# x values) to obtain the usual increasing sigmoid.
def hyp(x):
    """Return 1 / (1 + e**x); applied element-wise when x is a NumPy array."""
    denominator = 1 + np.exp(x)
    return 1 / denominator

# Sanity check of the two tails and the midpoint
hyp(-100), hyp(0), round(hyp(100))

(1.0, 0.5, 0.0)

In [122]:
# Calculate predicted output for each row given the current coefficients
def predict_y(X, coeffs):
    """Return the model's predicted output for every row of X.

    Parameters
    ----------
    X : array-like
        Feature matrix; it is combined with ``coeffs`` via ``np.dot(X, coeffs)``.
    coeffs : array-like
        Current model coefficients.

    Returns
    -------
    The value of ``hyp(-np.dot(X, coeffs))``, i.e. 1 / (1 + e**(-X.coeffs)),
    one entry per row of X.
    """
    linear_predictor = np.dot(X, coeffs)
    # hyp(x) is defined as 1/(1+e**x), so the predictor is negated here to get
    # the increasing sigmoid of the linear predictor.
    return hyp(-linear_predictor)

# Fixed: this previously called the undefined name `predict_p`, which raises
# NameError on a fresh kernel (Restart & Run All).
print("Predictions for each row based on current coefficients: \n",predict_y(feature_matrix, beta))

Predictions for each row based on current coefficients: 
 [ 0.73105858  0.98201379  0.95257413]


In [123]:
# Cost function: this is what gets minimized, even if it's not used directly
def cost_fn(X, y, coeffs):
    """Return the cost of the current coefficients on (X, y).

    The cost is ``-sum(y*log(p) + (1-y)*log(1-p))`` where ``p`` is
    ``predict_y(X, coeffs)``; it is summed over rows, not averaged.

    Parameters
    ----------
    X : array-like
        Feature matrix passed through to ``predict_y``.
    y : numpy.ndarray
        Target values, combined element-wise with the predictions.
    coeffs : array-like
        Current model coefficients.

    Returns
    -------
    A single scalar cost value.
    """
    y_predictions = predict_y(X, coeffs)
    log_likelihood = y*np.log(y_predictions) + (1-y)*np.log(1-y_predictions)
    # np.sum instead of the builtin sum: the builtin iterates over the ndarray
    # in Python and, for 2-D input, would only reduce the first axis.
    return -np.sum(log_likelihood)

print("Cost Function at this point: ", cost_fn(feature_matrix, target_matrix, beta))

Cost Function at this point:  7.37999896701


In [124]:
# Gradient: slopes of the cost function with these coefficients
def gradient(X, y, coeffs):
    y_predictions = predict_y(X, coeffs)
    return X.T.dot(y_predictions-y)

print("Gradient of the cost at this point: ",gradient(feature_matrix, target_matrix, beta))

Gradient of the cost at this point:  [ 4.82174996  1.69508616]
