In [6]:
import pandas as pd
import numpy as np

In [7]:
df = pd.read_csv("~/Downloads/winequality-white.csv", sep=";")
df.columns = df.columns.str.lower().str.replace(' ','_')

df_binary = df.copy()
df_binary['quality'] = [1 if x >= 6 else 0 for x in df_binary['quality']]

pd.DataFrame({
    'quality n': df_binary['quality'].value_counts().sort_index(),
    'prop n': df_binary['quality'].value_counts(normalize=True).sort_index()
})

Unnamed: 0,quality n,prop n
0,1640,0.334831
1,3258,0.665169


In [19]:
#target variable, reshape for matrix multiplication purposes
y = df_binary['quality'].to_numpy().reshape([-1, 1])
#feature variables
X = df.drop(['quality'], axis=1).to_numpy()
print('y shape', y.shape)
print('X shape', X.shape)

#Splitting the data into train/test
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2017)
print('X Train set:', X_train.shape, 'y Train set:', y_train.shape)
print('X Test set:', X_test.shape, 'y Test set:', y_test.shape)

y shape (4898, 1)
X shape (4898, 11)
X Train set: (3918, 11) y Train set: (3918, 1)
X Test set: (980, 11) y Test set: (980, 1)


In [20]:
def scale_val(x):
    mean = sum(x) * (1.0 / len(x))
    var = list(map(lambda x: (x - mean) ** 2, x))
    sd = (sum(var) * 1.0 / (len(var))) ** 0.5
    scaled_values = (x - mean) / sd
    return scaled_values

In [21]:
X_train = scale_val(X_train)
X_test = scale_val(X_test)

In [24]:
def batch_grad_descent(x, y, lr=0.001):
    m_current = b_current = 0
    print('starting m, b:', m_current, b_current)
    iterations = 10
    n = len(x)
    learning_rate = lr
    for i in range(iterations):
        predicted_y = m_current*x + b_current
        cost = np.round((1/n) * sum([val**2 for val in (y-predicted_y)]),2)
        derivative_m = -(2/n)*sum(x*(x-predicted_y))
        derivative_b = -(2/n)*sum(x*(y-predicted_y))
        m_current = np.round((m_current - learning_rate * derivative_m), 2)
        b_current = np.round((b_current - learning_rate * derivative_b), 2)
        print('m {}, b {}, cost {}, iteration {}' .format(m_current, b_current, cost, i))

In [25]:
batch_grad_descent(X_train, y_train, lr=0.001)

starting m, b: 0 0
m [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.], b [-0. -0.  0. -0. -0. -0. -0. -0.  0.  0.  0.], cost [0.67 0.67 0.67 0.67 0.67 0.67 0.67 0.67 0.67 0.67 0.67], iteration 0
m [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.], b [-0. -0.  0. -0. -0. -0. -0. -0.  0.  0.  0.], cost [0.67 0.67 0.67 0.67 0.67 0.67 0.67 0.67 0.67 0.67 0.67], iteration 1
m [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.], b [-0. -0.  0. -0. -0. -0. -0. -0.  0.  0.  0.], cost [0.67 0.67 0.67 0.67 0.67 0.67 0.67 0.67 0.67 0.67 0.67], iteration 2
m [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.], b [-0. -0.  0. -0. -0. -0. -0. -0.  0.  0.  0.], cost [0.67 0.67 0.67 0.67 0.67 0.67 0.67 0.67 0.67 0.67 0.67], iteration 3
m [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.], b [-0. -0.  0. -0. -0. -0. -0. -0.  0.  0.  0.], cost [0.67 0.67 0.67 0.67 0.67 0.67 0.67 0.67 0.67 0.67 0.67], iteration 4
m [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.], b [-0. -0.  0. -0. -0. -0. -0. -0.  0.  0.  0.], cost [0.67 0.67 0.67 0.67 0.67 0.67 0.67 0.67 0.67 0.67 0.67], iteration 5
m [0. 0. 

In [26]:
#m = weight
#b = bias

def update_weights(m, b, X, Y, learning_rate):
    m_deriv = 0
    b_deriv = 0
    N = len(X)
    for i in range(N):
        # Calculate partial derivatives
        # -2x(y - (mx + b))
        m_deriv += -2*X[i] * (Y[i] - (m*X[i] + b))

        # -2(y - (mx + b))
        b_deriv += -2*(Y[i] - (m*X[i] + b))

    # We subtract because the derivatives point in direction of steepest ascent
    m -= (m_deriv / float(N)) * learning_rate
    b -= (b_deriv / float(N)) * learning_rate

    return m, b

In [None]:
update_weights()

In [31]:
# Batch Gradient Descent
#calculates the cost for the given X and y

def calculate_cost(theta, x, y):
    m = len(y)
    predictions = X.dot(theta)
    cost = np.sum(np.square(predictions-y))/(2*m)
    return cost

#returns the final theta vector and the array of the cost history
def gradient_descent(X, y, theta, learning_rate=0.01, iterations=1000):
    m = len(y)
    cost_history = np.zeros(iterations)
    theta_history = np.zeros((iterations,2))
    for it in range(iterations):
        prediction = np.dot(X, theta)
        theta -= (1/m)*learning_rate*(X.T.dot((prediction - y)))
        theta_history[it,:] = theta.T
        cost_history[it] = calculate_cost(theta, X, y)
        return theta, cost_history, theta_history

In [29]:
def stocashtic_gradient_descent(X, y, theta, learning_rate=0.01, iterations=100):
    m = len(y)
    cost_history = np.zeros(iterations)

    for it in range(iterations):
        cost = 0.0
        for i in range(m):
            rand_ind = np.random.randint(0,m)
            X_i = X[rand_ind,:].reshape(1, X.shape[1])
            y_i = y[rand_ind].reshape(1,1)
            prediction = np.dot(X_i, theta)
            theta -= (1/m)*learning_rate*(X_i.T.dot((prediction - y_i)))
            cost += calculate_cost(theta, X_i, y_i)
            cost_history[it] = cost

    return theta, cost_history, theta_history

In [30]:
#  returns the final theta vector and the array of the cost history

def stocashtic_gradient_descent(X, y, theta, learning_rate=0.01, iterations=100, batch_size=20):
    m = len(y)
    cost_history = np.zeros(iterations)
    n_batches = int(m/batch_size)

    for it in range(iterations):
        cost = 0.0
        indices = np.random.permumtation(m)
        X = X[indices]
        y = y[indices]
        for i in range(0, m, batch_size):
            X_i = X[i:i+batch_size]
            y_i = y[i:i+batch_size]
            X_i = np.c_[np.ones(len(X_i)), X_i]
            prediction = np.dot(X_i, theta)
            
            theta -= (1/m)*learning_rate*(X_i.T.dot((prediction - y_i)))
            cost += calculate_cost(theta, X_i, y_i)
            cost_history[it] = cost
    return theta, cost_history, theta_history

In [None]:
# GRADED FUNCTION: update_parameters_with_gd
#Arguments:
#    parameters -- python dictionary containing your parameters to be updated:
#                    parameters['W' + str(l)] = Wl
#                    parameters['b' + str(l)] = bl
#    grads -- python dictionary containing your gradients to update each parameters:
#                    grads['dW' + str(l)] = dWl
#                    grads['db' + str(l)] = dbl
#    learning_rate -- the learning rate, scalar.
#    
#    Returns:
#    parameters -- python dictionary containing your updated parameters 

def update_parameters_with_gd(parameters, grads, learning_rate):
    L = len(parameters) // 2 # number of layers in the neural networks
    # Update rule for each parameter
    for l in range(L):
        ### START CODE HERE ### (approx. 2 lines)
        parameters["W" + str(l+1)] = parameters["W" + str(l+1)] - learning_rate*grads["dW" + str(l+1)]
        parameters["b" + str(l+1)] = parameters["b" + str(l+1)] - learning_rate*grads["db" + str(l+1)]
        ### END CODE HERE ###
        
    return parameters

In [None]:
parameters, grads, learning_rate = update_parameters_with_gd_test_case()

parameters = update_parameters_with_gd(parameters, grads, learning_rate)
print("W1 = " + str(parameters["W1"]))
print("b1 = " + str(parameters["b1"]))
print("W2 = " + str(parameters["W2"]))
print("b2 = " + str(parameters["b2"]))

In [None]:
#batch

X = data_input
Y = labels
parameters = initialize_parameters(layers_dims)
for i in range(0, num_iterations):
    # Forward propagation
    a, caches = forward_propagation(X, parameters)
    # Compute cost.
    cost = compute_cost(a, Y)
    # Backward propagation.
    grads = backward_propagation(a, caches, parameters)
    # Update parameters.
    parameters = update_parameters(parameters, grads)

In [None]:
#Stochastic Gradient Descent:

X = data_input
Y = labels
parameters = initialize_parameters(layers_dims)
for i in range(0, num_iterations):
    for j in range(0, m):
        # Forward propagation
        a, caches = forward_propagation(X[:,j], parameters)
        # Compute cost
        cost = compute_cost(a, Y[:,j])
        # Backward propagation
        grads = backward_propagation(a, caches, parameters)
        # Update parameters.
        parameters = update_parameters(parameters, grads)