# ==================== Mini-Batch Gradient Descent =================================

In [1]:
# used for manipulating directory paths
import os

# Scientific and vector computation for python
import numpy as np

# Plotting library
from matplotlib import pyplot
from mpl_toolkits.mplot3d import Axes3D  # needed to plot 3-D surfaces

# tells matplotlib to embed plots within the notebook
%matplotlib inline

The file Data/ex1data1.txt contains the dataset for our linear regression problem. The first column is the population of a city (in 10,000s) and the second column is the profit of a food truck in that city (in $10,000s). A negative value for profit indicates a loss.

In [57]:
# Load the comma-separated dataset: every column except the last is a
# feature, and the last column is the target value.
data = np.loadtxt(os.path.join('Data', 'ex1data1.txt'), delimiter=',')

# Number of training examples (one per row of the file).
m = data.shape[0]

# Targets as an (m, 1) column vector so they line up with X @ theta.
y = data[:, -1:]

# Design matrix: a leading column of ones (intercept term) followed by
# the feature columns.
X = np.hstack([np.ones((m, 1)), data[:, :-1]])


# ==== Alternative ====
# data = np.hstack((np.ones((data.shape[0], 1)), data)) 
# X = data[:, :-1] 
# y = data[:, -1].reshape((-1, 1)) 

  





In [58]:
# linear regression using "mini-batch" gradient descent 
# function to compute hypothesis / predictions 
def hypothesis(X, theta):
    """Return the linear-model predictions for X under parameters theta.

    The result is the dot product of ``X`` and ``theta``; with an
    (n, d) design matrix and a (d, 1) parameter column this is an
    (n, 1) column of predictions.
    """
    predictions = np.dot(X, theta)
    return predictions

# function to compute gradient of error function w.r.t. theta 
def gradient(X, y, theta):
    """Return the gradient of the squared-error cost with respect to theta.

    Computed as ``X^T (h - y)`` where ``h`` is the prediction from
    ``hypothesis``. The result is a sum over the rows of ``X``; it is
    not divided by the number of examples.
    """
    residual = hypothesis(X, theta) - y
    return np.dot(X.T, residual)

# function to compute the error for current values of theta 
def cost(X, y, theta):
    """Return half the sum of squared prediction errors for theta.

    With column-vector inputs the squared-error product is a (1, 1)
    array; its first row is returned, so the caller receives a
    one-element array rather than a plain float.
    """
    residual = hypothesis(X, theta) - y
    squared_error = np.dot(residual.T, residual)
    # Halve in place, exactly as the cost is defined: J = (1/2) * r^T r.
    squared_error /= 2
    return squared_error[0]

# function to create a list containing mini-batches 
def create_mini_batches(X, y, batch_size):
    """Shuffle the dataset and split it into mini-batches.

    Parameters
    ----------
    X : 2-D array of shape (n, d) holding the features.
    y : 2-D array of shape (n, 1) holding the targets.
    batch_size : positive int, the maximum number of rows per batch.

    Returns
    -------
    list of ``(X_mini, Y_mini)`` tuples. Every row of the input appears
    in exactly one batch. All batches have ``batch_size`` rows except
    possibly the last, which holds the remainder. No batch is empty.

    Raises
    ------
    ValueError
        If ``batch_size`` is less than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # Stack features and targets so one shuffle keeps each row paired
    # with its target. hstack returns a new array, so the caller's X
    # and y are not reordered.
    data = np.hstack((X, y))
    np.random.shuffle(data)

    mini_batches = []
    # Stepping by batch_size covers the trailing partial batch exactly
    # once and never yields an empty slice.
    for start in range(0, data.shape[0], batch_size):
        mini_batch = data[start:start + batch_size, :]
        X_mini = mini_batch[:, :-1]
        Y_mini = mini_batch[:, -1].reshape((-1, 1))
        mini_batches.append((X_mini, Y_mini))
    return mini_batches

# function to perform mini-batch gradient descent 
def gradientDescent(X, y, learning_rate = 0.001, batch_size = 32, max_iters = 3):
    """Fit linear-regression parameters with mini-batch gradient descent.

    Parameters
    ----------
    X : 2-D array of shape (n, d), the design matrix.
    y : 2-D array of shape (n, 1), the targets.
    learning_rate : step size applied to each mini-batch gradient.
    batch_size : number of rows per mini-batch.
    max_iters : number of full passes (epochs) over the data.

    Returns
    -------
    theta : array of shape (d, 1), the parameters after the last update.
    error_list : list with the cost on each mini-batch, recorded right
        after the update computed from that batch.
    """
    theta = np.zeros((X.shape[1], 1))
    error_list = []
    for _ in range(max_iters):
        # Batches are rebuilt every epoch so each pass sees a fresh shuffle.
        for X_mini, y_mini in create_mini_batches(X, y, batch_size):
            # An empty batch carries no information; skip it so it does
            # not add a spurious zero to the error history.
            if X_mini.shape[0] == 0:
                continue
            theta = theta - learning_rate * gradient(X_mini, y_mini, theta)
            error_list.append(cost(X_mini, y_mini, theta))

    return theta, error_list


In [32]:
# Run mini-batch gradient descent with the default learning rate and
# batch size; error_list receives the second value gradientDescent returns.
theta, error_list = gradientDescent(X, y)  
# theta[0] multiplies the column of ones prepended to X (the intercept).
print("Bias = ", theta[0]) 
# The remaining rows of theta are the weights for the feature columns.
print("Coefficients = ", theta[1:]) 

Bias =  [-1.21665391]
Coefficients =  [[-13.86182457]]
