In [3]:
#Stochastic GD

import numpy as np
np.set_printoptions(precision=2)
from sklearn.datasets import make_regression

# Toy training set: three samples, two features each (x1, x2).
x = np.array([
    [0, 1],
    [2, 6],
    [3, 8],
])
y = np.array([1, 1, 4])

# Prepend a column of ones so the first parameter acts as the intercept.
x_b = np.hstack((np.ones((len(x), 1)), x))

def cost_function(theta, x, y, N):
  """Return the mean squared error of the linear model ``x.dot(theta)``.

  Args:
    theta: model parameters, either a 1-D vector or a (d, 1) column.
    x: design matrix with one sample per row.
    y: targets, either a 1-D vector or an (N, 1) column.
    N: divisor for the summed squared error (the number of samples).

  Returns:
    ``(1/N) * sum((x.dot(theta) - y)**2)`` as a scalar.
  """
  y_hat = x.dot(theta)

  # A 1-D vector subtracted from an (N, 1) column broadcasts to an (N, N)
  # matrix and silently inflates the cost, so bring both operands to the
  # same rank before taking the residual.
  if np.ndim(y) == 1 and np.ndim(y_hat) == 2:
    y = np.reshape(y, (-1, 1))
  elif np.ndim(y) == 2 and np.ndim(y_hat) == 1:
    y_hat = np.reshape(y_hat, (-1, 1))

  c = (1/N)*np.sum((y_hat-y)**2)
  return c

def stochastic_gradient_descent(alpha, x, y, ep=0.001, max_iter=10000, seed=None):
  """Fit linear-regression parameters with one-sample-per-update gradient descent.

  Each epoch visits the samples in a freshly shuffled order. After every
  single-sample update the cost over the whole data set is recomputed with
  ``cost_function``; the run stops when that cost changes by at most ``ep``
  between two consecutive updates, or when ``max_iter`` updates have been
  applied.

  Args:
    alpha: learning rate multiplied into every gradient step.
    x: 2-D design matrix, one sample per row. Any bias column must already
      be included.
    y: targets, either a 1-D vector or an (N, 1) column.
    ep: tolerance on the absolute change of the cost between updates.
    max_iter: upper bound on the number of parameter updates.
    seed: optional seed for a private ``np.random.RandomState``. When None
      the global NumPy random state is used, exactly as before.

  Returns:
    The fitted parameter array, shape ``(x.shape[1], 1)`` for vector or
    column targets.
  """
  # 1-D targets would broadcast against the (N, 1) prediction column when
  # the cost is evaluated, producing an (N, N) residual matrix and a wrong
  # convergence signal. Work with a column instead.
  if np.ndim(y) == 1:
    y = np.reshape(y, (-1, 1))

  # Both the np.random module and RandomState expose random_sample/shuffle,
  # so one handle covers the seeded and the legacy global-state path.
  rng = np.random if seed is None else np.random.RandomState(seed)

  converged = False
  n_updates = 0  # number of accepted (non-converging) parameter updates
  N = x.shape[0] # number of samples
  print("Num of data = ",N)

  # initial theta, uniform in [0, 1)
  theta = rng.random_sample((x.shape[1],1))
  print("Init theta.shape = ",theta.shape)

  # total error, J(theta)
  J = cost_function(theta, x, y, N)
  print("First J = ",J)

  # Iterate Loop
  while not converged:
    indices = np.arange(N)
    rng.shuffle(indices)  # new random visiting order for every epoch
    for i in indices:
      # Slices (not scalars) keep the sample 2-D so the dot products below
      # yield a (d, 1) gradient.
      xi = x[i:i+1]
      yi = y[i:i+1]

      y_hat = xi.dot(theta)
      diff = y_hat - yi
      grad = xi.T.dot(diff)

      theta = theta - alpha * grad

      # error over the full data set after this update
      J2 = cost_function(theta, x, y, N)

      if abs(J-J2) <= ep:
        # The reported count excludes the update that triggered convergence.
        print("       Converged, iterations: ", n_updates, "/", max_iter)
        converged = True
        break

      J = J2   # update error
      n_updates += 1

      # ">=" rather than "==" so a zero or negative max_iter still stops
      # the loop instead of running until convergence.
      if n_updates >= max_iter:
        print('       Max iterations exceeded!')
        converged = True
        break

  return theta

if __name__ == '__main__':

  print("start main")
  print(x_b.shape)
  # The trainer expects the targets as a column so they line up with x.dot(theta).
  y = y.reshape(-1,1)
  print(y.shape)

  # Learning rate for every update step.
  alpha = 0.01
  # Fit the parameters; the tight tolerance lets the run approach the exact solution.
  theta = stochastic_gradient_descent(
      alpha,
      x_b,
      y,
      ep=1e-12,
      max_iter=10**6,
  )
  print ("Theta = ", theta)

  # Predict one unseen sample, adding the same leading ones column as x_b.
  xtest = np.array([[4,9]])
  xtest_b = np.hstack((np.ones((xtest.shape[0],1)), xtest))
  y_p = xtest_b.dot(theta)
  print("y predict = ",y_p)

start main
(3, 3)
(3, 1)
Num of data =  3
Init theta.shape =  (3, 1)
First J =  26.680960520609034
       Converged, iterations:  240507 / 1000000
Theta =  [[ 7.]
 [15.]
 [-6.]]
y predict =  [[13.]]


In [4]:
#Mini-batch GD (size = 2)

import numpy as np
np.set_printoptions(precision=2)
from sklearn.datasets import make_regression

# Toy training set: three samples, two features each (x1, x2).
x = np.array([
    [0, 1],
    [2, 6],
    [3, 8],
])
y = np.array([1, 1, 4])

# Prepend a column of ones so the first parameter acts as the intercept.
x_b = np.hstack((np.ones((len(x), 1)), x))

def cost_function(theta, x, y, N):
  """Return the mean squared error of the linear model ``x.dot(theta)``.

  Args:
    theta: model parameters, either a 1-D vector or a (d, 1) column.
    x: design matrix with one sample per row.
    y: targets, either a 1-D vector or an (N, 1) column.
    N: divisor for the summed squared error (the number of samples).

  Returns:
    ``(1/N) * sum((x.dot(theta) - y)**2)`` as a scalar.
  """
  y_hat = x.dot(theta)

  # A 1-D vector subtracted from an (N, 1) column broadcasts to an (N, N)
  # matrix and silently inflates the cost, so bring both operands to the
  # same rank before taking the residual.
  if np.ndim(y) == 1 and np.ndim(y_hat) == 2:
    y = np.reshape(y, (-1, 1))
  elif np.ndim(y) == 2 and np.ndim(y_hat) == 1:
    y_hat = np.reshape(y_hat, (-1, 1))

  c = (1/N)*np.sum((y_hat-y)**2)
  return c

def mini_batch_gradient_descent(alpha, x, y, batch_size=2, ep=0.001, max_iter=10000, seed=None):
  """Fit linear-regression parameters with mini-batch gradient descent.

  Each epoch shuffles the sample order and walks through it in chunks of
  ``batch_size`` (the last chunk may be smaller). Every chunk yields one
  update using the gradient averaged over that chunk. After each update the
  cost over the whole data set is recomputed with ``cost_function``; the run
  stops when that cost changes by at most ``ep`` between two consecutive
  updates, or when ``max_iter`` updates have been applied.

  Args:
    alpha: learning rate multiplied into every gradient step.
    x: 2-D design matrix, one sample per row. Any bias column must already
      be included.
    y: targets, either a 1-D vector or an (N, 1) column.
    batch_size: number of samples per update; must be at least 1.
    ep: tolerance on the absolute change of the cost between updates.
    max_iter: upper bound on the number of parameter updates.
    seed: optional seed for a private ``np.random.RandomState``. When None
      the global NumPy random state is used, exactly as before.

  Returns:
    The fitted parameter array, shape ``(x.shape[1], 1)`` for vector or
    column targets.

  Raises:
    ValueError: if ``batch_size`` is smaller than 1.
  """
  # A negative step makes range() empty, so the epoch loop below would spin
  # forever without ever updating theta. Reject it up front.
  if batch_size < 1:
    raise ValueError("batch_size must be a positive integer")

  # 1-D targets would broadcast against the (b, 1) batch predictions into a
  # (b, b) matrix, corrupting the gradient and the shape of theta.
  if np.ndim(y) == 1:
    y = np.reshape(y, (-1, 1))

  # Both the np.random module and RandomState expose random_sample/shuffle,
  # so one handle covers the seeded and the legacy global-state path.
  rng = np.random if seed is None else np.random.RandomState(seed)

  converged = False
  n_updates = 0  # number of accepted (non-converging) parameter updates
  N = x.shape[0] # number of samples
  print("Num of data = ", N)

  # initial theta, uniform in [0, 1)
  theta = rng.random_sample((x.shape[1],1))
  print("Init theta.shape = ",theta.shape)

  # total error, J(theta)
  J = cost_function(theta, x, y, N)
  print("First J = ",J)

  # Iterate Loop
  while not converged:
    indices = np.arange(N)
    rng.shuffle(indices)  # new random visiting order for every epoch
    for start in range(0, N, batch_size):
      end = min(start + batch_size, N)
      batch_indices = indices[start:end]
      x_batch = x[batch_indices]
      y_batch = y[batch_indices]

      y_hat = x_batch.dot(theta)
      diff = y_hat - y_batch
      # Average over the batch so the step size does not depend on how many
      # samples the (possibly shorter, final) batch holds.
      grad = x_batch.T.dot(diff) / len(y_batch)

      theta = theta - alpha * grad

      # error over the full data set after this update
      J2 = cost_function(theta, x, y, N)

      if abs(J - J2) <= ep:
        # The reported count excludes the update that triggered convergence.
        print("       Converged, iterations: ", n_updates, "/", max_iter)
        converged = True
        break

      J = J2   # update error
      n_updates += 1

      # ">=" rather than "==" so a zero or negative max_iter still stops
      # the loop instead of running until convergence.
      if n_updates >= max_iter:
        print('       Max iterations exceeded!')
        converged = True
        break

  return theta

if __name__ == '__main__':

  print("start main")
  print(x_b.shape)
  # The trainer expects the targets as a column so they line up with x.dot(theta).
  y = y.reshape(-1,1)
  print(y.shape)

  # Learning rate for every update step.
  alpha = 0.01
  # Fit the parameters using two samples per update.
  theta = mini_batch_gradient_descent(
      alpha,
      x_b,
      y,
      batch_size=2,
      ep=1e-12,
      max_iter=10**6,
  )
  print ("Theta = ", theta)

  # Predict one unseen sample, adding the same leading ones column as x_b.
  xtest = np.array([[4,9]])
  xtest_b = np.hstack((np.ones((xtest.shape[0],1)), xtest))
  y_p = xtest_b.dot(theta)
  print("y predict = ",y_p)

start main
(3, 3)
(3, 1)
Num of data =  3
Init theta.shape =  (3, 1)
First J =  14.151858235162663
       Converged, iterations:  225124 / 1000000
Theta =  [[ 7.  ]
 [14.99]
 [-6.  ]]
y predict =  [[13.]]
