In [1]:
from sklearn import datasets
import numpy

x_sparse, y = datasets.load_svmlight_file('diabetes')
x = x_sparse.todense()

print('Shape of x: ' + str(x.shape))
print('Shape of y: ' + str(y.shape))

Shape of x: (768, 8)
Shape of y: (768,)


In [2]:
# partition the data to training and test sets
n = x.shape[0]
n_train = 640
n_test = n - n_train

rand_indices = numpy.random.permutation(n)
train_indices = rand_indices[0:n_train]
test_indices = rand_indices[n_train:n]

x_train = x[train_indices, :]
x_test = x[test_indices, :]
y_train = y[train_indices].reshape(n_train, 1)
y_test = y[test_indices].reshape(n_test, 1)

print('Shape of x_train: ' + str(x_train.shape))
print('Shape of x_test: ' + str(x_test.shape))
print('Shape of y_train: ' + str(y_train.shape))
print('Shape of y_test: ' + str(y_test.shape))

Shape of x_train: (640, 8)
Shape of x_test: (128, 8)
Shape of y_train: (640, 1)
Shape of y_test: (128, 1)


In [3]:
# Standardization
import numpy

# calculate mu and sig using the training set
d = x_train.shape[1]
mu = numpy.mean(x_train, axis=0).reshape(1, d)
sig = numpy.std(x_train, axis=0).reshape(1, d)

# transform the training features
x_train = (x_train - mu) / (sig + 1E-6)

# transform the test features
x_test = (x_test - mu) / (sig + 1E-6)

print('test mean = ')
print(numpy.mean(x_test, axis=0))

print('test std = ')
print(numpy.std(x_test, axis=0))

test mean = 
[[-0.06369565  0.01628698  0.13345802 -0.0589218   0.12068269  0.11463593
   0.00526051  0.02654035]]
test std = 
[[0.92107512 1.0008797  0.92230626 1.0299508  1.32594655 0.91735475
  0.89912796 1.01847521]]


In [4]:
n_train, d = x_train.shape
x_train = numpy.concatenate((x_train, numpy.ones((n_train, 1))), axis=1)

n_test, d = x_test.shape
x_test = numpy.concatenate((x_test, numpy.ones((n_test, 1))), axis=1)

print('Shape of x_train: ' + str(x_train.shape))
print('Shape of x_test: ' + str(x_test.shape))

Shape of x_train: (640, 9)
Shape of x_test: (128, 9)


In [5]:
n,d = x_train.shape
rand_indices = numpy.random.permutation(n)
rand_indices.shape

(640,)

## Stochastic GD

In [24]:
xi = x_train[0, :] # 1-by-d matrix
yi = float(y_train[0, :]) # scalar
w = numpy.zeros((d, 1))
xi, yi, w

(matrix([[-0.55172346, -0.588728  ,  0.88580663,  0.96531245,  0.39090008,
           1.71225292, -1.02392748, -0.86969669,  1.        ]]),
 -1.0,
 array([[0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.]]))

In [51]:
xi = numpy.ones((5,4))
yi = numpy.ones((5,1))
yi[0, :] = -1
yi[3, :] = -1
w = numpy.ones((4,1))
xi.shape, yi.shape, w.shape

((5, 4), (5, 1), (4, 1))

In [54]:
yx = numpy.multiply(yi, xi)
yxw = numpy.dot(yx, w)
yxw

array([[-4.],
       [ 4.],
       [ 4.],
       [-4.],
       [ 4.]])

In [64]:
loss = numpy.log(1 + numpy.exp(-yxw))
reg = (lam / 2) * numpy.sum(w * w)
obj = (1/b) * numpy.sum(loss + reg)
obj

1.011344954948631

In [90]:
g_loss = numpy.multiply((1 / (1 + numpy.exp(yxw))), -yx).T + (lam + w)
g_loss

array([[1.98201479, 0.98201479, 0.98201479, 1.98201479, 0.98201479],
       [1.98201479, 0.98201479, 0.98201479, 1.98201479, 0.98201479],
       [1.98201479, 0.98201479, 0.98201479, 1.98201479, 0.98201479],
       [1.98201479, 0.98201479, 0.98201479, 1.98201479, 0.98201479]])

In [89]:
g.shape

array([0.86375924, 0.86375924, 0.86375924, 0.86375924])

## Mini-batch SGD

In [38]:
lam = 1E-6
b = 8
xi = x_train[rand_indices[0:b], :]
yi = y_train[rand_indices[0:b]].reshape((b, 1))
w = numpy.zeros((d,1))
yi.shape, xi.shape, w.shape

((8, 1), (8, 9), (9, 1))

In [39]:
n, d = xi.shape
yi = yi.reshape(n)
yi.shape

(8,)

In [34]:
yx = numpy.multiply(yi, xi)
yxw = numpy.dot(yx, w)
yx.shape, yxw.shape

((8, 9), (8, 1))

In [10]:
loss = numpy.log(1 + numpy.exp(-yxw))
reg = lam / 2 * numpy.sum(w * w)
obj = (1/b) * numpy.sum(loss + reg)  # scalar
obj

0.6931471805599453

In [18]:
g_loss = (-yx / (1 + numpy.exp(yxw)))
g_loss += (lam * w.T)
g_loss

ValueError: operands could not be broadcast together with shapes (9,8) (8,1) 

In [None]:
yx.T.shape

In [17]:
g = (1/b) * numpy.sum(g_loss)
g

-0.2720386778481628

In [None]:
iters = int(n / b)
start = 0
for i in range(iters):
    end = start + b
    print(start, end)
    xi = x[rand_indices[0:b], :]
    print(len(xi))
    yi = y[rand_indices[0:b]].reshape((b, 1))
    start = end