In [4]:
import numpy as np

def softmax(Z):
    # Compute softmax values for each sets of scores in Z each column of Z is a set of scores.
    # Z: a numpy array of shape (N, C)
    # return a numpy array of shape (N, C)
    e_Z = np.exp(Z)
    A = e_Z / e_Z.sum(axis = 1, keepdims = True)
    return A

print(softmax(np.array([[2, 2, 2], [2, 1.8, 0], [-1, -1.5, -2]])))

[[0.33333333 0.33333333 0.33333333]
 [0.51175343 0.41898827 0.0692583 ]
 [0.50648039 0.30719589 0.18632372]]


In [5]:
# phien ban on dinh hon cua ham softmax (chong tran so)
def softmax_stable(Z):
    # Compute softmax values for each set of scores in Z each row of Z is a set of scores.
    # Z is N * C
    e_Z = np.exp(Z - np.max(Z, axis=1, keepdims=True))
    A = e_Z / e_Z.sum(axis = 1, keepdims = True)
    return A

print(softmax_stable(np.array([[2, 2, 2], [2, 1.8, 0], [-1, -1.5, -2]])))

[[0.33333333 0.33333333 0.33333333]
 [0.51175343 0.41898827 0.0692583 ]
 [0.50648039 0.30719589 0.18632372]]


In [9]:
a = np.array([[1, 2], [3, 4], [5, 6]])

b = a.sum(axis=1, keepdims=True)
print(b)

[ 3  7 11]


In [6]:
def softmax_loss(X, y, W):
    # W: 2d numpy array of shape (d, C), each column correspoding to one output node
    # X: 2d numpy array of shape (N, d), each row is one data point
    # y: 1d numpy array -- label of each row of X

    A = softmax_stable(X.dot(W))
    id0 = range(X.shape[0]) # range(N), indexes in axis 0 is id0, indexes in axis 1 is y
    return -np.mean(np.log(A[id0, y]))



In [7]:
def softmax_grad(X, y, W):
    # W: 2d numpy array of shape (d, C), each column correspoding to one output node
    # X: 2d numpy array of shape (N, d), each row is one data point
    # y: 1d numpy array -- label of each row of X
    A = softmax_stable(X.dot(W))
    id0 = range(X.shape[0])
    A[id0, y] -= 1
    return X.T.dot(A) / X.shape[0]


In [8]:
def softmax_fit(X, y, W, lr = 0.01, nepoches = 100, tol = 1e-5, batch_size = 10):
    W_old = W.copy()
    ep = 0
    loss_hist = [softmax_loss(X, y, W)] # store loss history
    N = X.shape[0]
    nbatches = int(np.ceil(float(N) / batch_size))
    while ep < nepoches:
        ep += 1
        mix_ids = np.random.permutation(N)
        for i in range(nbatches):
            # get the i-th batch
            batch_ids = mix_ids[batch_size * i : min(batch_size * (i + 1), N)]
            X_batch = X[batch_ids]
            y_batch = y[batch_ids]
            W = -lr * softmax_grad(X_batch, y_batch, W) # gradient descent
            loss_hist.append(softmax_loss(X, y, W))
            if np.linalg.norm(W - W_old) / W.size < tol:
                print(ep)
                break
            W_old = W.copy()
    return W, loss_hist

In [9]:
def pred(W, X):
    return np.argmax(X.dot(W), axis=1)

In [9]:
a = np.array([[10, 2, 3], [4, 5, 6]])
print(np.argmax(a, axis=0))

[0 1 1]


In [10]:
# Vi du bang data 
C, N = 5, 500
means = [[2, 2], [8, 3], [3, 6], [14, 2], [12, 8]]
cov = [[1, 0], [0, 1]]
X0 = np.random.multivariate_normal(means[0], cov, N)
X1 = np.random.multivariate_normal(means[1], cov, N)
X2 = np.random.multivariate_normal(means[2], cov, N)
X3 = np.random.multivariate_normal(means[3], cov, N)
X4 = np.random.multivariate_normal(means[4], cov, N)

X = np.concatenate((X0, X1, X2, X3, X4), axis = 0) # each row is a datapoint
Xbar = np.concatenate((X, np.ones((X.shape[0], 1))), axis = 1) # bias trick

y = np.asarray([0]*N + [1]*N + [2]*N+ [3]*N + [4]*N) # label


In [13]:
W_init = np.random.randn(Xbar.shape[1], C)
W, loss_hist = softmax_fit(Xbar, y, W_init, lr = 0.01)

print(pred(W, np.array([[14.2, 2.1, 1]])))

[4]


In [11]:
from sklearn.linear_model import LogisticRegression

model = LogisticRegression(C = 1e5, solver='lbfgs', multi_class='multinomial')
model.fit(X, y)

X_test = np.array([[14.2, 2.1], [3.1, 6]])

y_pred = model.predict(X_test)
print(y_pred)

[3 2]


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [33]:
X_test = np.array([[14.2, 2.1, 1], [3.1, 6, 1]])
# print(pred(W, Xbar[0:100]))
print(Xbar[0:1].dot(W))
# print(Xbar[0:10])

[[-0.05729193 -0.22016065  0.55028735 -1.06914942  0.79631464]]
