In [None]:
import numpy as np
# Numerically stable softmax, with the matching loss and gradient helpers


def stable_softmax(x):
    """Compute softmax probabilities without overflowing ``np.exp``.

    The maximum score is subtracted before exponentiating. This leaves the
    result mathematically unchanged but keeps every exponent <= 0.

    x: 2d array-like of shape (N, C), one row of scores per data point,
       or a single 1d score vector of shape (C,).
    Returns an array with the same shape as x whose rows (or the whole
    vector, for 1d input) are non-negative and sum to 1.
    """
    x = np.asanyarray(x)
    # A lone score vector has no axis 1, so normalise along its only axis.
    axis = 0 if x.ndim == 1 else 1
    e_x = np.exp(x - np.max(x, axis=axis, keepdims=True))
    return e_x / e_x.sum(axis=axis, keepdims=True)


def softmax_loss(X, y, W, lam=1.0):
    """Mean cross-entropy of a softmax classifier plus an L2 weight penalty.

    X: 2d numpy array of shape (N, d), each row is one data point
    y: 1d integer array of length N -- class index of each row of X
    W: 2d numpy array of shape (d, C), one column per output node
    lam: weight of the L2 penalty; the default 1.0 gives the penalty
         0.5 * sum(W**2) / N.

    Returns a scalar: -mean(log p[i, y[i]]) + 0.5 * lam * sum(W**2) / N.
    """
    scores = np.dot(X, W)  # shape of (N, C)
    n_samples = scores.shape[0]

    # Work with log-probabilities directly (log-sum-exp) instead of taking
    # np.log of the softmax output: a probability that underflows to 0 would
    # otherwise turn the whole loss into inf.
    shifted = scores - np.max(scores, axis=1, keepdims=True)
    log_norm = np.log(np.exp(shifted).sum(axis=1))
    log_prob_true = shifted[np.arange(n_samples), y] - log_norm

    data_loss = -np.mean(log_prob_true)
    penalty = 0.5 * lam * np.sum(np.square(W)) / n_samples
    return data_loss + penalty


def softmax_grad(X, y, W):
    """
    Softmax probabilities minus the one-hot labels (the A - Y matrix).

    W: 2d numpy array of shape (d, C),
    each column corresponding to one output node
    X: 2d numpy array of shape (N, d), each row is one data point
    y: 1d numpy array -- label of each row of X

    Returns a 2d numpy array of shape (N, C).
    NOTE: the averaged weight-gradient form, X.T.dot(A) / X.shape[0], is
    deliberately not applied here; callers get the raw A - Y matrix.
    """
    probs = stable_softmax(X.dot(W))  # shape of (N, C)
    rows = np.arange(X.shape[0])
    # Subtracting 1 at each row's true-class column is the same as
    # subtracting the one-hot label matrix Y.
    probs[rows, y] -= 1
    return probs


# Smoke test on a tiny hand-made problem: 4 data points, 3 features, 3 classes.
X = np.array([
    [3, 3, 5],
    [1, 2, -1],
    [3, 3, 5],
    [1, 2, -1],
])
y = np.array([0, 2, 1, 1])
# Identity weights, so the scores X.dot(W) are just the rows of X.
W = np.array([
    [1, 0, 0],
    [0, 1, 0],
    [0, 0, 1],
])
A = stable_softmax(X.dot(W))
print(A)
# Also worth inspecting by hand: softmax_loss(X, y, W) and np.argmax(A, axis=1).
print(softmax_grad(X, y, W))


In [None]:
# Neural Network on MNIST using scikit-learn
# Path: Python\mnist_sklearn.ipynb
import numpy as np
from sklearn.neural_network import MLPClassifier
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
import joblib
import matplotlib.pyplot as plt
import time
# One seed shared by the data split and the network, so that a fresh
# Restart & Run All should reproduce the same split and the same model.
SEED = 3

# Fetch the 'mnist_784' dataset from OpenML (cache=True lets scikit-learn
# reuse a previously downloaded copy).
mnist = fetch_openml('mnist_784', version=1, cache=True)
X = mnist.data
y = mnist.target

# Hold out 20% of the samples for evaluation. Without random_state the split
# changes on every run and the reported accuracy is not reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SEED)

# Standardise features using statistics from the training split only, then
# apply the same transformation to the held-out split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Three hidden layers of 100 units each, with L2 penalty alpha=0.001.
mlp = MLPClassifier(hidden_layer_sizes=(100, 100, 100), max_iter=1000,
                    random_state=SEED, alpha=0.001)

# perf_counter is the clock intended for measuring elapsed intervals; unlike
# time.time it is not affected by system clock adjustments.
start = time.perf_counter()
mlp.fit(X_train, y_train)
end = time.perf_counter()
print("Training time: ", end-start)

predictions = mlp.predict(X_test)
print(accuracy_score(y_test, predictions))

# Persist the fitted model, then reload it and score again to confirm the
# saved copy gives the same accuracy.
# NOTE: joblib.load unpickles arbitrary objects -- only load files you trust.
joblib.dump(mlp, 'mlp.pkl')
mlp = joblib.load('mlp.pkl')
predictions = mlp.predict(X_test)
print(accuracy_score(y_test, predictions))
