In [None]:
def f1(x):
    """Evaluate the quadratic 0.01*x^2 + 0.1*x.

    Works on anything that supports ``**`` and ``*`` with floats
    (plain numbers, NumPy arrays); the result has the matching shape.
    """
    quadratic_term = 0.01 * x**2
    linear_term = 0.1 * x
    return quadratic_term + linear_term

def numerical_diff(f, x, h=1e-4):
    """Approximate the derivative of ``f`` at ``x`` with a central difference.

    Args:
        f: Callable of one argument.
        x: Point (or array of points, if ``f`` is vectorised) at which to
            differentiate.
        h: Half-width of the difference stencil. Defaults to 1e-4, the value
            that used to be hard-coded.

    Returns:
        ``(f(x + h) - f(x - h)) / (2 * h)``.
    """
    # Central (two-sided) difference: the error shrinks with h**2, whereas a
    # one-sided difference only shrinks with h.
    return (f(x + h) - f(x - h)) / (2 * h)

def diff_tangent(f, x0):
    """Return the tangent line to ``f`` at ``x0`` as a callable.

    The slope is estimated once, up front, with ``numerical_diff``.
    The returned function maps ``x`` to ``slope * (x - x0) + f(x0)``.
    """
    slope = numerical_diff(f, x0)

    def tangent(x):
        # f(x0) is evaluated on every call, exactly as written here.
        return slope * (x - x0) + f(x0)

    return tangent

%matplotlib inline
# Sample f1 on [0, 100) in steps of 0.1 and draw the curve.
x = np.arange(0., 100., 0.1)
y = f1(x)
plt.plot(x, y)
# Overlay the tangent line at x0 = 50, evaluated on the same grid.
plt.plot(x, diff_tangent(f1, 50)(x))

In [None]:
def numerical_gradient(f, x):
    """Estimate the gradient of a scalar function with central differences.

    Args:
        f: Callable taking an array shaped like ``x`` and returning a scalar.
        x: Point at which to evaluate the gradient. Any shape is accepted
            (0-d, 1-d, matrices, ...); ``x`` itself is never modified.
            Integer/boolean input is promoted to float.

    Returns:
        Array with the same shape as ``x`` whose entry ``idx`` approximates
        the partial derivative of ``f`` with respect to ``x[idx]``.
    """
    h = 1e-4
    x = np.asarray(x)
    # With an integer dtype, zeros_like() would produce integer arrays: the
    # step h would be truncated to 0 and the gradient would silently be 0.
    if not np.issubdtype(x.dtype, np.inexact):
        x = x.astype(float)
    grad = np.zeros_like(x)

    # ndindex walks every element as a full index tuple, so multi-dimensional
    # inputs are perturbed one scalar at a time (a flat integer index would
    # address a whole row of a 2-d array instead).
    for idx in np.ndindex(x.shape):
        hs = np.zeros_like(x)
        hs[idx] = h
        grad[idx] = (f(x + hs) - f(x - hs)) / (2 * h)
    return grad

def f2(x):
    """Return the sum of squares of all elements of ``x`` as a scalar.

    Uses ``np.sum`` so that every element is reduced regardless of the
    number of dimensions. The builtin ``sum`` only adds along the first
    axis and would return a vector for 2-d input.
    """
    return np.sum(np.square(x))

# Gradient of f2 at (0, 2). Analytically d/dx sum(x**2) = 2x, so expect roughly [0, 4].
numerical_gradient(f2, np.array([0., 2.]))

In [None]:
def gradient_descent(f, init_x, lr=0.01, step_num=100):
    """Minimise ``f`` with fixed-step gradient descent.

    Args:
        f: Scalar-valued callable of one array argument.
        init_x: Starting point. It is copied, so the caller's array is left
            untouched. Integer/boolean input is promoted to float.
        lr: Learning rate (step size multiplier).
        step_num: Number of update steps to perform.

    Returns:
        A new array holding the position after ``step_num`` updates.
    """
    # np.array() copies by default. Without the copy, the in-place ``-=``
    # below would overwrite the array the caller passed in.
    x = np.array(init_x)
    if not np.issubdtype(x.dtype, np.inexact):
        # ``-=`` with a float right-hand side is rejected on integer arrays.
        x = x.astype(float)

    for _ in range(step_num):
        grad = numerical_gradient(f, x)
        x -= lr * grad
    return x

# Minimise f2 from (3, 4) with lr=0.1 for 100 steps; the cell displays the returned point.
init_x = np.array([3., 4.])
gradient_descent(f2, init_x, 0.1, 100)


In [None]:
def plot_gradient_descent(f, init_x, lr=0.01, step_num=100, step_plot=10):
    """Run gradient descent and scatter-plot the trajectory.

    The descent is run in chunks of ``step_plot`` steps via
    ``gradient_descent``; the position after each chunk (plus the starting
    point) is drawn as a red dot using the first two coordinates.

    Args:
        f: Scalar-valued callable to minimise.
        init_x: Starting point (needs at least two components to plot).
            It is copied, so the caller's array is not modified.
        lr: Learning rate forwarded to ``gradient_descent``.
        step_num: Total step budget. Only ``step_num // step_plot`` full
            chunks are run; a remainder smaller than ``step_plot`` is dropped.
        step_plot: Number of descent steps between plotted points.
    """
    # Work on a private copy: gradient_descent may update its argument in
    # place, which would otherwise leak back into the caller's init_x.
    x = np.array(init_x)
    xs = [x.copy()]
    chunk_count = int(step_num // step_plot)
    for _ in range(chunk_count):
        x = gradient_descent(f, x, lr=lr, step_num=step_plot)
        # Snapshot the position; x may be reused/mutated on the next chunk.
        xs.append(x.copy())
    xs = np.array(xs)
    plt.plot(xs[:, 0], xs[:, 1], 'ro', ms=6)
    plt.show()

%matplotlib inline
# Start from (3, 4) again and plot after every single step (step_plot=1) of a 100-step descent.
init_x = np.array([3., 4.])
plot_gradient_descent(f2, init_x, 0.1, 100, 1)

In [None]:
import importlib

# The module must be imported before it can be reloaded: on a fresh kernel
# the name is unbound and importlib.reload() would raise NameError.
import gradient_simplenet
# Reload so that edits made to gradient_simplenet.py since the first import
# are picked up without restarting the kernel.
importlib.reload(gradient_simplenet)

net = gradient_simplenet.simpleNet()
print(net.W)
x = np.array([0.6, 0.9])
p = net.predict(x)
print(p)
# Index of the largest entry of the prediction.
print(np.argmax(p))
# Three-element 0/1 target with the last position set.
label = np.array([0, 0, 1])
net.loss(x, label)

In [None]:
from common import gradient
import importlib
importlib.reload(gradient)

def f(W):
    """Install ``W`` as the network's weights and return the resulting loss.

    Reads the notebook-level ``net``, ``x`` and ``label``; the only
    state it writes is ``net.W``.
    """
    net.W = W
    current_loss = net.loss(x, label)
    return current_loss

# Gradient of the loss with respect to net.W, computed by common.gradient.
# NOTE(review): f rebinds net.W to its argument on every call; whether that
# interacts safely with how gradient.numerical_gradient perturbs its input
# should be confirmed against common/gradient.py.
dW = gradient.numerical_gradient(f, net.W)
print(dW)

In [None]:
# NOTE(review): importlib is not imported in this cell; it relies on an
# earlier cell having imported it.
import two_layers_net
importlib.reload(two_layers_net)
# Constructor arguments are presumably (input, hidden, output) sizes —
# confirm against two_layers_net.py.
net = two_layers_net.TwoLayersNet(3, 5, 2)

# This rebinds the notebook-level net, x and label that f() reads.
x = np.array([1, 2, 3])
label = np.array([1, 1])
net.numerical_gradient(x, label)

In [None]:
import two_layers_net
import importlib
# Reload so edits to two_layers_net.py take effect without a kernel restart.
importlib.reload(two_layers_net)
# Constructor arguments are presumably (input, hidden, output) sizes —
# confirm against two_layers_net.py.
net = two_layers_net.TwoLayersNet(784, 100, 10)
# Inspect the shapes of the 'W1' and 'b1' entries of the parameter dict.
print(net.params['W1'].shape)
print(net.params['b1'].shape)

In [None]:
# Random stand-in data: arrays of shape (100, 784) and (100, 10), uniform in [0, 1).
x = np.random.rand(100, 784)
y = np.random.rand(100, 10)
net.loss(x, y)  # value is discarded: it is not the last expression of the cell
grads = net.numerical_gradient(x, y)

In [None]:
# Display whatever net.numerical_gradient returned in the previous cell.
grads

In [None]:
# Imported here so the cell also runs on a fresh kernel.
import os
import sys

# Make the parent directory importable (presumably where the `dataset`
# package lives). Guarded so re-running the cell does not keep appending
# duplicates to sys.path.
if os.pardir not in sys.path:
    sys.path.append(os.pardir)
from dataset.mnist import load_mnist
from two_layers_net import TwoLayersNet

(x_train, y_train), (x_test, y_test) =\
    load_mnist(normalize=True, one_hot_label=True)

# Hyper-parameters.
iter_num = 10000
batch_size = 100
learning_rate = 0.1

net = TwoLayersNet(784, 50, 10)

losses_train = []
acc_trains, acc_tests = [], []
# Number of mini-batches that make up one pass over the training set.
# Clamped to at least 1 so the modulo below cannot divide by zero when the
# training set is smaller than one batch.
iter_per_epoch = max(x_train.shape[0] // batch_size, 1)
n = 1
for i in range(iter_num):
    # Draw batch_size random row indices (np.random.choice samples with
    # replacement by default) and slice out the mini-batch.
    batch_mask = np.random.choice(x_train.shape[0], batch_size)
    x_batch, y_batch = x_train[batch_mask], y_train[batch_mask]

    # NOTE(review): if net.numerical_gradient is finite-difference based,
    # 10000 iterations of it will be extremely slow — confirm whether
    # TwoLayersNet offers a faster gradient method.
    grads = net.numerical_gradient(x_batch, y_batch)
    # Plain SGD update of every parameter, in place.
    for k in net.params.keys():
        net.params[k] -= learning_rate * grads[k]

    # Loss is recorded after the update, on the same mini-batch.
    losses_train.append(net.loss(x_batch, y_batch))
    # Report accuracy once per epoch; this also fires at i == 0, i.e.
    # after the very first update.
    if i % iter_per_epoch == 0:
        acc_trains.append(net.accuracy(x_train, y_train))
        acc_tests.append(net.accuracy(x_test, y_test))
        print('%d epoch:: acc_train: %s,  acc_test: %s' % (n, acc_trains[-1], acc_tests[-1]))
        n += 1


1 epoch:: acc_train: 0.09035,  acc_test: 0.0892


In [None]:
%matplotlib inline
# Plot the per-iteration training loss collected in losses_train by the training cell.
plt.plot(losses_train)
plt.show()

