In [None]:
def f1(x):
    """Evaluate the quadratic ``0.01 * x**2 + 0.1 * x``.

    ``x`` may be a scalar or anything that supports ``**``, ``*`` and ``+``
    element-wise (the notebook passes a NumPy array).
    """
    quadratic_term = 0.01 * x**2
    linear_term = 0.1 * x
    return quadratic_term + linear_term

def numerical_diff(f, x, h=1e-4):
    """Approximate the derivative of ``f`` at ``x`` by a central difference.

    Parameters
    ----------
    f : callable
        Function of one argument.
    x : scalar or array
        Point(s) at which to differentiate; must support ``x + h`` / ``x - h``.
    h : float, optional
        Half-width of the finite-difference stencil. Defaults to ``1e-4``,
        the value that used to be hard-coded.

    Returns
    -------
    ``(f(x + h) - f(x - h)) / (2 * h)``.
    """
    return (f(x + h) - f(x - h)) / (2 * h)

def diff_tangent(f, x0):
    """Return the tangent line of ``f`` at ``x0`` as a callable.

    The slope comes from ``numerical_diff(f, x0)``. The returned function maps
    ``x`` to ``slope * (x - x0) + f(x0)``.
    """
    slope = numerical_diff(f, x0)
    # Evaluate f(x0) once here instead of on every call of the tangent.
    y0 = f(x0)

    def tangent(x):
        return slope * (x - x0) + y0

    return tangent

%matplotlib inline
x = np.arange(0., 100., 0.1)
y = f1(x)
plt.plot(x, y)
plt.plot(x, diff_tangent(f1, 50)(x))

In [None]:
def numerical_gradient(f, x, h=1e-4):
    """Approximate the gradient of a scalar function by central differences.

    Parameters
    ----------
    f : callable
        Maps an array shaped like ``x`` to a scalar.
    x : array_like
        Point at which to evaluate the gradient. Any shape is accepted.
        ``x`` itself is never modified.
    h : float, optional
        Perturbation applied to one element at a time (default ``1e-4``).

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``x`` holding the partial derivatives.
    """
    x = np.asarray(x)
    # An integer array would truncate the perturbation h to 0 and store
    # truncated results, silently producing a zero gradient; promote to float.
    if not np.issubdtype(x.dtype, np.inexact):
        x = x.astype(float)
    grad = np.zeros_like(x)

    # ndindex visits every element, so 0-d and N-d inputs work as well as 1-d.
    for idx in np.ndindex(x.shape):
        step = np.zeros_like(x)
        step[idx] = h
        grad[idx] = (f(x + step) - f(x - step)) / (2 * h)
    return grad

def f2(x):
    """Return the sum of squares of all elements of ``x`` as a scalar.

    ``np.sum`` reduces over every axis, so the result is a scalar for arrays
    of any shape. The builtin ``sum`` only reduces the first axis and would
    return an array for 2-D input.
    """
    return np.sum(x**2)

# Gradient of f2 at (0, 2). f2 is a sum of squares, so the analytic gradient
# is 2 * x and the displayed result should be approximately [0., 4.].
numerical_gradient(f2, np.array([0., 2.]))

In [None]:
def gradient_descent(f, init_x, lr=0.01, step_num=100):
    """Minimise ``f`` by plain gradient descent and return the final point.

    Parameters
    ----------
    f : callable
        Scalar function of an array.
    init_x : array_like
        Starting point. It is copied, so the caller's array is left untouched.
    lr : float, optional
        Learning rate (step size multiplier).
    step_num : int, optional
        Number of update steps.

    Returns
    -------
    numpy.ndarray
        Float array holding the point reached after ``step_num`` updates.
    """
    # Copy as float: binding x to init_x and using ``-=`` would overwrite the
    # caller's array, and would raise a casting error for an integer array.
    x = np.array(init_x, dtype=float)
    for _ in range(step_num):
        x -= lr * numerical_gradient(f, x)
    return x

# Minimise f2 starting from (3, 4) with learning rate 0.1 for 100 steps.
# Each step scales the point by (1 - 0.1 * 2) = 0.8, so the displayed result
# should be very close to the origin.
init_x = np.array([3., 4.])
gradient_descent(f2, init_x, 0.1, 100)


In [None]:
def plot_gradient_descent(f, init_x, lr=0.01, step_num=100, step_plot=10):
    """Run gradient descent on ``f`` and scatter-plot the visited points.

    The descent runs in ``int(step_num / step_plot)`` segments of
    ``step_plot`` steps each. The point reached after every segment, plus the
    starting point, is drawn as a red dot using its first two coordinates.
    Steps left over when ``step_num`` is not a multiple of ``step_plot`` are
    not executed.

    Parameters
    ----------
    f : callable
        Scalar function of an array.
    init_x : array_like
        Starting point (at least two coordinates, since two are plotted).
    lr : float, optional
        Learning rate forwarded to ``gradient_descent``.
    step_num : int, optional
        Total number of descent steps.
    step_plot : int, optional
        Number of steps between plotted points.
    """
    # Work on a private copy so the caller's array is untouched even if
    # gradient_descent updates its argument in place.
    x = np.array(init_x, dtype=float)
    xs = [x.copy()]
    for _ in range(int(step_num / step_plot)):
        x = gradient_descent(f, x, lr=lr, step_num=step_plot)
        xs.append(x.copy())
    xs = np.array(xs)
    plt.plot(xs[:, 0], xs[:, 1], 'ro', ms=6)
    plt.show()

%matplotlib inline
# Plot the descent path on f2 from (3, 4): learning rate 0.1, 100 steps,
# one plotted point per step (step_plot=1).
init_x = np.array([3., 4.])
plot_gradient_descent(f2, init_x, 0.1, 100, 1)

In [None]:
import importlib

# The module must be imported before it can be reloaded; with the import
# commented out this cell raised NameError on a fresh kernel.
import gradient_simplenet
# Reload so edits to gradient_simplenet.py are picked up without a restart.
importlib.reload(gradient_simplenet)

net = gradient_simplenet.simpleNet()
print(net.W)
x = np.array([0.6, 0.9])
p = net.predict(x)
print(p)
# Index of the largest entry of the prediction.
print(np.argmax(p))
label = np.array([0, 0, 1])
net.loss(x, label)

In [None]:
from common import gradient
import importlib
# Re-execute common.gradient so edits to the module take effect in this kernel.
importlib.reload(gradient)

def f(W):
    """Install ``W`` as the network's weights and return the resulting loss.

    Reads the notebook globals ``net``, ``x`` and ``label``, and rebinds
    ``net.W`` as a side effect.
    """
    net.W = W
    current_loss = net.loss(x, label)
    return current_loss

# Numerical gradient of the loss with respect to the weight array net.W.
# NOTE(review): the array passed in is net.W itself, so the assignment inside
# f() presumably only matters if numerical_gradient hands f a different
# object - confirm against common/gradient.py.
dW = gradient.numerical_gradient(f, net.W)
print(dW)

In [None]:
import two_layers_net
# Relies on `importlib` having been imported by an earlier cell.
importlib.reload(two_layers_net)
# NOTE(review): presumably (input_size, hidden_size, output_size) - confirm
# against two_layers_net.py.
net = two_layers_net.TwoLayersNet(3, 5, 2)

# NOTE: this rebinds the globals net, x and label, which f() from the earlier
# cell reads; calling f() after this cell uses these new objects.
x = np.array([1, 2, 3])
label = np.array([1, 1])
net.numerical_gradient(x, label)

In [1]:
import two_layers_net
import importlib
# Reload so edits to two_layers_net.py take effect without restarting.
importlib.reload(two_layers_net)
# NOTE(review): arguments are presumably (input_size, hidden_size,
# output_size); the recorded output shows W1 as (784, 100) and b1 as (100,).
net = two_layers_net.TwoLayersNet(784, 100, 10)
print(net.params['W1'].shape)
print(net.params['b1'].shape)

(784, 100)
(100,)


In [2]:
# Random inputs and random targets, used only to exercise the network.
# No seed is set, so the values (and the gradients below) differ between runs.
x = np.random.rand(100, 784)
y = np.random.rand(100, 10)
# The loss value is computed but not displayed (it is not the last expression).
net.loss(x, y)
# NOTE(review): presumably slow - a numerical gradient over every parameter of
# a 784-100-10 network; confirm the runtime before re-running.
grads = net.numerical_gradient(x, y)

In [3]:
# Display the gradients; the recorded output is a dict of arrays keyed by
# parameter name ('W1', 'W2', ...).
grads

{'W1': array([[ 0.00129319,  0.00036092,  0.00152131, ..., -0.00092054,
          0.00094277, -0.00032491],
        [ 0.00155893,  0.00050255,  0.00177216, ..., -0.0010212 ,
          0.00106366, -0.00041425],
        [ 0.00135872,  0.0004137 ,  0.00157107, ..., -0.00097502,
          0.00094471, -0.00037837],
        ..., 
        [ 0.00146235,  0.00043081,  0.00166614, ..., -0.001001  ,
          0.0010254 , -0.00036133],
        [ 0.00152687,  0.00049546,  0.00170097, ..., -0.00102308,
          0.00105349, -0.00043294],
        [ 0.00153715,  0.00048843,  0.00173592, ..., -0.00107368,
          0.00110791, -0.00041002]]),
 'W2': array([[-0.13534155, -0.12367435, -0.12323215, -0.13711163, -0.14132182,
         -0.13970287, -0.14991459, -0.13880626, -0.13870509, -0.13525405],
        [-0.12576651, -0.11520061, -0.11395618, -0.12723957, -0.13110292,
         -0.12902681, -0.13905827, -0.12867064, -0.12809079, -0.12527782],
        [-0.1319527 , -0.12043073, -0.11946416, -0.13320648, -