# 0.5_ss_chap6
6章の実装　


In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
%reload_ext autoreload

In [3]:
from src import utils
import itertools
import logging
import numpy as np
import matplotlib.pylab as plt
%matplotlib inline

In [4]:
log_fmt = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
# logging.basicConfig(level=logging.INFO, format=log_fmt)
logging.basicConfig(level=logging.DEBUG, format=log_fmt)
logger = logging.getLogger()

# 中身

In [5]:
class SGD:
    ''' Simple stochasitc gradient descent
    '''
    def __init__(self, lr=0.01):
        self.lr = lr
    
    def update(self, params, grads):
        for key in params.keys():
            params[key] -= self.lr * grads[key]
    

### 関数を書いてみる

In [6]:
def bowl(x):
    ''' f(x,y) = 1/20 * x[0]^2 + x[1]^2
    ベクトルで入ってきた場合と、2次元（マトリックス）で入ってき場合に対応する
    
    Parameters
    ----------
    x: np.ndarray
        x values
    y: np.ndarray
        y values
    
    Returns
    -------
    f(x,y) = 1/20 * x^2 + y^2
    '''
    # takes numpy as input
    if type(x) != np.ndarray:
        raise ValueError("Only takes numpy.array as input")
        
    # 点
    if x.ndim == 1:
                return 1/20*x[0]**2 + x[1]**2
    
    # x should be 2 * N shape
    if x.shape[0] != 2:
        raise ValueError(f"x is {x.shape}, takes 2*N only")
    
    # ベクトル
    if x.ndim == 2:
        return 1/20*x[0] **2 + x[1]**2

    # それ以上　
    raise ValueError("Not implemented")
    


In [35]:
def bowl_grad(x):
        ''' Check that numerical gradient is correct
        f(x,y) = 1/20 * x^2 + y^2
        df/dx = x/10
        df/dy = 2*y
        '''
        # takes numpy as input
        if type(x) != np.ndarray:
                raise ValueError("Only takes numpy.array as input")

        # 点
        if x.ndim == 1:
                dx = x[0] / 10
                dy = x[1] * 2
                return np.array([dx, dy])

        # それ以上　
        raise ValueError("Not implemented")

In [7]:
x = np.array([3, 5])

In [8]:
x = x.astype(float)

In [9]:
x.shape

(2,)

In [10]:
x.shape

(2,)

In [11]:
x.reshape(1, -1).shape

(1, 2)

In [12]:
original_shape = x.shape

In [13]:
original_shape

(2,)

In [14]:
x_reshaped = x.reshape(1, -1)

In [15]:
x_reshaped

array([[3., 5.]])

In [16]:
x_reshaped.ndim

2

In [17]:
x_reshaped.shape

(1, 2)

In [18]:
x_reshaped[0][0]

3.0

In [19]:
x_reshaped.reshape(original_shape)

array([3., 5.])

In [20]:
x_reshaped.reshape(original_shape)[0]

3.0

In [21]:
x.ndim

1

In [22]:
X = np.array([[3.,4.,3.],[5.,6.,7.]])

In [23]:
bowl(X) 

array([25.45, 36.8 , 49.45])

### 勾配を書いてみる

In [24]:
x0 = np.arange(-2, 2.5, .25)
x1 = np.arange(-2, 2.5, .25)
X, Y = np.meshgrid(x0, x1)
X = X.flatten()
Y = Y.flatten()

In [25]:
np.array([X, Y]).ndim

2

In [26]:
xbig = np.array([X, Y])

In [27]:
xbig.shape

(2, 324)

In [28]:
xbig.shape

(2, 324)

In [29]:
xbig.ndim

2

# あの numerical_gradient の中身はなんなんだ
- 一旦諦める

In [30]:
def _numerical_gradient_no_batch(f, x):
        ''' Calcuate gradient at point x for f
        We need to check for the shape. 
        The shape for this input should be 1 dim.
        
        Parameters
        f: function
                the input shape for the function should be [num_rows, 1] 
        x_original: np.ndarray
                input. of shape or [num_rows, 1]
        '''
        # assert that this is one dimentional
        assert(x.ndim == 1)
        # turn into float
        x = x.astype(float)
        
        # reshape x so that is [1, num_cols]
        h = 1e-4
        
        # make grads
        grad = np.zeros_like(x)
        logger.debug(f"grad shape is {grad.shape}")
        
        # gradient for each point
        for idx in range(x.size):
                logger.debug(f"idx is {idx}")
                tmp_val = float(x[idx])
                # calculate f(x+h)
                x[idx] = tmp_val + h
                logger.debug(f"new x is {x}")
                fxh1 = f(x)
                logger.debug(f"f(xh1) is {fxh1}")
                
                # f(x-h)
                x[idx] = tmp_val - h
                logger.debug(f"new x is {x}")
                fxh2 = f(x)
                logger.debug(f"f(xh2) is {fxh2}")
                
                grad[idx] = (fxh1 - fxh2)/(2*h)
                logger.debug(f"grad[idx] is {grad[idx]}")
                x[idx] = tmp_val
        
        return grad

In [36]:
x = np.array([0.1, 1.5])

In [37]:
_numerical_gradient_no_batch(bowl,x ) 

2018-11-10 16:11:21,142 - root - DEBUG - grad shape is (2,)
2018-11-10 16:11:21,145 - root - DEBUG - idx is 0
2018-11-10 16:11:21,148 - root - DEBUG - new x is [0.1001 1.5   ]
2018-11-10 16:11:21,149 - root - DEBUG - f(xh1) is 2.2505010005
2018-11-10 16:11:21,152 - root - DEBUG - new x is [0.0999 1.5   ]
2018-11-10 16:11:21,154 - root - DEBUG - f(xh2) is 2.2504990005
2018-11-10 16:11:21,155 - root - DEBUG - grad[idx] is 0.009999999999177334
2018-11-10 16:11:21,156 - root - DEBUG - idx is 1
2018-11-10 16:11:21,159 - root - DEBUG - new x is [0.1    1.5001]
2018-11-10 16:11:21,161 - root - DEBUG - f(xh1) is 2.2508000100000003
2018-11-10 16:11:21,163 - root - DEBUG - new x is [0.1    1.4999]
2018-11-10 16:11:21,165 - root - DEBUG - f(xh2) is 2.2502000100000004
2018-11-10 16:11:21,167 - root - DEBUG - grad[idx] is 2.9999999999996696


array([0.01, 3.  ])

In [38]:
bowl_grad(x)

array([0.01, 3.  ])

In [39]:
debug = True

In [43]:
idx = 11

In [45]:
if debug and (idx > 10):
        break

SyntaxError: 'break' outside loop (cell_name, line 5)

In [85]:
x0 = np.arange(-2, -1.5, .25)
x1 = np.arange(-2, -1.5, .25)
X, Y = np.meshgrid(x0, x1)
X = X.flatten()
Y = Y.flatten()

In [86]:
x_debug = np.array([X, Y])

In [87]:
x_debug

array([[-2.  , -1.75, -2.  , -1.75],
       [-2.  , -2.  , -1.75, -1.75]])

In [51]:
x_debug.shape

(2, 4)

In [54]:
grad = numerical_gradient(bowl, x_debug,debug = True)

2018-11-11 00:41:27,826 - root - INFO - debug: True
2018-11-11 00:41:27,827 - root - DEBUG - idx is 0
2018-11-11 00:41:27,828 - root - DEBUG - grad shape is (4,)
2018-11-11 00:41:27,828 - root - DEBUG - idx is 0
2018-11-11 00:41:27,829 - root - DEBUG - new x is [-1.9999 -1.75   -2.     -1.75  ]
2018-11-11 00:41:27,830 - root - DEBUG - f(xh1) is 3.2624800005
2018-11-11 00:41:27,831 - root - DEBUG - new x is [-2.0001 -1.75   -2.     -1.75  ]
2018-11-11 00:41:27,832 - root - DEBUG - f(xh2) is 3.2625200005
2018-11-11 00:41:27,833 - root - DEBUG - grad[idx] is -0.1999999999990898
2018-11-11 00:41:27,833 - root - DEBUG - idx is 1
2018-11-11 00:41:27,834 - root - DEBUG - new x is [-2.     -1.7499 -2.     -1.75  ]
2018-11-11 00:41:27,835 - root - DEBUG - f(xh1) is 3.26215001
2018-11-11 00:41:27,836 - root - DEBUG - new x is [-2.     -1.7501 -2.     -1.75  ]
2018-11-11 00:41:27,837 - root - DEBUG - f(xh2) is 3.26285001
2018-11-11 00:41:27,838 - root - DEBUG - grad[idx] is -3.5000000000007248
20

In [55]:
grad

array([[-0.2, -3.5,  0. ,  0. ],
       [-0.2, -4. ,  0. ,  0. ]])

In [57]:
x_short = np.array([0.1, 1.5])

In [58]:
numerical_gradient(bowl, x_short)

2018-11-11 00:44:40,871 - root - INFO - debug: False
2018-11-11 00:44:40,874 - root - DEBUG - grad shape is (2,)
2018-11-11 00:44:40,877 - root - DEBUG - idx is 0
2018-11-11 00:44:40,878 - root - DEBUG - new x is [0.1001 1.5   ]
2018-11-11 00:44:40,879 - root - DEBUG - f(xh1) is 2.2505010005
2018-11-11 00:44:40,881 - root - DEBUG - new x is [0.0999 1.5   ]
2018-11-11 00:44:40,884 - root - DEBUG - f(xh2) is 2.2504990005
2018-11-11 00:44:40,885 - root - DEBUG - grad[idx] is 0.009999999999177334
2018-11-11 00:44:40,886 - root - DEBUG - idx is 1
2018-11-11 00:44:40,887 - root - DEBUG - new x is [0.1    1.5001]
2018-11-11 00:44:40,890 - root - DEBUG - f(xh1) is 2.2508000100000003
2018-11-11 00:44:40,892 - root - DEBUG - new x is [0.1    1.4999]
2018-11-11 00:44:40,893 - root - DEBUG - f(xh2) is 2.2502000100000004
2018-11-11 00:44:40,894 - root - DEBUG - grad[idx] is 2.9999999999996696


array([0.01, 3.  ])

In [59]:
bowl_grad(x_short)

array([0.01, 3.  ])

In [61]:
from src.common.gradient import numerical_gradient

In [62]:
# 結局これは、値（スカラーでもベクトルでも）の点ごとの grad を取っている
numerical_gradient(bowl, x_short)

array([0.01, 3.  ])

In [64]:
# 値の array に対して取る場合は、その外側からループを回さないとならない
x_debug

array([[-2.0001, -1.75  , -2.    , -1.75  ],
       [-2.    , -2.    , -1.75  , -1.75  ]])

In [65]:
np.meshgrid(x0, x1)

[array([[-2.  , -1.75],
        [-2.  , -1.75]]), array([[-2.  , -2.  ],
        [-1.75, -1.75]])]

In [68]:
mesh = np.meshgrid(x0, x0)
mesh

[array([[-2.  , -1.75],
        [-2.  , -1.75]]), array([[-2.  , -2.  ],
        [-1.75, -1.75]])]

In [70]:
grad = np.zeros_like(mesh)
grad

array([[[0., 0.],
        [0., 0.]],

       [[0., 0.],
        [0., 0.]]])

In [71]:
for idx, x in enumerate(mesh):
        print(f"idx: {idx}, x: {x}")
        

idx: 0, x: [[-2.   -1.75]
 [-2.   -1.75]]
idx: 1, x: [[-2.   -2.  ]
 [-1.75 -1.75]]


In [73]:
x_debug_transposed = x_debug.transpose()
x_debug_transposed

array([[-2.0001, -2.    ],
       [-1.75  , -2.    ],
       [-2.    , -1.75  ],
       [-1.75  , -1.75  ]])

In [84]:
x_debug

array([[-2.0001, -1.75  , -2.    , -1.75  ],
       [-2.    , -2.    , -1.75  , -1.75  ]])

In [79]:
grad= np.zeros_like(x_debug_transposed)
grad

array([[0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.]])

In [80]:
for idx, x in enumerate(x_debug_transposed):
        print(f"idx: {idx}, x: {x}")
        grad[idx] = numerical_gradient(bowl, x)

idx: 0, x: [-2.0001 -2.    ]
idx: 1, x: [-1.75 -2.  ]
idx: 2, x: [-2.   -1.75]
idx: 3, x: [-1.75 -1.75]


In [82]:
grad

array([[-0.20001, -4.     ],
       [-0.175  , -4.     ],
       [-0.2    , -3.5    ],
       [-0.175  , -3.5    ]])

In [83]:
x_debug_transposed[0]

array([-2.0001, -2.    ])

In [89]:
x_debug_transposed.shape

(4, 2)