# 직접 미분

In [None]:
import sympy

x = sympy.Symbol('x')
f = (x**2 + 2 * x) * sympy.log(x)
df = sympy.diff(f, x)

print(df)
print(sympy.simplify(df))

In [5]:
import numpy as np

f = lambda x : (x**2 + 2 * x) * np.log(x)
df = lambda x : 2 * (x + 1) * np.log(x) + (x + 2)

print(f(1))
print(df(1))

0.0
3.0


### 수치 미분

In [15]:
def numer_deriv(f, x, h=0.001, method="center"):
    '''
    {f(x+h) - f(x)} / h 을 수치적으로 계산
    
    f: 미분할 함수로 주어진 위치에서 함숫값 계산을 위해 사용
    x: 미분계수를 구할 변수의 위치로
        일변수인 경우 itn 또는 float
        다변수인 경우 넘파이 어레이 (d,) 벡터
    h: 비율을 구할 작은 구간
    '''
    if type(x) in (float, int):
        grad = [0.0]
        x_ = [x]
        var_type = 'scalar'
    else:
        grad = np.zeros(x.shape)
        x_ = x.copy().astype('float32')
        var_type = 'vector'
    
    for i, xi in enumerate(x_):
        original_value = x_[i]
        
        if method == 'forward':
            x_[i] = original_value + h
        else:
            x_[i] = original_value + (h/2)
        
        if var_type == 'scalar':
            gradplus = f(x_[i])
        else:
            gradplus = f(x_)
        
        if method == 'forward':
            x_[i] = original_value
        else:
            x_[i] = original_value - (h/2)
        
        if var_type == 'scalar':
            gradminus = f(x_[i])
        else:
            gradminus = f(x_)
        
        grad[i] = (gradplus - gradminus) / h
    
    if var_type == 'scalar':
        return grad[0]
    else:
        return grad

In [12]:
numer_deriv(f, 1)

2.999999999999666

In [16]:
print(numer_deriv(f, 1, h=0.5, method="forward"))

print(numer_deriv(f, 1, h=0.5, method="center"))

4.257383635135726
2.9997299032915508


In [19]:
f_xy = lambda x : (x[0]**2 + 2 * x[0]) * np.log(x[1])
numer_deriv(f_xy, np.array([1, 2]))

array([2.77255299, 1.49889143])

In [23]:
# 심파이로 직접 편미분

import sympy

x = sympy.Symbol('x')
y = sympy.Symbol('y')
f_xy_sympy = (x**2 + 2 * x) * sympy.log(y)
df_xy_x = sympy.diff(f_xy_sympy, x)
df_xy_y = sympy.diff(f_xy_sympy, y)

print(df_xy_x)
print(df_xy_y)

print("{:.4f}".format(df_xy_x.evalf(subs={x: 1.0, y: 2.0})))
print("{:.4f}".format(df_xy_y.evalf(subs={x: 1.0, y: 2.0})))

(2*x + 2)*log(y)
(x**2 + 2*x)/y
2.7726
1.5000


### 자동 미분

In [27]:
import numpy as np
import torch

np.random.seed(0)

x = np.random.rand(6).reshape(2, 3)

x_tensor = torch.tensor(x)
x_from_numpy = torch.from_numpy(x)
x_Tensor = torch.Tensor(x)
x_as_tensor = torch.as_tensor(x)

print(x, x.dtype)
print(x_tensor, x_tensor.dtype, x_tensor.requires_grad)
print(x_from_numpy, x_from_numpy.dtype, x_from_numpy.requires_grad)
print(x_Tensor, x_Tensor.dtype, x_Tensor.requires_grad)
print(x_as_tensor, x_as_tensor.dtype, x_as_tensor.requires_grad)

# 원 데이터에서 요소 하나 변경
print("===== 요소 변경 =====")
x[0, 0] = 100

print(x, x.dtype)
print(x_tensor, x_tensor.dtype, x_tensor.requires_grad)
print(x_from_numpy, x_from_numpy.dtype, x_from_numpy.requires_grad)
print(x_Tensor, x_Tensor.dtype, x_Tensor.requires_grad)
print(x_as_tensor, x_as_tensor.dtype, x_as_tensor.requires_grad)

[[0.5488135  0.71518937 0.60276338]
 [0.54488318 0.4236548  0.64589411]] float64
tensor([[0.5488, 0.7152, 0.6028],
        [0.5449, 0.4237, 0.6459]], dtype=torch.float64) torch.float64 False
tensor([[0.5488, 0.7152, 0.6028],
        [0.5449, 0.4237, 0.6459]], dtype=torch.float64) torch.float64 False
tensor([[0.5488, 0.7152, 0.6028],
        [0.5449, 0.4237, 0.6459]]) torch.float32 False
tensor([[0.5488, 0.7152, 0.6028],
        [0.5449, 0.4237, 0.6459]], dtype=torch.float64) torch.float64 False
===== 요소 변경 =====
[[100.           0.71518937   0.60276338]
 [  0.54488318   0.4236548    0.64589411]] float64
tensor([[0.5488, 0.7152, 0.6028],
        [0.5449, 0.4237, 0.6459]], dtype=torch.float64) torch.float64 False
tensor([[100.0000,   0.7152,   0.6028],
        [  0.5449,   0.4237,   0.6459]], dtype=torch.float64) torch.float64 False
tensor([[0.5488, 0.7152, 0.6028],
        [0.5449, 0.4237, 0.6459]]) torch.float32 False
tensor([[100.0000,   0.7152,   0.6028],
        [  0.5449,   0.4237,

In [29]:
# 자동 미분 설정 - requires_grad is True

x_tensor_grad = torch.tensor(x, requires_grad=True)
print(x_tensor_grad, x_tensor_grad.dtype, x_tensor_grad.requires_grad)

tensor([[100.0000,   0.7152,   0.6028],
        [  0.5449,   0.4237,   0.6459]], dtype=torch.float64,
       requires_grad=True) torch.float64 True


In [32]:
# torch.autograd.backward

x = torch.tensor([1.0], requires_grad=True)
f = (x**2 + 2 * x) * torch.log(x)

print(x)
print(f)
print(x.grad) # 연산 수행을 안 해서 None

torch.autograd.backward(f, retain_graph=True)
print(x.grad)

tensor([1.], requires_grad=True)
tensor([0.], grad_fn=<MulBackward0>)
None
tensor([3.])


In [35]:
# torch.autograd.grad

df = torch.autograd.grad(f, x, retain_graph=True)
print(df)

df = torch.autograd.grad(f, (x, x), retain_graph=True)
print(df)

(tensor([3.]),)
(tensor([3.]), tensor([3.]))


In [36]:
x = torch.tensor([1.0], requires_grad=True)
y = torch.tensor([2.0], requires_grad=True)
f_xy = (x**2 + 2 * x) * torch.log(y)

torch.autograd.backward(f_xy, retain_graph=True)
print(x.grad)

print(y.grad)

df = torch.autograd.grad(f_xy, (x, y), retain_graph=True)
print(df)

tensor([2.7726])
tensor([1.5000])
(tensor([2.7726]), tensor([1.5000]))


### 자동 미분 구현

In [4]:
def times(x, y):
    return x * y, (x, y)

def times_deriv(cache, dout=1):
    return cache[1] * dout, cache[0] * dout

TIMES = {'f': times, 'df': times_deriv}

v, cache = TIMES['f'](2, 3)
dx, dy = TIMES['df'](cache)

print("dx={}, dy={}".format(dx, dy))

dx=3, dy=2


In [5]:
#### 덧셈과 로그 사용

In [6]:
import numpy as np

def add(x, y):
    return x+y, (x, y)

def add_deriv(cache, dout=1):
    return dout, dout

ADD = {'f': add, 'df': add_deriv}

def log(x):
    return np.log(x), x

def log_deriv(cache, dout=1):
    return (1/cache) * dout

LOG = {'f': log, 'df': log_deriv}

In [1]:
import numpy as np

x = 1.; y = 2.

a, cache_a = TIMES['f'](x, x)
b, cache_b = TIMES['f'](2, x)
c, cache_c = ADD['f'](a, b)
d, cache_d = LOG['f'](y)
z, cache_z = TIMES['f'](c, d)

dx = dy = 0.

dc, dd = TIMES['df'](cache_z, 1)
dy = LOG['df'](cache_d, dd)
da, db = ADD['df'](cache_c, dc)
_, dx_ = TIMES['df'](cache_b, db); dx += dx_;
dx_, dx__ = TIMES['df'](cache_a, da); dx += dx_ + dx__

print("backward pass dx = {:.6f}, dy = {:.6f}".format(dx, dy))

NameError: name 'TIMES' is not defined

In [1]:
import torch

x = torch.tensor([1.], requires_grad=True)
y = torch.tensor([2.], requires_grad=True)
z = (x**2 + 2 * x) * torch.log(y)
dz = torch.autograd.grad(z, (x, y), grad_outputs=torch.tensor([2.]), retain_graph=True)
print(dz)

(tensor([5.5452]), tensor([3.]))
