In [41]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import ticker, cm
import seaborn as sns
from ipywidgets import *
import math

# Short aliases for the NumPy trig ufuncs used by the objective and gradient formulas below.
sin, cos = np.sin, np.cos

In [42]:
# Scale of the plot elements: the 'paper' context is comparatively small, so fonts are doubled.
sns.set_context('paper', font_scale=2)
# Select a font for displaying Chinese text (a bold weight was tried and left disabled).
font = dict(family='SimHei', size='15')
# Step 1: apply the font properties to matplotlib's rc settings.
plt.rc('font', **font)
# Step 2: fix how the minus sign of negative axis values is displayed.
plt.rc('axes', unicode_minus=False)

In [43]:
# Define the (rather nasty) multi-modal objective function
def f_2d(x,y):
    """Evaluate the 2-D objective at ``(x, y)``.

    For each coordinate ``v`` the terms ``v**2/20 + cos(v) + sin(3v)/3 +
    cos(5v)/5 + sin(7v)/7`` are added up, a constant 2 is appended, and the
    total is multiplied by 10.  Works for scalars and for NumPy arrays
    (element-wise), since only arithmetic and the ``sin``/``cos`` aliases
    are used.
    """
    pieces = []
    for v in (x, y):
        pieces.extend([(1/20)*(v**2), cos(v), sin(3*v)/3, cos(5*v)/5, sin(7*v)/7])
    pieces.append(2)
    # Accumulate strictly left to right: x-terms, then y-terms, then the constant.
    total = pieces[0]
    for piece in pieces[1:]:
        total = total + piece
    return 10*total

def f_2d_e(x,y):
    """Evaluate the alternative objective ``0.5*x**2 + 2*y**2 + sin(x*y - pi/2) + 1``.

    Accepts scalars or NumPy arrays (evaluated element-wise).  The phase
    shift uses ``np.pi`` rather than a hand-truncated decimal so the result
    is accurate to floating-point precision.
    """
    half_pi = np.pi/2
    f = 0.5*x**2+2*y**2+sin(x*y-half_pi)+1
    return f

In [44]:
# Compute the gradient
def f_grad(x,y):
    """Return the pair ``(dfdx, dfdy)`` of partial derivatives used for ``f_2d``.

    Both partials share the same one-dimensional formula
    ``10*(v/10 - sin(v) + cos(3v) - sin(5v) + cos(7v))``, applied to ``x``
    and to ``y`` respectively.
    """
    def _slope(v):
        linear = (1/10)*v
        waves = linear-sin(v)+cos(3*v)-sin(5*v)+cos(7*v)
        return 10*waves

    return _slope(x), _slope(y)

def f_grad_e(x,y):
    """Return ``(dfdx, dfdy)`` for ``0.5*x**2 + 2*y**2 + sin(x*y - pi/2) + 1``.

    The shared factor ``cos(x*y - pi/2)`` is computed once, with ``np.pi``
    in place of a hand-truncated decimal so the gradient is accurate to
    floating-point precision.
    """
    inner = cos(x*y-np.pi/2)
    dfdx = x+y*inner
    dfdy = 4*y+x*inner
    return dfdx,dfdy
    

In [45]:
# Custom training loop
def train_2d(trainer,lr=0.001,epoch=50,init_x=-4,init_y=-4):
    """Run ``trainer`` for ``epoch`` steps and record the visited points.

        trainer: callable invoked as ``trainer(x, y, s_x, s_y, lr)`` that
                 returns the updated 5-tuple ``(x, y, s_x, s_y, lr)``
        lr: learning rate
        epoch: number of update steps
        init_x, init_y: starting values of the two coordinates

    Returns a list of ``(x, y)`` tuples: the start point followed by the
    point after each step.  The two auxiliary state slots start at 0.
    """
    state = (init_x, init_y, 0, 0, lr)
    trajectory = [state[:2]]
    for _ in range(epoch):
        pos_x, pos_y, aux_x, aux_y, rate = trainer(*state)
        state = (pos_x, pos_y, aux_x, aux_y, rate)
        trajectory.append((pos_x, pos_y))
    return trajectory

In [46]:
# Visualize an optimization trajectory
def plot_2d(res, figsize=(10, 8), title=None):
    """Draw an optimization path on top of the contour map of ``f_2d``.

    Args:
        res: non-empty sequence of ``(x, y)`` points, e.g. the list
            returned by ``train_2d``; the first entry is drawn as the
            start and the last entry as the end point.
        figsize: figure size passed to ``plt.figure``.
        title: plot title passed to ``plt.title``.

    Raises:
        ValueError: if ``res`` contains no points.

    Shows the figure and prints the value of ``f_2d`` at the last point.
    """
    if len(res) == 0:
        raise ValueError('res must contain at least one (x, y) point')

    x_, y_ = zip(*res)
    fig = plt.figure(figsize=figsize)
    # Hard-coded reference marker labelled as the minimum; presumably the
    # global minimum of f_2d -- not verified in this function.
    # (For f_2d_e the notebook previously used a marker at (0, 0) and a
    # [-5, 5] grid instead.)
    plt.plot([-2.93], [-2.93], 'r*', ms=5)
    plt.text(-2.93, -2.93, '最小值', color='k')
    # Start point, full path, and end point of the trajectory.
    plt.plot(x_[0], y_[0], 'ro', ms=10)
    plt.text(x_[0]+0.2, y_[0]-0.15, '起点', color='k')
    plt.plot(x_, y_, '-o', color='r')

    plt.plot(x_[-1], y_[-1], 'bo',ms=10)
    plt.text(x_[-1]+0.1, y_[-1]-0.4, '终点', color='k')

    grid_x, grid_y = np.meshgrid(np.linspace(-10,10, 100), np.linspace(-10,10, 100))
    # Evaluate the objective on the grid once and reuse it for both contour calls.
    grid_z = f_2d(grid_x, grid_y)
    # Filled contour regions with the rainbow colormap and some transparency.
    cp=plt.contourf(grid_x, grid_y, grid_z,alpha=0.75, cmap=cm.rainbow)
    # Contour lines with inline labels.
    C=plt.contour(grid_x, grid_y, grid_z,colors='black')
    plt.clabel(C,inline=True,fontsize=15)
    cbar = fig.colorbar(cp)
    cbar.set_label('loss')
    plt.xlabel('x')
    plt.ylabel('y')
    plt.title(title)
    plt.show()
    # Note: the printed value is the loss at the final point of the path.
    final_x, final_y = res[-1]
    loss=f_2d(final_x, final_y)
    print('Minimum loss:',loss,'x:',final_x,' y:',final_y)

In [47]:
# The most basic gradient descent method
def sgd(x, y, s1, s2, lr):
    """Take one plain gradient-descent step from ``(x, y)``.

    ``s1`` and ``s2`` are accepted only to match the trainer signature used
    by ``train_2d``; they are not read, and zeros are returned in their
    slots.  The learning rate is passed through unchanged.
    """
    grad_x, grad_y = f_grad(x, y)
    next_x = x - lr * grad_x
    next_y = y - lr * grad_y
    return (next_x, next_y, 0, 0, lr)

# Four slider controls (value, min, max, step) that can be adjusted at any time to see the effect.
# continuous_update=False is set on each slider itself: interact only consumes keyword
# arguments that match the function's parameters, so passing it to interact has no effect.
@interact(lr=FloatSlider(value=0.10, min=0, max=1, step=0.001, continuous_update=False),
          epoch=IntSlider(value=100, min=0, max=100, step=1, continuous_update=False),
          init_x1=FloatSlider(value=6, min=-10, max=10, step=0.1, continuous_update=False),
          init_x2=FloatSlider(value=-4, min=-10, max=10, step=0.1, continuous_update=False))
def visualize_gradient_descent(lr=0.10,epoch=100,init_x1=6,init_x2=-4):
    """Run plain gradient descent and plot the resulting path.

    lr: learning rate
    epoch: number of update steps
    init_x1, init_x2: starting point
    """
    res = train_2d(sgd, lr,epoch,init_x1,init_x2)
    plot_2d(res,(12,8), title='SGD')

interactive(children=(FloatSlider(value=0.1, description='lr', max=1.0, step=0.001), IntSlider(value=100, desc…

In [54]:
# continuous_update=False is set on each slider itself: interact only consumes keyword
# arguments that match the function's parameters, so passing it to interact has no effect.
@interact(lr=FloatSlider(value=0.3, min=0, max=1, step=0.001, continuous_update=False),
          gamma=FloatSlider(value=0.1, min=0, max=0.99, step=0.01, continuous_update=False),
          epoch=IntSlider(value=100, min=0, max=100, step=1, continuous_update=False),
          init_x1=FloatSlider(value=6, min=-10, max=10, step=0.1, continuous_update=False),
          init_x2=FloatSlider(value=-4, min=-10, max=10, step=0.1, continuous_update=False))
def visualize_sgd_momentum(lr=0.3, gamma=0.1,epoch=100,init_x1=6,init_x2=-4):
    '''Run gradient descent with momentum and plot the resulting path.

    lr: learning rate
    gamma: parameter for inertia sgd (weight given to the previous velocity)
    epoch: number of update steps
    init_x1, init_x2: starting point
    '''

    def momentum(x1, x2, v1, v2, lr):
        # v1, v2 carry the velocity between steps (they start at 0 in train_2d).
        dfdx1, dfdx2 = f_grad(x1, x2)
        v1 = gamma * v1 + lr * dfdx1
        v2 = gamma * v2 + lr * dfdx2
        x1 = x1 - v1
        x2 = x2 - v2
        return (x1, x2, v1, v2, lr)

    res = train_2d(momentum, lr,epoch,init_x1,init_x2)
    plot_2d(res, title='Momentum')

interactive(children=(FloatSlider(value=0.3, description='lr', max=1.0, step=0.001), FloatSlider(value=0.1, de…

In [61]:
# continuous_update=False is set on each slider itself: interact only consumes keyword
# arguments that match the function's parameters, so passing it to interact has no effect.
@interact(lr=FloatSlider(value=0.3, min=0, max=1, step=0.01, continuous_update=False),
          epoch=IntSlider(value=100, min=0, max=100, step=1, continuous_update=False),
          init_x1=FloatSlider(value=6, min=-10, max=10, step=0.1, continuous_update=False),
          init_x2=FloatSlider(value=-4, min=-10, max=10, step=0.1, continuous_update=False))
def visualize_adagrad(lr=0.3,epoch=100,init_x1=6,init_x2=-4):
    '''Run AdaGrad and plot the resulting path.

    lr: learning rate
    epoch: number of update steps
    init_x1, init_x2: starting point
    '''
    def adagrad_2d(x1, x2, s1, s2, lr):
        # s1, s2 accumulate the squared gradients of each coordinate over all steps.
        g1, g2 = f_grad(x1, x2)
        eps = 1e-6  # keeps the square root's argument positive when s is 0
        s1 += g1 ** 2
        s2 += g2 ** 2
        x1 -= lr / math.sqrt(s1 + eps) * g1
        x2 -= lr / math.sqrt(s2 + eps) * g2
        return x1, x2, s1, s2, lr

    res = train_2d(adagrad_2d, lr,epoch,init_x1,init_x2)
    plot_2d(res, title='AdaGrad')

interactive(children=(FloatSlider(value=0.3, description='lr', max=1.0, step=0.01), IntSlider(value=100, descr…

In [65]:
# continuous_update=False is set on each slider itself: interact only consumes keyword
# arguments that match the function's parameters, so passing it to interact has no effect.
@interact(lr=FloatSlider(value=0.3, min=0, max=1, step=0.001, continuous_update=False),
          gamma=FloatSlider(value=0.9, min=0, max=0.99, step=0.001, continuous_update=False),
          epoch=IntSlider(value=100, min=0, max=100, step=1, continuous_update=False),
          init_x1=FloatSlider(value=6, min=-10, max=10, step=0.1, continuous_update=False),
          init_x2=FloatSlider(value=-4, min=-10, max=10, step=0.1, continuous_update=False))
def visualize_rmsprop(lr=0.3, gamma=0.9,epoch=100,init_x1=6,init_x2=-4):
    '''Run RMSProp and plot the resulting path.

    lr: learning rate
    gamma: decay factor of the moving average of squared gradients
    epoch: number of update steps
    init_x1, init_x2: starting point
    '''
    def rmsprop_2d(x1, x2, s1, s2, lr):
        # s1, s2 hold the exponentially weighted averages of the squared gradients.
        eps = 1e-6  # keeps the square root's argument positive when s is 0
        g1, g2 = f_grad(x1, x2)
        s1 = gamma * s1 + (1 - gamma) * g1 ** 2
        s2 = gamma * s2 + (1 - gamma) * g2 ** 2
        x1 -= lr / math.sqrt(s1 + eps) * g1
        x2 -= lr / math.sqrt(s2 + eps) * g2
        return x1, x2, s1, s2, lr

    res = train_2d(rmsprop_2d, lr,epoch,init_x1,init_x2)
    plot_2d(res, title='RMSProp')

interactive(children=(FloatSlider(value=0.3, description='lr', max=1.0, step=0.001), FloatSlider(value=0.9, de…

In [70]:
# continuous_update=False is set on each slider itself: interact only consumes keyword
# arguments that match the function's parameters, so passing it to interact has no effect.
@interact(lr=FloatSlider(value=0.3, min=0, max=1, step=0.01, continuous_update=False),
          beta1=FloatSlider(value=0.9, min=0, max=0.999, step=0.001, continuous_update=False),
          beta2=FloatSlider(value=0.999, min=0, max=0.999, step=0.001, continuous_update=False),
          epoch=IntSlider(value=100, min=0, max=100, step=1, continuous_update=False),
          init_x1=FloatSlider(value=-10, min=-10, max=10, step=0.1, continuous_update=False),
          init_x2=FloatSlider(value=-4, min=-10, max=10, step=0.1, continuous_update=False))
def visualize_adam(lr=0.3, beta1=0.9, beta2=0.999,epoch=100,init_x1=-10,init_x2=-4):
    '''Run Adam and plot the resulting path.

    lr: learning rate
    beta1: parameter for E(g)
    beta2: parameter for E(g^2)
    epoch: number of update steps
    init_x1, init_x2: starting point
    '''
    def Deltax(m, n, g, t):
        '''Update the moment estimates for one coordinate.

        Returns the new (m, n) and the step dx to subtract from the coordinate.
        '''
        eps = 1.0E-6  # avoids division by zero when n_hat is 0
        m = beta1 * m + (1 - beta1) * g
        n = beta2 * n + (1 - beta2) * g*g
        # Bias correction; t starts at 1, so the denominators are non-zero
        # for beta values below 1.
        m_hat = m / (1 - beta1**t)
        n_hat = n / (1 - beta2**t)
        dx = lr * m_hat / (math.sqrt(n_hat) + eps)
        return m, n, dx

    def adam_2d(x1, x2, m1, n1, m2, n2, lr, t):
        '''m1, m2: E(g1), E(g2)
           n1, n2: E(g1^2), E(g2^2) where E() is expectation
           lr: learning rate
           t: time step'''
        g1, g2 = f_grad(x1, x2)
        m1, n1, dx1 = Deltax(m1, n1, g1, t)
        m2, n2, dx2 = Deltax(m2, n2, g2, t)
        x1 -= dx1
        x2 -= dx2
        return x1, x2, m1, n1, m2, n2, lr

    def train_adam(trainer, lr,epoch=100,init_x1=-8,init_x2=-10):
        """Train a 2d object function with a customized trainer"""
        x1, x2 = init_x1,init_x2
        m1, n1, m2, n2 = 0, 0, 0, 0
        res = [(x1, x2)]
        for i in range(epoch):
            # The time step passed to the trainer is 1-based.
            x1, x2, m1, n1, m2, n2, lr = trainer(x1, x2, m1, n1, m2, n2, lr, i+1)
            res.append((x1, x2))
        return res

    res = train_adam(adam_2d, lr,epoch,init_x1,init_x2)
    plot_2d(res, title='Adam')

interactive(children=(FloatSlider(value=0.3, description='lr', max=1.0, step=0.01), FloatSlider(value=0.9, des…