# Dropout
Dropout 是一种常用的正则化方法。

In [2]:
# 初始化
from __future__ import print_function
import time
import numpy as np
import matplotlib.pyplot as plt
from fc_net import *
from gradient_check import eval_numerical_gradient, eval_numerical_gradient_array
from solver import Solver

%matplotlib inline
# Notebook-wide plotting defaults, applied in a single call.
plt.rcParams.update({
    'figure.figsize': (10.0, 8.0),     # default size of plots
    'image.interpolation': 'nearest',
    'image.cmap': 'gray',
})

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

def rel_error(x, y):
  """Return the largest element-wise relative error between x and y.

  For every element the error is |x - y| / max(1e-8, |x| + |y|); the 1e-8
  floor keeps the denominator non-zero when both entries are zero.
  """
  abs_diff = np.abs(x - y)
  scale = np.maximum(1e-8, np.abs(x) + np.abs(y))
  return np.max(abs_diff / scale)

In [4]:
# 获取数据
# 初始化设置
import numpy as np
import matplotlib.pyplot as plt
import pickle 
from gradient_check import eval_numerical_gradient, eval_numerical_gradient_array
from solver import Solver
from layers import *
from layer_utils import *


# 载入数据
def load_data(filename):
    """Load one pickled batch file and return its samples and labels.

    Reads the dict stored in `filename` and returns a tuple (X, Y): X is the
    'data' entry exactly as stored (no reshaping is applied), Y is the
    'labels' entry converted to a NumPy array.
    """
    # NOTE(review): pickle.load can execute arbitrary code; only open trusted files.
    with open(filename, 'rb') as batch_file:
        # latin1 decoding -- presumably the batches were pickled under Python 2; confirm.
        batch = pickle.load(batch_file, encoding='latin1')
    return batch['data'], np.array(batch['labels'])
    
root1 = 'data_batch_1'
root2 = 'data_batch_2'
root3 = 'data_batch_3'

(data1, labels1) = load_data(root1)
(data2, labels2) = load_data(root2)
(data3, labels3) = load_data(root3)

# Stack the three batches into one sample matrix (rows = samples) and one
# label vector; cast to float so the normalisation below is done in floating
# point whatever dtype the batch files store.
data = np.vstack((data1, data2, data3)).astype(np.float64)
labels = np.hstack((labels1, labels2, labels3))

# Consecutive row ranges of `data` used for each split.
NUM_TRAIN = 28000
NUM_VAL = 1000
NUM_TEST = 500

# Preprocessing: standardise every feature column.  The mean and std are
# taken from the training rows only and then applied to all rows, so that
# no validation/test information leaks into the preprocessing statistics.
train_mean = np.mean(data[:NUM_TRAIN], axis=0)
train_std = np.std(data[:NUM_TRAIN], axis=0)
train_std[train_std == 0] = 1.0  # constant column: avoid 0/0 -> NaN
data = (data - train_mean) / train_std

train_set = data[0:NUM_TRAIN]
train_labels = labels[0:NUM_TRAIN]

val_set = data[NUM_TRAIN:NUM_TRAIN + NUM_VAL]
val_labels = labels[NUM_TRAIN:NUM_TRAIN + NUM_VAL]

test_set = data[NUM_TRAIN + NUM_VAL:NUM_TRAIN + NUM_VAL + NUM_TEST]
test_labels = labels[NUM_TRAIN + NUM_VAL:NUM_TRAIN + NUM_VAL + NUM_TEST]

# Dropout forward()

In [34]:
# Sanity check of dropout_forward: compare output statistics in train and
# test mode for several values of p on a fixed random input with mean ~10.
np.random.seed(231)
x = np.random.randn(500, 500) + 10

for p in [0.25, 0.4, 0.7]:
    # Train-mode call first, then test-mode, so the order of calls is unchanged.
    out, _ = dropout_forward(x, {'mode': 'train', 'p': p})
    out_test, _ = dropout_forward(x, {'mode': 'test', 'p': p})

    # (label, value) pairs, printed one per line in this order.
    report = [
        ('Running tests with p = ', p),
        ('Mean of input: ', x.mean()),
        ('Mean of train-time output: ', out.mean()),
        ('Mean of test-time output: ', out_test.mean()),
        ('Fraction of train-time output set to zero: ', (out == 0).mean()),
        ('Fraction of test-time output set to zero: ', (out_test == 0).mean()),
    ]
    for label, value in report:
        print(label, value)
    print()

Running tests with p =  0.25
Mean of input:  10.000207878477502
Mean of train-time output:  10.014059116977283
Mean of test-time output:  10.000207878477502
Fraction of train-time output set to zero:  0.749784
Fraction of test-time output set to zero:  0.0

Running tests with p =  0.4
Mean of input:  10.000207878477502
Mean of train-time output:  9.977917658761159
Mean of test-time output:  10.000207878477502
Fraction of train-time output set to zero:  0.600796
Fraction of test-time output set to zero:  0.0

Running tests with p =  0.7
Mean of input:  10.000207878477502
Mean of train-time output:  9.987811912159426
Mean of test-time output:  10.000207878477502
Fraction of train-time output set to zero:  0.30074
Fraction of test-time output set to zero:  0.0



In [35]:
# Small demo of an inverted-dropout mask: each entry is 1/p where the
# uniform draw falls below p, and 0 elsewhere.
x = np.array([1, 2, 3])
p = 0.5
keep = np.random.rand(*x.shape) < p  # boolean array, same shape as x
mask = keep / p                      # True -> 1/p, False -> 0
print(mask)

[2. 2. 2.]


# Dropout backward

In [36]:
# Gradient check: compare dropout_backward against a numerical gradient of
# dropout_forward on a small random input.
np.random.seed(231)
x = np.random.randn(10, 10) + 10
dout = np.random.randn(*x.shape)

# NOTE(review): the 'seed' entry presumably makes every forward call reuse the
# same mask, which the numerical gradient relies on -- confirm in dropout_forward.
dropout_param = {'mode': 'train', 'p': 0.2, 'seed': 123}
out, cache = dropout_forward(x, dropout_param)
dx = dropout_backward(dout, cache)


def _forward_only(xx):
    """Return only the output array of dropout_forward for input xx."""
    return dropout_forward(xx, dropout_param)[0]


dx_num = eval_numerical_gradient_array(_forward_only, x, dout)

# Error should be around e-10 or less
print('dx relative error: ', rel_error(dx, dx_num))

dx relative error:  5.44560814873387e-11


## Inline Question 1:
What happens if we do not divide the values being passed through inverse dropout by `p` in the dropout layer? Why does that happen?

## Answer:
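简单的概率论：当 $X \sim U(0,1)$ 且 $p \in (0,1)$ 时，$P(X < p) = p$，即每个神经元以概率 $p$ 被保留，因此训练时未缩放的输出期望为 $p \cdot x$。如果不除以 $p$，训练时输出的期望（$p \cdot x$）与测试时的输出（$x$）尺度不一致，测试时各层的激活值会比训练时大约 $1/p$ 倍，从而影响预测效果。除以 $p$ 之后，训练时输出的期望恢复为 $x$，测试时就不需要再做任何缩放。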

# Fully-Connected with Dropout