# 随机搜索

In [None]:
import numpy as np

def L(X, y, W):
    """Return the mean multiclass SVM (hinge) loss over all examples.

    Fully vectorized implementation (no Python loops).

    Args:
        X: training examples stored as columns, shape (D, N)
            (e.g. 3073 x 50,000 in CIFAR-10).
        y: integer class labels, one per example. Any array holding N
            entries is accepted (e.g. shape (N,) or (1, N)); it is flattened.
        W: weights, shape (C, D) (e.g. 10 x 3073).

    Returns:
        A scalar: the average over examples of
        sum_{j != y_i} max(0, s_j - s_{y_i} + delta), with delta = 1.0.
        Returns 0.0 when X contains no examples.
    """
    delta = 1.0
    scores = W.dot(X)  # (C, N): one column of class scores per example
    num_examples = scores.shape[1]
    if num_examples == 0:
        return 0.0

    labels = np.asarray(y).ravel().astype(np.intp, copy=False)
    columns = np.arange(num_examples)

    # Score of the correct class for every example, broadcast over all rows.
    correct_scores = scores[labels, columns]
    margins = np.maximum(0.0, scores - correct_scores + delta)
    # The correct class must not contribute to its own loss.
    margins[labels, columns] = 0.0
    return margins.sum() / num_examples

# Assume every column of X_train is one data sample (e.g. 3073 x 50000).
# NOTE: the int64 cast truncates the Gaussian samples toward zero; this is
# synthetic placeholder data only.
X_train = np.random.randn(3073, 50000).astype('int64')
# Assume Y_train holds the class label of every sample: a 1-D array of
# 50000 integers in [0, 10), matching the 10 rows of W below.
Y_train = np.random.randint(0, 10, size=50000, dtype='int64')
# Assume the function L evaluates the loss.
bestloss = float("inf")  # any finite loss compares lower than infinity
bestW = None  # stays None only if no attempt ever yields a comparable loss
for num in range(1000):
    W = np.random.randn(10, 3073) * 0.0001  # generate random parameters
    loss = L(X_train, Y_train, W)  # get the loss over the entire training set
    if loss < bestloss:  # keep track of the best solution
        bestloss = loss
        bestW = W
    # Report every attempt (not only improvements), as in the sample output.
    print('in attempt %d the loss was %f, best %f' % (num, loss, bestloss))
# Sample output:
# in attempt 0 the loss was 9.401632, best 9.401632
# in attempt 1 the loss was 8.959668, best 8.959668
# in attempt 2 the loss was 9.044034, best 8.959668
# in attempt 3 the loss was 9.278948, best 8.959668
# in attempt 4 the loss was 8.857370, best 8.857370
# in attempt 5 the loss was 8.943151, best 8.857370
# in attempt 6 the loss was 8.605604, best 8.605604
# ... (truncated: continues for 1000 lines)
        
# Assume the test data Xte_cols has shape [3073 x 10000] and the test labels
# Yte have shape [10000 x 1]. Neither is defined in this file; they are
# expected to be supplied by the caller.
# bestW is the best weight matrix found by the random search above.
scores = bestW.dot(Xte_cols)  # 10 x 10000, the class scores for all test examples
# Find the index with the highest score in each column (the predicted class).
Yte_predict = np.argmax(scores, axis=0)
# Compute the accuracy. Yte is flattened so that a column vector does not
# broadcast against the 1-D predictions into a 10000 x 10000 comparison.
np.mean(Yte_predict == np.ravel(Yte))
# returns 0.155



# 随机本地搜索

In [4]:
import numpy as np

# Random local search: start from a random W and keep a random perturbation
# only when it lowers the loss.
W = np.random.randn(10, 3073) * 0.001  # generate a random initial W
bestloss = float("inf")
step_size = 0.0001  # scale of each random perturbation (loop-invariant)
for i in range(1000):
    Wtry = W + np.random.randn(10, 3073) * step_size
    # Use the training data under the names this file defines it with.
    loss = L(X_train, Y_train, Wtry)
    if loss < bestloss:
        W = Wtry
        bestloss = loss
    print('iter %d loss is %f' % (i, bestloss))

NameError: name 'L' is not defined

# 跟随梯度

In [5]:
def eval_numerical_gradient(f, x):
    """Estimate the gradient of f at x with forward finite differences.

    Args:
        f: a function taking a single argument (an array shaped like x) and
            returning a scalar.
        x: the point (NumPy array) at which to evaluate the gradient. It is
            perturbed in place one entry at a time and restored afterwards,
            so it should have a floating-point dtype for the step h to take
            effect.

    Returns:
        An array with the same shape as x holding (f(x + h*e_i) - f(x)) / h
        for every index i.
    """
    fx = f(x)  # function value at the original point
    grad = np.zeros(x.shape)
    h = 0.00001

    # Iterate over every index of x.
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index
        old_value = x[ix]
        x[ix] = old_value + h  # nudge this single coordinate by h
        try:
            fxh = f(x)  # evaluate f(x + h)
        finally:
            # Restore the previous value (very important): otherwise every
            # later evaluation would see an accumulated perturbation.
            x[ix] = old_value

        # Partial derivative along this coordinate (the slope).
        grad[ix] = (fxh - fx) / h
        it.iternext()  # move on to the next dimension

    return grad

In [6]:
# eval_numerical_gradient needs a function of a single argument (here, the
# weights), so the wrapper below closes over X_train and Y_train.
def CIFAR10_loss_fun(W):
    """Return the loss L over the whole training set for the weights W."""
    training_loss = L(X_train, Y_train, W)
    return training_loss


W = 0.001 * np.random.rand(10, 3073)  # random weight matrix
# Numerical gradient of the training loss with respect to W.
df = eval_numerical_gradient(CIFAR10_loss_fun, W)

NameError: name 'L' is not defined

In [7]:
loss_original = CIFAR10_loss_fun(W)  # the initial loss
print('original loss: %f' % (loss_original, ))
# Look at the effect of different step sizes along the negative gradient.
for step_size_log in range(-10, 0):
    step_size = 10 ** step_size_log
    W_new = W - step_size * df  # new position in weight space
    loss_new = CIFAR10_loss_fun(W_new)
    # %e rather than %f: %f would render every step size below 1e-6 as
    # 0.000000, hiding the value being compared.
    print('for step size %e new loss: %f' % (step_size, loss_new))
# Sample output:
# original loss: 2.200718
# for step size 1.000000e-10 new loss: 2.200652
# for step size 1.000000e-09 new loss: 2.200057
# for step size 1.000000e-08 new loss: 2.194116
# for step size 1.000000e-07 new loss: 2.135493
# for step size 1.000000e-06 new loss: 1.647802
# for step size 1.000000e-05 new loss: 2.844355
# for step size 1.000000e-04 new loss: 25.558142
# for step size 1.000000e-03 new loss: 254.086573
# for step size 1.000000e-02 new loss: 2539.370888
# for step size 1.000000e-01 new loss: 25392.214036

NameError: name 'L' is not defined

In [8]:
# Vanilla gradient descent.
# Schematic only: the loop has no termination condition, and
# evaluate_gradient, loss_fun, data, weights and step_size are not defined
# in this file (running the cell raises NameError).
while True:  
    weights_grad = evaluate_gradient(loss_fun, data, weights)  
    weights += - step_size * weights_grad # perform the gradient update: step against the gradient

NameError: name 'evaluate_gradient' is not defined

In [9]:
# Vanilla minibatch gradient descent.
# Schematic only: the loop has no termination condition, and
# sample_training_data, evaluate_gradient, loss_fun, data, weights and
# step_size are not defined in this file (running the cell raises NameError).
while True:  
    data_batch = sample_training_data(data, 256) # sample a batch of 256 examples
    weights_grad = evaluate_gradient(loss_fun, data_batch, weights)  
    weights += - step_size * weights_grad # parameter update using the batch gradient

NameError: name 'sample_training_data' is not defined