In [11]:
import numpy as np
import random



https://zhuanlan.zhihu.com/p/29668368

In [7]:
def softmax(x):
    """Compute a numerically stable softmax without modifying the input.

    For an input with more than one dimension the softmax is taken
    independently over the last axis (row-wise for a matrix); otherwise it
    is taken over all elements of the vector.

    Args:
        x: array-like of scores (vector or matrix).

    Returns:
        A new array with the same shape as ``x`` whose entries along the
        normalised axis are non-negative and sum to 1.
    """
    x = np.asarray(x)
    # Matrices are normalised per row; anything else is treated as one vector.
    axis = -1 if x.ndim > 1 else None

    # Subtracting the maximum does not change the result mathematically but
    # keeps np.exp from overflowing.  A new array is built (no in-place
    # ``-=``) so the caller's data is never altered.
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=axis, keepdims=True)

In [8]:
def sigmoid(x):
    """Apply the logistic sigmoid ``1 / (1 + exp(-x))`` element-wise.

    Args:
        x: scalar or NumPy array.

    Returns:
        The sigmoid of ``x``; for an array input the result has the same
        shape.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

In [9]:
def sigmoid_grad(s):
    """Return the derivative of the sigmoid, given the sigmoid's output.

    Relies on the identity d/dx sigmoid(x) = sigmoid(x) * (1 - sigmoid(x)),
    so ``s`` must already be a sigmoid value, not the raw input x.

    Args:
        s: scalar or NumPy array of sigmoid outputs.

    Returns:
        ``s * (1 - s)``, element-wise.
    """
    complement = 1 - s
    return s * complement

In [2]:
def softmaxCostAndGradient(predicted, target, outputVectors, dataset=None):
    """Softmax cross-entropy cost and gradients for one predicted/target pair.

    Args:
        predicted: vector of length D, the predicted (centre) word vector.
        target: integer row index of the target word in ``outputVectors``.
        outputVectors: (N, D) matrix with one output vector per row.
        dataset: not used by this cost function; accepted so that it can be
            called with the same arguments as other cost functions.

    Returns:
        cost: scalar cross-entropy, i.e. minus the log of the softmax
            probability assigned to ``target``.
        gradPred: length-D gradient of the cost w.r.t. ``predicted``.
        grad: (N, D) gradient of the cost w.r.t. ``outputVectors``.
    """
    inner_products = np.dot(outputVectors, predicted)

    # cost = log(sum_i exp(z_i)) - z_target.  Evaluate it on a shifted copy:
    # subtracting the maximum cancels out mathematically, prevents overflow
    # in np.exp, and keeps the cost independent of whatever softmax() does
    # to the array it is handed.
    shifted = inner_products - np.max(inner_products)
    cost = np.log(np.sum(np.exp(shifted))) - shifted[target]

    scores = softmax(inner_products)  # (N,) probability of each word

    # d cost / d predicted = sum_i scores[i] * u_i - u_target
    gradPred = np.dot(scores, outputVectors) - outputVectors[target]

    # d cost / d u_i = (scores[i] - [i == target]) * predicted
    grad = np.outer(scores, predicted)
    grad[target] -= predicted

    return cost, gradPred, grad

In [3]:
def skipgram(currentWord, C, contextWords, tokens, inputVectors, outputVectors,
             dataset=None, word2vecCostAndGradient=softmaxCostAndGradient):
    """Skip-gram model in word2vec: cost and gradients for one centre word.

    The centre word's input vector is used as the prediction for every
    context word; the per-pair costs and gradients are summed.

    Args:
        currentWord: the current centre word (a key of ``tokens``).
        C: integer context size (not used in the computation itself).
        contextWords: list of no more than 2*C context words.
        tokens: dict mapping words to their row index in the vector matrices.
        inputVectors: matrix with one input (centre) word vector per row.
        outputVectors: matrix with one output word vector per row.
        dataset: passed through unchanged to ``word2vecCostAndGradient``;
            defaults to None, matching ``softmaxCostAndGradient``.
        word2vecCostAndGradient: callable invoked as
            ``(predicted, target, outputVectors, dataset)`` and returning
            ``(cost, gradPred, grad)``.

    Returns:
        cost: sum of the costs over all context words.
        gradIn: array shaped like ``inputVectors``; only the centre word's
            row receives gradient.
        gradOut: array shaped like ``outputVectors``, summed over all
            context words.
    """
    cost = 0.0
    gradIn = np.zeros(inputVectors.shape)
    gradOut = np.zeros(outputVectors.shape)

    center_index = tokens[currentWord]
    predicted = inputVectors[center_index]

    for word in contextWords:
        pair_cost, pair_gradPred, pair_gradOut = word2vecCostAndGradient(
            predicted, tokens[word], outputVectors, dataset)
        cost += pair_cost
        # Every pair shares the same centre vector, so its gradient
        # accumulates in a single row.
        gradIn[center_index] += pair_gradPred
        gradOut += pair_gradOut

    return cost, gradIn, gradOut

In [4]:
def cbow(currentWord, C, contextWords, tokens, inputVectors, outputVectors,
         dataset=None, word2vecCostAndGradient=softmaxCostAndGradient):
    """CBOW model in word2vec: cost and gradients for one centre word.

    The input vectors of the context words are summed (not averaged) to form
    the prediction, and the centre word is the target.

    Args:
        currentWord: the current centre word (a key of ``tokens``).
        C: integer context size (not used in the computation itself).
        contextWords: list of context words (keys of ``tokens``).
        tokens: dict mapping words to their row index in the vector matrices.
        inputVectors: matrix with one input word vector per row.
        outputVectors: matrix with one output word vector per row.
        dataset: passed through unchanged to ``word2vecCostAndGradient``;
            defaults to None, matching ``softmaxCostAndGradient``.
        word2vecCostAndGradient: callable invoked as
            ``(predicted, target, outputVectors, dataset)`` and returning
            ``(cost, gradPred, grad)``.

    Returns:
        cost: the cost returned by ``word2vecCostAndGradient``.
        gradIn: array shaped like ``inputVectors``; each occurrence of a
            context word adds the prediction gradient to that word's row.
        gradOut: the output-vector gradient returned by
            ``word2vecCostAndGradient``.
    """
    gradIn = np.zeros(inputVectors.shape)

    target = tokens[currentWord]
    context_vectors = np.array([inputVectors[tokens[word]] for word in contextWords])
    predicted = np.sum(context_vectors, axis=0)

    cost, gradPred, gradOut = word2vecCostAndGradient(predicted, target, outputVectors, dataset)

    # The prediction is a plain sum, so each context occurrence receives the
    # full prediction gradient; repeated words accumulate it repeatedly.
    for word in contextWords:
        gradIn[tokens[word]] += gradPred

    return cost, gradIn, gradOut

In [None]:
# Fix the seed so the random matrices, and therefore the printed results, are
# the same on every run.
np.random.seed(0)

inputVectors = np.random.randn(5, 3)  # input matrix: 5 letters in the corpus, each represented by a 3-dimensional vector
outputVectors = np.random.randn(5, 3)  # output matrix

sentence = ['a', 'e', 'd', 'b', 'd', 'c','d', 'e', 'e', 'c', 'a']  # the sentence
centerword = 'c'  # centre letter
context = ['a', 'e', 'd', 'd', 'd', 'd', 'e', 'e', 'c', 'a']  # context letters
tokens = dict([("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)])  # maps each letter to its row index in the input/output matrices
C = 5  # context size: the 10 context letters above are at most 2*C

# skipgram takes (currentWord, C, contextWords, tokens, inputVectors,
# outputVectors, dataset); the softmax cost function ignores `dataset`,
# so None is passed explicitly.
c, gin, gout = skipgram(centerword, C, context, tokens, inputVectors, outputVectors, None)
step = 0.01  # update step size (learning rate)
print('原始输入矩阵:\n',inputVectors)
print('原始输出矩阵:\n',outputVectors)
inputVectors -= step * gin  # gradient-descent update of the input word vectors
outputVectors -= step * gout  # gradient-descent update of the output word vectors
print('更新后的输入矩阵:\n',inputVectors)
print('更新后的输出矩阵:\n',outputVectors)