In [None]:
https://github.com/fchollet/keras/issues/3110
Word2vec embedding example (skip-gram with negative sampling).


In [None]:
def make_word2vec_model(num_ns, embedding_dim, num_words, separate_context=True):
    '''
    Build and compile a skip-gram with negative sampling (word2vec) model.

    num_ns: (int) number of negative samples
    embedding_dim: (int) embedding dimension
    num_words: (int) size of the vocabulary
    separate_context: (bool) whether or not to use a separate set of word embeddings
                            for the context embeddings. Recommended for text data to
                            better fit the distributional hypothesis.

    The model takes two inputs. word_input is a (batch_size, 1) shaped tensor
    indicating the index of the center word in the sliding window. context_input
    is a (batch_size, num_ns+1) shaped tensor, where the first column holds the
    indices of the positive samples, and the num_ns following columns hold the
    indices of the negative samples. Labels should be a (batch_size, num_ns+1)
    shaped tensor, where the first column is ones (positive label) and the rest
    of the matrix is zero (negative label).

    returns: the compiled keras Model (adam optimizer, binary cross-entropy loss)
             whose output is a (batch_size, num_ns+1) tensor of sigmoid scores,
             one per context word.
    '''
    # Imported locally so the function is self-contained in a notebook cell.
    from keras.layers import Input, Embedding, Permute, Merge, Flatten, Activation
    from keras.models import Model

    word_input = Input(shape=(1,), dtype='int32')
    context_input = Input(shape=(num_ns + 1,), dtype='int32')

    # One embedding row per vocabulary entry.
    word_embedding = Embedding(num_words, embedding_dim)
    we = word_embedding(word_input)  # (batch, 1, embedding_dim)

    if separate_context:
        context_embedding = Embedding(num_words, embedding_dim)
    else:
        # Share the weights between center-word and context lookups.
        context_embedding = word_embedding

    # Transpose (batch, num_ns+1, embedding_dim) -> (batch, embedding_dim, num_ns+1).
    # This must be a Permute, not a Reshape: a reshape would reinterpret the
    # buffer and mix components of different context words into each column.
    ce = Permute((2, 1))(context_embedding(context_input))

    # Contract the embedding axis of both tensors: one dot product per context
    # word, giving (batch, num_ns+1, 1), flattened to (batch, num_ns+1).
    dots = Flatten()(Merge(mode='dot', dot_axes=(1, 2))([ce, we]))
    acts = Activation('sigmoid')(dots)

    model = Model(input=[word_input, context_input], output=acts)
    model.compile('adam', loss='binary_crossentropy')
    return model