## This notebook explores word embeddings using Keras (covers word2vec with the CBOW and skip-gram models, and GloVe)

In [8]:
# import the necessary libraries
from keras.layers import Merge
from keras.layers.embeddings import Embedding
from keras.layers.core import Dense,Reshape
from keras.models import Sequential

# Embedding hyperparameters shared by the model definitions below:
# vocab_size is the first argument given to each Embedding layer (number of
# distinct ids it can look up); embedding_size is the length of each vector.
vocab_size, embedding_size = 5000, 300

### Define the Context model and the Word model

In [9]:
# Word model: looks up a single word id (input_length=1) in an embedding table
# and reshapes the result to a flat vector of length embedding_size.
word_model = Sequential()
word_model.add(Embedding(vocab_size, embedding_size, embeddings_initializer='glorot_uniform', input_length=1))
word_model.add(Reshape((embedding_size,)))

# Context model: same structure as the word model, with its own separate
# embedding table.
context_model = Sequential()
context_model.add(Embedding(vocab_size, embedding_size, embeddings_initializer='glorot_uniform', input_length=1))
context_model.add(Reshape((embedding_size,)))

# Combined model: dot product of the word and context vectors, followed by a
# single-unit dense layer with sigmoid activation.
model = Sequential()
model.add(Merge([word_model, context_model], mode="dot"))
# `kernel_initializer` is the Keras 2 name for the old `init` argument; it
# matches the `embeddings_initializer` keyword used on the Embedding layers.
model.add(Dense(1, kernel_initializer="glorot_uniform", activation="sigmoid"))
# NOTE(review): binary_crossentropy is the more conventional loss for a
# sigmoid output trained on 0/1 labels; mean_squared_error is kept so that
# training behaviour is unchanged -- confirm the intended loss.
model.compile(loss="mean_squared_error",optimizer="adam")



### This section handles and preprocesses the text to create the skip-grams

In [12]:
from keras.preprocessing.text import *
from keras.preprocessing.sequence import skipgrams

# Single example sentence; the tokenizer below is fitted on it.
text = "I love green eggs and ham"

In [18]:
# Fit a tokenizer on the example text, then build the two lookup tables:
# word2id (word -> integer id, taken from the tokenizer) and id2word (its inverse).
tokenizer = Tokenizer()
tokenizer.fit_on_texts([text])
word2id = tokenizer.word_index
id2word = dict((idx, word) for word, idx in word2id.items())

# Show both mappings, word2id first.
for mapping in (word2id, id2word):
    print(mapping)

{'ham': 1, 'eggs': 2, 'and': 3, 'i': 4, 'green': 5, 'love': 6}
{1: 'ham', 2: 'eggs', 3: 'and', 4: 'i', 5: 'green', 6: 'love'}


ham 1
eggs 2
and 3
i 4
green 5
love 6
