In [None]:
import ipywidgets as widgets
import gensim
import os

## Word embedding model for Arabic
This app allows you to find related words in a word embedding model for Arabic.

The current model is the Wikipedia CBOW model from [AraVec](https://github.com/bakrianoo/aravec). See: Abu Bakr Soliman, Kareem Eisa, and Samhaa R. El-Beltagy, “AraVec: A set of Arabic Word Embedding Models for use in Arabic NLP”, in Proceedings of the 3rd International Conference on Arabic Computational Linguistics (ACLing 2017), Dubai, UAE, 2017.

In [None]:
# Directory (relative to the notebook's working directory) that holds the
# word embedding model file(s).
model_path = 'data'

In [None]:
# Load the Word2Vec model stored as 'wikipedia_cbow_100' inside `model_path`.
# NOTE(review): gensim's `load` relies on pickle under the hood, so only point
# this at model files obtained from a trusted source.
model_w2vec = gensim.models.Word2Vec.load(
    os.path.join(model_path, 'wikipedia_cbow_100')
)

In [None]:
def most_similar(word, model, topn=10):
    """Return the nearest neighbours of ``word`` as printable text.

    Args:
        word: Query passed to ``model.wv.most_similar``.
        model: Object exposing ``wv.most_similar(word, topn=...)`` that
            yields ``(word, score)`` pairs.
        topn: Number of neighbours to request from the model.

    Returns:
        One line per neighbour, joined with newlines; each line holds the
        neighbour, a space and a tab, then the score with three decimals.

    Raises:
        Whatever ``model.wv.most_similar`` raises for the given ``word``.
    """
    neighbours = model.wv.most_similar(word, topn=topn)
    lines = []
    for term, score in neighbours:
        lines.append(u'{} \t{:.3f}'.format(term, score))
    return '\n'.join(lines)

In [None]:
# Disabled (read-only) text area; the submit callback writes the results or an
# error message into its `value`.
output = widgets.Textarea(rows=100, disabled=True)

def print_related_words(button):
    """Button callback: display the words most similar to the entered word.

    Reads the query from ``input_word`` and the number of results from
    ``input_number``, looks the word up in ``model_w2vec`` through
    ``most_similar`` and writes either the formatted result or an error
    message to the ``output`` text area.

    Args:
        button: The widget handed over by ``on_click``; not used.
    """
    try:
        # Drop whitespace accidentally typed or pasted around the word so it
        # does not turn a valid query into a vocabulary miss.
        word = input_word.value.strip()
        output.value = most_similar(word, model_w2vec, input_number.value)
    except KeyError:
        output.value = 'Error: word does not exist in vocabulary'
    except Exception as exc:
        # `Exception` instead of a bare `except:` so KeyboardInterrupt and
        # SystemExit still propagate; the cause is shown rather than hidden.
        output.value = 'Unknown error ({}: {})'.format(type(exc).__name__, exc)

# Submit button; clicking it runs the lookup callback.
button_submit = widgets.Button(description='Submit')
button_submit.on_click(print_related_words)

# Query controls: the word to look up and how many results to show
# (slider from 5 to 100 in steps of 5, starting at 10).
input_word = widgets.Text(description='Word:')
input_number = widgets.IntSlider(value=10, min=5, max=100, step=5, description='Number of results:')

# Bare final expression so the notebook renders the widgets stacked vertically.
widgets.VBox((
    input_word,
    input_number,
    button_submit,
    output,
))