**Arabicthon**

**Ibn Sidah Team**

Prof. Yaser Hifny
yhifny@yahoo.com

Dr. Waleed Nazeeh
w.nazeeh@gmail.com

Mr. Amr ElGendy
amr.algendy@gmail.com



# 1) Mount Google Drive and define paths

In [7]:
# Print CPU and memory details
import tensorflow as tf
from psutil import virtual_memory
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

if ram_gb < 20:
  print('Not using a high-RAM runtime')
else:
  print('You are using a high-RAM runtime!')

!lscpu |grep 'Model name'

print('Normal CPU')
print('Processor model')
!cat /proc/cpuinfo  | grep 'name'| uniq
print('Number of processors')
!cat /proc/cpuinfo  | grep process| wc -l
print('Memory details')
!free -h

Your runtime has 13.6 gigabytes of available RAM

Not using a high-RAM runtime
Model name:          Intel(R) Xeon(R) CPU @ 2.20GHz
Normal CPU
Processor model
model name	: Intel(R) Xeon(R) CPU @ 2.20GHz
Number of processors
2
Memory details
              total        used        free      shared  buff/cache   available
Mem:            12G        5.4G        2.9G        1.2M        4.4G        7.6G
Swap:            0B          0B          0B


In [8]:
import os
from google.colab import drive, files
# Location of the Sense_Gram project inside the mounted Google Drive
PROJ_PATH = '/content/drive/My Drive/Sense_Gram_Project'

# Make Google Drive visible under /content/drive
drive.mount('/content/drive')

# Work from the project folder so the relative paths used by later cells
# (requirements.txt, the ./ara_* model folders) resolve against it
os.chdir(PROJ_PATH)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# 2) Prepare SenseGram requirements and load our models. *Takes about 2 minutes.*

In [9]:
# Install the project dependencies listed in requirements.txt
# (resolved relative to the project directory set in the previous cell)
!pip install -r requirements.txt
# faiss-cpu is installed with its own pip command
!pip install faiss-cpu
# Disabled fallback: gensim is listed in requirements.txt; re-enable the line
# below to install it explicitly if the requirements install does not set it up correctly
#!pip install gensim==3.8.1
# Download the spaCy model en_core_web_sm
!python -m spacy download en_core_web_sm
# Install gradio for user interface
!pip install gradio


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting en_core_web_sm==2.2.5
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.2.5/en_core_web_sm-2.2.5.tar.gz (12.0 MB)
[K     |████████████████████████████████| 12.0 MB 4.3 MB/s 
[38;5;2m✔ Download and installation successful[0m
You can now load the model via spacy.load('en_core_web_sm')
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [10]:
import sensegram
from wsd import WSD
from gensim.models import KeyedVectors

# Disambiguation settings shared by all three models
lang = "ar" # to filter out stopwords
ignore_case = True
context_window_size = 5
max_context_words  = 3

# Vector files, relative to the project directory
# -- Arabic Billion Words corpus
sense_vectors_fpath_abualkhair = "./ara_billion_without_contexts_from_itself/arabic_billion_words.sense_vectors"
word_vectors_fpath_abualkhair = "./ara_billion_without_contexts_from_itself/arabic_billion_words.word_vectors"
# -- Arabic Wikipedia corpus
sense_vectors_fpath_wiki = "./ara_wiki_without_contexts_from_itself/ara_wiki.sense_vectors"
word_vectors_fpath_wiki = "./ara_wiki_without_contexts_from_itself/ara_wiki.word_vectors"
# -- Both corpora combined
sense_vectors_fpath_abualkhair_and_wiki = "./ara_wiki_and_ara_billion_without_contexts_from_ara_billion_dataset/ara_wiki_and_billion.sense_vectors"
word_vectors_fpath_abualkhair_and_wiki = "./ara_wiki_and_ara_billion_without_contexts_from_ara_billion_dataset/ara_wiki_and_billion.word_vectors"


def _load_vector_pair(sense_path, word_path):
  """Load sense vectors, then their word vectors, from non-binary word2vec files."""
  senses = sensegram.SenseGram.load_word2vec_format(sense_path, binary=False)
  words = KeyedVectors.load_word2vec_format(word_path, binary=False, unicode_errors="ignore")
  return senses, words


# Model loading ... takes some time
sv_abualkhair, wv_abualkhair = _load_vector_pair(
    sense_vectors_fpath_abualkhair, word_vectors_fpath_abualkhair)
sv_wiki, wv_wiki = _load_vector_pair(
    sense_vectors_fpath_wiki, word_vectors_fpath_wiki)
sv_abualkhair_wiki, wv_abualkhair_wiki = _load_vector_pair(
    sense_vectors_fpath_abualkhair_and_wiki, word_vectors_fpath_abualkhair_and_wiki)

def wsd_method(word, context, model_type, wsd_flag):
  """Disambiguate `word` in `context` and list the semantic fields of its senses.

  Args:
    word: Target word whose senses are looked up in the selected model.
    context: Text handed to `WSD.disambiguate` together with `word`.
    model_type: Corpus label chosen in the demo. Both the labels shown by the
      demo's radio buttons and the labels this function originally compared
      against are recognised. Any other value (including None when nothing
      is selected) falls back to the Wikipedia model, as before.
    wsd_flag: Display mode. "عرض الحقول الدلالية" returns only the semantic
      fields listing; any other value additionally reports the model label,
      the number of senses and the selected sense.

  Returns:
    A plain-text report (str).
  """
  # The demo's radio buttons send the parenthesised labels, but this function
  # used to test for differently worded ones, so every selection silently fell
  # through to the Wikipedia model. Accept both spellings.
  models_by_label = {
      "العربية المعاصرة (أخبار)": (sv_abualkhair, wv_abualkhair),
      "العربية المعاصرة الإخبارية": (sv_abualkhair, wv_abualkhair),
      "العربية المعاصرة (عام)": (sv_abualkhair_wiki, wv_abualkhair_wiki),
      "العربية المعاصرة": (sv_abualkhair_wiki, wv_abualkhair_wiki),
      "العربية المعاصرة (موسوعات)": (sv_wiki, wv_wiki),
  }
  sv, wv = models_by_label.get(model_type, (sv_wiki, wv_wiki))

  sense_tuple = sv.get_senses(word, ignore_case=ignore_case)

  parts = []
  if wsd_flag != "عرض الحقول الدلالية":
    # Disambiguation is only run when its result is actually reported;
    # in fields-only mode the original computed it and threw it away.
    wsd_model = WSD(sv, wv, window=context_window_size, lang=lang,
                    max_context_words=max_context_words, ignore_case=ignore_case)
    wsd_output = wsd_model.disambiguate(context, word)
    parts.append(f"working with model: {model_type}\n")
    parts.append(f"sense count: {len(sense_tuple)}\n")
    parts.append(f"selected sense: {wsd_output[0]}\n\n")

  # One section per sense: the sense id, a separator rule, then its nearest
  # neighbours in the sense-vector space with their similarity scores.
  parts.append("semantic fields details:\n")
  for sense_id, _prob in sense_tuple:
    parts.append(sense_id)
    parts.append("\n" + "=" * 20 + "\n")
    for rsense_id, sim in sv.wv.most_similar(sense_id):
      parts.append("{} {:f}\n".format(rsense_id, sim))
    parts.append("\n")

  return "".join(parts)


# 3) Live Demo

In [14]:
import gradio as gr
# Launching live demo

# Input widgets, in the order wsd_method receives them:
# word, context, corpus/model choice, display mode
word_box = gr.Textbox(lines=1, placeholder="الكلمة")
context_box = gr.Textbox(lines=2, placeholder="السياق")
corpus_radio = gr.Radio(["العربية المعاصرة (أخبار)", "العربية المعاصرة (موسوعات)","العربية المعاصرة (عام)"])
mode_radio = gr.Radio(["فك الالتباس الدلالي", "عرض الحقول الدلالية"])

demo = gr.Interface(
    fn=wsd_method,
    inputs=[word_box, context_box, corpus_radio, mode_radio],
    outputs="text",
    title="فـك الالتباس الدلالي",
    description="فضلًا أدخل الكلمة ثم السياق ثم اضغط على زر إرسال، ولاستعراض المخرجات كاملة يرجى استخدام زر التمرير لأسفل.",
)
demo.launch()


Colab notebook detected. To show errors in colab notebook, set `debug=True` in `launch()`
Running on public URL: https://34425.gradio.app

This share link expires in 72 hours. For free permanent hosting, check out Spaces (https://huggingface.co/spaces)


(<gradio.routes.App at 0x7f253e65c610>,
 'http://127.0.0.1:7863/',
 'https://34425.gradio.app')