In [11]:
import requests 
from gensim.models import KeyedVectors
import random
import numpy as np
import concurrent.futures
from functools import partial


# Read the binary word2vec vectors used to propose candidate words;
# bytes that cannot be decoded in the vocabulary are ignored.
model = KeyedVectors.load_word2vec_format(
    "modeleCemantix.bin",
    binary=True,
    unicode_errors="ignore",
)

def send_word(mot: str, url: str) -> float:
    """
    Send a word to the Cemantix API to get its score.

    Args:
    mot (str): The word to send to the API.
    url (str): The URL of the API endpoint.

    Returns:
    float: The score of the word from the API, or -1000 if the API reports
    an error for the word or answers with something that is not a score.

    Raises:
    requests.RequestException: If the request itself fails (connection
    error, timeout, ...). Transport failures are deliberately not converted
    to -1000 so that a network outage is not mistaken for a bad word.
    """
    response = requests.post(
        url,
        headers={'origin': 'https://cemantix.certitudes.org'},
        data={'word': mot},
        # requests waits forever by default; bound the wait so a stalled
        # connection cannot hang the caller.
        timeout=10,
    )
    try:
        r = response.json()
    except ValueError:
        # The body was not JSON (e.g. an HTML error page).
        return -1000

    if not isinstance(r, dict) or 'error' in r:
        return -1000
    # A payload without a score is treated like an API error.
    return r.get("score", -1000)

def best_start(model: KeyedVectors, dico_mot: dict, url: str, nb_start: int = 50, num_threads: int = 10) -> dict:
    """
    Randomly select and score words from the model to initialize the dictionary of scored words.

    Args:
    model (KeyedVectors): The word embedding model.
    dico_mot (dict): Words already scored (word -> score). It is not
        modified; its entries are carried over into the returned dictionary.
    url (str): The URL of the Cemantix API.
    nb_start (int): The number of random words to draw.
    num_threads (int): The number of threads to use for concurrent requests.

    Returns:
    dict: A new dictionary containing the entries of ``dico_mot`` plus the
    newly scored random words.
    """
    # Work on a copy so the previously known scores are kept (and the
    # caller's dictionary is left untouched).
    scores = dict(dico_mot) if dico_mot else {}

    vocab = model.index_to_key
    # Words are drawn from indices 1..100000 as before, but the upper bound is
    # clamped to the vocabulary size so smaller models do not raise IndexError.
    upper = min(100000, len(vocab) - 1)
    if upper < 1 or nb_start <= 0:
        return scores

    # Sample without replacement so the same word is never requested twice,
    # and skip words whose score is already known.
    indices = random.sample(range(1, upper + 1), min(nb_start, upper))
    rd_words = [vocab[i] for i in indices if vocab[i] not in scores]

    send_word_map = partial(send_word, url=url)

    with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as executor:
        future_to_word = {executor.submit(send_word_map, word): word for word in rd_words}
        for future in concurrent.futures.as_completed(future_to_word):
            # future.result() re-raises any exception from the worker.
            scores[future_to_word[future]] = future.result()

    return scores


def find_word(model: KeyedVectors, url: str = 'https://cemantix.certitudes.org/score') -> str:
    """
    Find the word of the day using the Cemantix model and API.

    The search scores a batch of random words, then repeatedly tests the
    nearest neighbours (in the embedding model) of the best word seen so far.
    Whenever a tested word beats the current best, its neighbours are pushed
    on top of the candidate stack. When the stack runs empty, a new random
    batch is scored and the best word that has not been expanded yet becomes
    the new starting point.

    Args:
    model (KeyedVectors): The word embedding model.
    url (str): The URL of the Cemantix scoring endpoint.

    Returns:
    str: The word of the day (first word whose score exceeds 0.99).
    """
    def neighbours(word: str) -> list:
        # most_similar returns the closest word first; reverse the list so the
        # closest neighbour ends up last and is popped (tested) first.
        return [w for w, _ in model.most_similar(word, topn=50)][::-1]

    dico_mot = best_start(model, {}, url, 150, 10)
    mot = max(dico_mot, key=dico_mot.get)
    if dico_mot[mot] > 0.99:
        # The random batch already contained the solution.
        return mot

    expanded = {mot}  # words whose neighbours have already been queued
    word_to_test = neighbours(mot)

    while True:
        if not word_to_test:
            # Neighbourhood exhausted: score a fresh random batch and merge it
            # with what is already known.
            dico_mot = {**dico_mot, **best_start(model, dico_mot, url)}
            candidates = [w for w in dico_mot if w not in expanded]
            if not candidates:
                continue
            mot = max(candidates, key=dico_mot.get)
            if dico_mot[mot] > 0.99:
                return mot
            expanded.add(mot)
            word_to_test = neighbours(mot)

        mot_test = word_to_test.pop()
        if mot_test in dico_mot:
            # Already scored; do not spend another request on it.
            continue

        sc = send_word(mot_test, url)
        dico_mot[mot_test] = sc
        if sc > 0.99:
            return mot_test

        if sc > dico_mot[mot]:
            # New best word: explore its neighbourhood next.
            mot = mot_test
            expanded.add(mot)
            word_to_test.extend(neighbours(mot))

# Run the search for the word of the day. This issues live HTTP requests to
# the Cemantix API; the bare expression lets the notebook display the result.
find_word(model)

'concession'

In [12]:
# Reload the word embedding model from disk and run the search again.
model = KeyedVectors.load_word2vec_format("modeleCemantix.bin", binary=True, unicode_errors="ignore")
find_word(model)

'concession'