# Keywords model


https://github.com/MaartenGr/KeyBERT


In [3]:
import nltk
from keybert import KeyBERT
from nltk.corpus import wordnet

In [4]:
model = KeyBERT()

sample_text = """
Machine learning is a method of data analysis that automates analytical model building. 
It is a branch of artificial intelligence based on the idea that systems can learn from data, 
identify patterns, and make decisions with minimal human intervention.
"""

In [4]:
keywords_extracted = model.extract_keywords(sample_text)
keywords = [word[0] for word in keywords_extracted]
keywords

['learning', 'machine', 'data', 'automates', 'model']

In [9]:
keywords = model.extract_keywords(sample_text, highlight=True)

In [8]:
keywords_extracted = model.extract_keywords(
    sample_text, keyphrase_ngram_range=(1, 1), top_n=10
)
keywords_extracted

[('learning', 0.3656),
 ('machine', 0.3539),
 ('data', 0.3163),
 ('automates', 0.3064),
 ('model', 0.3041),
 ('analysis', 0.2465),
 ('learn', 0.2387),
 ('intelligence', 0.2384),
 ('artificial', 0.2214),
 ('systems', 0.2191)]

In [5]:
def get_synonyms_for_keyword(keyword, n_synonyms=5):

    synonyms = []
    for syn in wordnet.synsets(keyword):
        for l in syn.lemmas():
            synonyms.append(l.name().replace("_", " "))

    synonyms = list(set(synonyms))
    synonyms_filtered = [syn for syn in synonyms if syn.lower() != keyword.lower()]
    synonyms_filtered = synonyms_filtered[0:n_synonyms]

    return synonyms_filtered

In [7]:
keywords_synonyms = {}
for k in keywords:
    synonyms = get_synonyms_for_keyword(k)
    keywords_synonyms[k] = synonyms

keywords_synonyms

{'learning': ['check', 'encyclopaedism', 'larn', 'read', 'instruct'],
 'machine': ['motorcar',
  'car',
  'political machine',
  'simple machine',
  'automobile'],
 'data': ['datum', 'information', 'data point'],
 'automates': ['automatize', 'automatise', 'automate'],
 'model': ['mock up', 'mannikin', 'modelling', 'simulation', 'role model']}