Install the KeyBERT library first:

In [3]:
pip install keybert



In [4]:
from keybert import KeyBERT

In [5]:
# Sample document (a paragraph about supervised learning) that every
# extract_keywords call below uses as its input text.
doc = """
         Supervised learning is the machine learning task of learning a function that
         maps an input to an output based on example input-output pairs. It infers a
         function from labeled training data consisting of a set of training examples.
         In supervised learning, each example is a pair consisting of an input object
         (typically a vector) and a desired output value (also called the supervisory signal).
         A supervised learning algorithm analyzes the training data and produces an inferred function,
         which can be used for mapping new examples. An optimal scenario will allow for the
         algorithm to correctly determine the class labels for unseen instances. This requires
         the learning algorithm to generalize from the training data to unseen situations in a
         'reasonable' way (see inductive bias).
      """

# Build a KeyBERT extractor with the library's default settings.
kw_model = KeyBERT()

# Extract keywords from the sample document; the result is a list of
# (keyword, score) tuples.
keywords = kw_model.extract_keywords(doc)

# Bare expression as the last statement of the cell so the notebook displays it.
keywords

[('supervised', 0.6676),
 ('labeled', 0.4896),
 ('learning', 0.4813),
 ('training', 0.4134),
 ('labels', 0.3947)]

Use keyphrase_ngram_range to set the length of the resulting keywords/keyphrases:

In [6]:
# Only single words (1-grams) are considered as candidates; stop_words=None
# means no stop-word list is passed.
kw_model.extract_keywords(
    doc,
    keyphrase_ngram_range=(1, 1),
    stop_words=None,
)

[('supervised', 0.6676),
 ('labeled', 0.4896),
 ('learning', 0.4813),
 ('training', 0.4134),
 ('labels', 0.3947)]

To extract keyphrases, simply set keyphrase_ngram_range to (1, 2) or higher, depending on the number of words you want in each resulting keyphrase:

In [7]:
# Allow candidates of one or two words so that short phrases can be returned
# alongside single keywords; no stop-word list is passed.
kw_model.extract_keywords(
    doc,
    keyphrase_ngram_range=(1, 2),
    stop_words=None,
)

[('supervised learning', 0.6779),
 ('supervised', 0.6676),
 ('signal supervised', 0.6152),
 ('in supervised', 0.6124),
 ('labeled training', 0.6013)]

We can highlight the keywords in the document by simply setting highlight=True:

In [8]:
# Same extraction as before, with highlight=True so the keywords are also
# highlighted in the document text.
keywords = kw_model.extract_keywords(
    doc,
    highlight=True,
)

Max sum similarity

In [9]:
# Three-word keyphrases chosen with the Max Sum strategy (use_maxsum=True):
# nr_candidates=20 sets the size of the candidate pool and top_n=5 the number
# of keyphrases returned. English stop words are removed.
kw_model.extract_keywords(
    doc,
    keyphrase_ngram_range=(3, 3),
    stop_words='english',
    use_maxsum=True,
    nr_candidates=20,
    top_n=5,
)

[('learning function maps', 0.5178),
 ('training data unseen', 0.5194),
 ('learning algorithm analyzes', 0.5208),
 ('machine learning task', 0.5628),
 ('supervisory signal supervised', 0.5802)]

Maximal Marginal Relevance

In [10]:
# Three-word keyphrases chosen with Maximal Marginal Relevance (use_mmr=True)
# and a high diversity setting (0.7).
kw_model.extract_keywords(
    doc,
    keyphrase_ngram_range=(3, 3),
    stop_words='english',
    use_mmr=True,
    diversity=0.7,
)

[('supervised learning algorithm', 0.6992),
 ('pairs infers function', 0.1981),
 ('unseen situations reasonable', 0.2142),
 ('value called supervisory', 0.2895),
 ('class labels unseen', 0.3469)]

In [11]:
# Same MMR extraction with a low diversity setting (0.2), for comparison with
# the diversity=0.7 run.
kw_model.extract_keywords(
    doc,
    keyphrase_ngram_range=(3, 3),
    stop_words='english',
    use_mmr=True,
    diversity=0.2,
)

[('supervised learning algorithm', 0.6992),
 ('function labeled training', 0.663),
 ('supervised learning example', 0.6807),
 ('supervisory signal supervised', 0.5802),
 ('supervised learning machine', 0.6706)]

Embedding models

In [17]:
from keybert import KeyBERT
# Select the embedding model explicitly by passing its name as a string.
kw_model1 = KeyBERT(
    model='all-MiniLM-L6-v2',
)

In [18]:
# Extract keywords with the explicitly named model, highlighting them in the
# document text.
kw_model1.extract_keywords(
    doc,
    highlight=True,
)

[('supervised', 0.6676),
 ('labeled', 0.4896),
 ('learning', 0.4813),
 ('training', 0.4134),
 ('labels', 0.3947)]

Sentence Transformers

In [13]:
from keybert import KeyBERT
from sentence_transformers import SentenceTransformer

# Load the sentence-transformers model ourselves and hand the model object to
# KeyBERT, instead of passing only its name.
sentence_model = SentenceTransformer(
    "all-MiniLM-L6-v2",
)
kw_model2 = KeyBERT(
    model=sentence_model,
)

In [15]:
# Three-word keyphrases via MMR with low diversity (0.2), this time using the
# KeyBERT instance backed by the pre-loaded SentenceTransformer model.
kw_model2.extract_keywords(
    doc,
    keyphrase_ngram_range=(3, 3),
    stop_words='english',
    use_mmr=True,
    diversity=0.2,
)

[('supervised learning algorithm', 0.6992),
 ('function labeled training', 0.663),
 ('supervised learning example', 0.6807),
 ('supervisory signal supervised', 0.5802),
 ('supervised learning machine', 0.6706)]