In [1]:
from transformers import AutoModelForMaskedLM , AutoTokenizer
import torch
model_path="bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_path)


In [3]:
from prompt import Prompting
prompting= Prompting(model=model_path)
prompt="Because it was [MASK]."

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [5]:
# Let’s Pass a positive sentence
text="I really like the film a lot."
prompting.prompt_pred(text+prompt)[:10]

[('great', tensor(9.5558)),
 ('amazing', tensor(9.2532)),
 ('good', tensor(9.1464)),
 ('fun', tensor(8.3979)),
 ('fantastic', tensor(8.3277)),
 ('wonderful', tensor(8.2719)),
 ('beautiful', tensor(8.1584)),
 ('awesome', tensor(8.1071)),
 ('incredible', tensor(8.0140)),
 ('funny', tensor(7.8785))]

In [7]:
# Passing a negative sentence:
text="I did not like the film."
prompting.prompt_pred(text+prompt)[:10]

[('bad', tensor(8.6784)),
 ('funny', tensor(8.1660)),
 ('good', tensor(7.9858)),
 ('awful', tensor(7.7454)),
 ('scary', tensor(7.3526)),
 ('boring', tensor(7.1553)),
 ('wrong', tensor(7.1402)),
 ('terrible', tensor(7.1296)),
 ('horrible', tensor(6.9923)),
 ('ridiculous', tensor(6.7731))]

# Producing the results based on a list of neg/pos words

In [8]:
text="not worth watching"
prompting.compute_tokens_prob(text+prompt, token_list1=["great","amazin","good"], token_list2= ["bad","awfull","terrible"])

tensor([0.1496, 0.8504])

# Unbiasing the language model

In [9]:
prompting.compute_tokens_prob("it was "+ prompting.tokenizer.mask_token +".", token_list1=["good"], token_list2= ["bad"])


tensor([0.8495, 0.1505])

# Name Entity Recognition in zero-shot setting

In [10]:
prompting.prompt_pred("John went to Paris to visit the University. John is a type of [MASK].")[:5]

[('man', tensor(8.1382)),
 ('john', tensor(7.1325)),
 ('guy', tensor(6.9672)),
 ('writer', tensor(6.4336)),
 ('philosopher', tensor(6.3823))]

In [13]:
prompting.prompt_pred('Savaş went to Paris to visit the university. Savaş is a type of [MASK].')[:5]

[('philosopher', tensor(7.6558)),
 ('poet', tensor(7.5621)),
 ('saint', tensor(7.0104)),
 ('man', tensor(6.8890)),
 ('pigeon', tensor(6.6780))]

In [15]:
prompting.compute_tokens_prob('It is a type of [MASK].', token_list1=["person","man"], token_list2=["location","city","place"])


tensor([0.7603, 0.2397])

In [18]:
prompting.compute_tokens_prob("Savaş went to Paris to visit the parliament. Savaş is a type of [MASK].",  token_list1=["person","man"], token_list2=["location","city","place"])


tensor([9.9987e-01, 1.2744e-04])

In [20]:
prompting.compute_tokens_prob("Savaş went to Laris to visit the parliament. Laris is a type of [MASK].",  token_list1=["person","man"], token_list2=["location","city","place"])


tensor([0.3263, 0.6737])

In [21]:
prompting.compute_tokens_prob("Savas went to XYZ to visit friends. XYZ is a type of [MASK].", token_list1=["person","man"], token_list2=["location","city","place"])

tensor([0.5516, 0.4484])

# Topic Classification

In [23]:
prompting.prompt_pred("Savas went to Paris to study computer science. he started to learn basic staff like programming, algorithm, operating systemvisit the parliament. The topic is a type of [MASK].")[:10]


[('mathematics', tensor(8.8438)),
 ('computer', tensor(7.9713)),
 ('programming', tensor(7.7146)),
 ('computing', tensor(7.6635)),
 ('math', tensor(7.5142)),
 ('algebra', tensor(7.1716)),
 ('computers', tensor(7.0012)),
 ('game', tensor(6.9694)),
 ('physics', tensor(6.9225)),
 ('computation', tensor(6.8152))]

# Sentence Embeddings

In [27]:
from transformers import pipeline
fe=pipeline("feature-extraction", model=model_path)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [28]:
text="the film is ok. it means [MASK]."
indexed_tokens= tokenizer(text, return_tensors="pt").input_ids
tokenized_text= tokenizer.convert_ids_to_tokens (indexed_tokens[0])
mask_pos= (indexed_tokens[0]== tokenizer.mask_token_id).nonzero().item()
text_emb=fe(text)
mask_emb=text_emb[0][mask_pos]
len(mask_emb)

768