In [1]:
# First, make sure you have the Transformers library installed:

pip install transformers

You should consider upgrading via the '/Users/binod/Documents/software/anaconda/anaconda3/bin/python -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.


In [2]:
# Next, we'll import the necessary modules and load a pre-trained BERT model:

from transformers import BertTokenizer, BertForSequenceClassification

# Load the pre-trained BERT tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Load the pre-trained BERT model for sequence classification
model = BertForSequenceClassification.from_pretrained('bert-base-uncased')


HBox(children=(HTML(value='Downloading pytorch_model.bin'), FloatProgress(value=0.0, max=440473133.0), HTML(va…




Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

In [3]:
# Now, let's use the model to classify some text. 
# First, we'll tokenize the text using the BERT tokenizer, 
# which converts the text into a sequence of tokens that can be fed into the model:

# Tokenize the text
text = "This is a test sentence"
inputs = tokenizer(text, return_tensors='pt')

#The return_tensors='pt' argument tells the tokenizer to return the input as PyTorch tensors, 
# which is the format that the BERT model expects.

In [4]:
inputs

{'input_ids': tensor([[ 101, 2023, 2003, 1037, 3231, 6251,  102]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1]])}

In [5]:
# Now, let's use the model to classify the text. We'll pass the tokenized input to the model's forward method, 
# which will return a tuple containing the model's output:

# Classify the text
outputs = model(**inputs)
logits = outputs.logits


In [6]:
logits

tensor([[ 0.2175, -0.3369]], grad_fn=<AddmmBackward0>)

In [7]:
# The logits variable contains the model's predictions for each class. 
# To get the predicted class, we can use the argmax method:

# Get the predicted class
predicted_class = logits.argmax().item()


In [8]:
predicted_class

0

Name Entity Recognition (NER) example

In [9]:
from transformers import AutoTokenizer, AutoModelForTokenClassification

# Load the pre-trained BERT tokenizer and model for NER
tokenizer = AutoTokenizer.from_pretrained("dslim/bert-base-NER")
model = AutoModelForTokenClassification.from_pretrained("dslim/bert-base-NER")


HBox(children=(HTML(value='Downloading (…)okenizer_config.json'), FloatProgress(value=0.0, max=59.0), HTML(val…




HBox(children=(HTML(value='Downloading (…)lve/main/config.json'), FloatProgress(value=0.0, max=829.0), HTML(va…




HBox(children=(HTML(value='Downloading (…)solve/main/vocab.txt'), FloatProgress(value=0.0, max=213450.0), HTML…




HBox(children=(HTML(value='Downloading (…)in/added_tokens.json'), FloatProgress(value=0.0, max=2.0), HTML(valu…




HBox(children=(HTML(value='Downloading (…)cial_tokens_map.json'), FloatProgress(value=0.0, max=112.0), HTML(va…




HBox(children=(HTML(value='Downloading pytorch_model.bin'), FloatProgress(value=0.0, max=433316646.0), HTML(va…




In [10]:
# Next, we'll tokenize some input text using the BERT tokenizer, and pass it to the model for prediction:

# Tokenize the input text
text = "John lives in New York City"
tokens = tokenizer.encode(text, return_tensors='pt')

# Get the predicted NER labels for the tokens
outputs = model(tokens)
predictions = outputs.logits.argmax(dim=-1)
labels = [model.config.id2label[label_id] for label_id in predictions[0].tolist()]


In [11]:
labels

['O', 'B-PER', 'O', 'O', 'B-LOC', 'I-LOC', 'I-LOC', 'O']

sentiment analysis:

In [12]:
# First, we'll load a pre-trained BERT model for sentiment analysis:

from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Load the pre-trained BERT tokenizer and model for sentiment analysis
tokenizer = AutoTokenizer.from_pretrained("textattack/bert-base-uncased-imdb")
model = AutoModelForSequenceClassification.from_pretrained("textattack/bert-base-uncased-imdb")


HBox(children=(HTML(value='Downloading (…)okenizer_config.json'), FloatProgress(value=0.0, max=48.0), HTML(val…




HBox(children=(HTML(value='Downloading (…)lve/main/config.json'), FloatProgress(value=0.0, max=511.0), HTML(va…




HBox(children=(HTML(value='Downloading (…)solve/main/vocab.txt'), FloatProgress(value=0.0, max=231508.0), HTML…




HBox(children=(HTML(value='Downloading (…)cial_tokens_map.json'), FloatProgress(value=0.0, max=112.0), HTML(va…




HBox(children=(HTML(value='Downloading pytorch_model.bin'), FloatProgress(value=0.0, max=437985387.0), HTML(va…




In [15]:
# Next, we'll tokenize some input text using the BERT tokenizer, and pass it to the model for prediction:

# Tokenize the input text
text = "This movie was absolutely amazing! The acting, direction, and visuals were all incredible but songs were very bad."
tokens = tokenizer.encode(text, return_tensors='pt')

# Get the predicted sentiment label for the tokens
outputs = model(tokens)
predictions = outputs.logits.argmax(dim=-1)
label = "positive" if predictions[0].item() == 1 else "negative"


In [16]:
label

'positive'

BERT model for question answering:

In [19]:
import torch
from transformers import AutoTokenizer, AutoModelForQuestionAnswering

# Load the BERT tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")
model = AutoModelForQuestionAnswering.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")

# Define the question and context
question = "What is the capital of France?"
context = "France is a country in Europe. Its capital is Paris."

# Tokenize the inputs
inputs = tokenizer.encode_plus(question, context, return_tensors="pt")

# Perform the question-answering task
answer_start_scores, answer_end_scores = model(**inputs).values()
answer_start = torch.argmax(answer_start_scores)
answer_end = torch.argmax(answer_end_scores) + 1
answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(inputs["input_ids"][0][answer_start:answer_end]))

# Print the answer
print(answer)


paris
