## Install Transformers

In [None]:
pip install transformers

In [None]:
from transformers import AutoTokenizer

# Tokenizer that belongs to the "bert-base-uncased" checkpoint.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
sentence = "I am learning Machine Learning"

# Encode: text -> vocabulary ids.
encoded_sentence = tokenizer(sentence)
encoded_ids = encoded_sentence["input_ids"]

# Decode: ids -> text.
decode_string = tokenizer.decode(encoded_ids)

# Show the ids, the token strings they map to, and the round-tripped text.
for label, value in (
    ("Input Ids:", encoded_ids),
    ("Tokens:", tokenizer.convert_ids_to_tokens(encoded_ids)),
    ("Decoded String:", decode_string),
):
    print(label, value)

### Example Results

**Input Ids:** [101, 1045, 2572, 4083, 3698, 4083, 102]

**Tokens:** ['[CLS]', 'i', 'am', 'learning', 'machine', 'learning', '[SEP]']

**Decoded String:** [CLS] i am learning machine learning [SEP]

# Perform the following NLP operations using the Hugging Face library with pipeline functions

1. Sentiment analysis
2. Fill Mask
3. NER
4. Q&A
5. Text Generation

## Install torch

In [None]:
pip install torch

## 1. Sentiment Analysis

In [None]:
from transformers import pipeline

# Name the checkpoint explicitly instead of relying on the task default:
# the library warns that building a pipeline without a model name is not
# recommended, and an implicit default can change between releases.
# This is the checkpoint the "sentiment-analysis" task defaults to.
SENTIMENT_MODEL = "distilbert/distilbert-base-uncased-finetuned-sst-2-english"
classifier = pipeline("sentiment-analysis", model=SENTIMENT_MODEL)

positive_sentence = "I am learning Machine Learning after a long time, which is great!"
negative_sentence = "I am learning Machine Learning but its not working out for me"

# One classifier call per sentence; each call returns a list of
# {'label': ..., 'score': ...} dicts.
positive_result = classifier(positive_sentence)
negative_result = classifier(negative_sentence)

# Print both cases through the same three-line report.
for heading, text, result in (
    ("Sentiment Positive", positive_sentence, positive_result),
    ("Sentiment Negative", negative_sentence, negative_result),
):
    print(heading)
    print("Sentences:", text)
    print("Sentiment:", result)

### Example Results

#### Sentiment Positive

**Sentences:** I am learning Machine Learning after a long time, which is great!

**Sentiment:** [{'label': 'POSITIVE', 'score': 0.9997515082359314}]

#### Sentiment Negative

**Sentences:** I am learning Machine Learning but its not working out for me

**Sentiment:** [{'label': 'NEGATIVE', 'score': 0.9995772242546082}]

## 2. Fill Mask

In [None]:
from transformers import pipeline

# Checkpoints to compare. They use different mask placeholders
# ("<mask>" vs "[MASK]"), so the placeholder is taken from each pipeline's
# tokenizer rather than typed by hand for every model.
FILL_MASK_MODELS = ("distilbert/distilroberta-base", "bert-base-uncased")
FILL_MASK_TEMPLATE = "I am learning {mask} today, make me can cook spicy food"

for model_name in FILL_MASK_MODELS:
    fill_mask = pipeline("fill-mask", model=model_name)

    # Insert the mask token this particular checkpoint expects.
    fill_mask_sentence = FILL_MASK_TEMPLATE.format(mask=fill_mask.tokenizer.mask_token)
    fill_mask_result = fill_mask(fill_mask_sentence)

    # The model is passed explicitly above, so label it as the model in use
    # rather than as a "default" model.
    print("Use model:", model_name)
    print("Fill Mask Sentence:", fill_mask_sentence)
    print("Fill Mask Result:", fill_mask_result)

## 3. Named Entity Recognition (NER)

In [None]:
from transformers import pipeline

# Name the checkpoint explicitly: building a pipeline without a model makes
# the library pick a default and warn that this is not recommended.
# This is the checkpoint the "ner" task reported falling back to.
NER_MODEL = "dbmdz/bert-large-cased-finetuned-conll03-english"

sentence = "My name is Dika and I work as a software engineer."

# Default behaviour: one entry per sub-word token, no grouping.
classifier = pipeline("ner", model=NER_MODEL)

result = classifier(sentence)
print("grouped_entities=False (default)")
print("Sentence:", sentence)
print("Named Entity Recognition Result:", result)

# `grouped_entities=True` is deprecated; `aggregation_strategy="simple"` is
# its replacement and merges adjacent sub-word tokens of the same entity.
# The printed label keeps the legacy flag name so it still matches the
# "Example Results" section of this notebook.
classifier = pipeline("ner", model=NER_MODEL, aggregation_strategy="simple")

result = classifier(sentence)
print("grouped_entities=True")
print("Sentence:", sentence)
print("Named Entity Recognition Result:", result)

No model was supplied, defaulted to dbmdz/bert-large-cased-finetuned-conll03-english and revision 4c53496 (https://huggingface.co/dbmdz/bert-large-cased-finetuned-conll03-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Some weights of the model checkpoint at dbmdz/bert-large-cased-finetuned-conll03-english were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use mps:0
No model

grouped_entities=False (default)
Sentence: My name is Dika and I work as a software engineer.
Named Entity Recognition Result: [{'entity': 'I-PER', 'score': np.float32(0.9990803), 'index': 4, 'word': 'Di', 'start': 11, 'end': 13}, {'entity': 'I-PER', 'score': np.float32(0.9972364), 'index': 5, 'word': '##ka', 'start': 13, 'end': 15}]


Some weights of the model checkpoint at dbmdz/bert-large-cased-finetuned-conll03-english were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use mps:0


grouped_entities=True
Sentence: My name is Dika and I work as a software engineer.
Named Entity Recognition Result: [{'entity_group': 'PER', 'score': np.float32(0.99815834), 'word': 'Dika', 'start': 11, 'end': 15}]




### Example Results

#### grouped_entities=False (default)

**Sentence:** My name is Dika and I work as a software engineer.

**Named Entity Recognition Result:** [{'entity': 'I-PER', 'score': np.float32(0.9990803), 'index': 4, 'word': 'Di', 'start': 11, 'end': 13}, {'entity': 'I-PER', 'score': np.float32(0.9972364), 'index': 5, 'word': '##ka', 'start': 13, 'end': 15}]

#### grouped_entities=True

**Sentence:** My name is Dika and I work as a software engineer.

**Named Entity Recognition Result:** [{'entity_group': 'PER', 'score': np.float32(0.99815834), 'word': 'Dika', 'start': 11, 'end': 15}]