In [1]:
!date "+[%F %R:%S] [INIT] 1_QuickTour.ipynb ($CONDA_DEFAULT_ENV)"
import time
t0 = time.time()

[2020-10-23 17:56:09] [INIT] HelloTransformers1.ipynb (trans)


## Environment

In [2]:
!printf "Using NVidia Driver: "
!nvidia-smi
import torch
print('\n')
print(f'Using pytorch [{torch.__version__}] with {"cuda" if torch.cuda.is_available() else "cpu"}')

import transformers
print(f'Using transformers [{transformers.__version__}]')

Using NVidia Driver: Fri Oct 23 17:56:09 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.95.01    Driver Version: 440.95.01    CUDA Version: 10.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  TITAN RTX           Off  | 00000000:3B:00.0 Off |                  N/A |
| 41%   29C    P8    10W / 280W |     11MiB / 24220MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   1  TITAN RTX           Off  | 00000000:D9:00.0 Off |                  N/A |
| 41%   30C    P8     5W / 280W |     11MiB / 24220MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                   

# Quick tour
- https://huggingface.co/transformers/quicktour.html

## Getting started on a task with a pipeline

In [3]:
from transformers import pipeline
classifier = pipeline('sentiment-analysis', model="nlptown/bert-base-multilingual-uncased-sentiment")
res1 = classifier('We are very happy to show you the 🤗 Transformers library.')
classifier = pipeline('sentiment-analysis')
res2 = classifier('We are very happy to show you the 🤗 Transformers library.')
res1, res2

([{'label': '5 stars', 'score': 0.772534966468811}],
 [{'label': 'POSITIVE', 'score': 0.9997795224189758}])

In [4]:
results = classifier(["We are very happy to show you the 🤗 Transformers library.", "We hope you don't hate it."])
for result in results:
    print(f"label: {result['label']}, with score: {round(result['score'], 4)}")

label: POSITIVE, with score: 0.9998
label: NEGATIVE, with score: 0.5309


In [5]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
model_name = "nlptown/bert-base-multilingual-uncased-sentiment"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
classifier = pipeline('sentiment-analysis', model=model, tokenizer=tokenizer)
classifier

<transformers.pipelines.TextClassificationPipeline at 0x7f771938e0a0>

## Under the hood: pretrained models

In [6]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
model_name = "distilbert-base-uncased-finetuned-sst-2-english"
pt_model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer

<transformers.tokenization_distilbert.DistilBertTokenizer at 0x7f7649ef41f0>

### Using the tokenizer

In [7]:
inputs = tokenizer("We are very happy to show you the 🤗 Transformers library.")
inputs

{'input_ids': [101, 2057, 2024, 2200, 3407, 2000, 2265, 2017, 1996, 100, 19081, 3075, 1012, 102], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}

In [8]:
pt_batch = tokenizer(
    ["We are very happy to show you the 🤗 Transformers library.", "We hope you don't hate it."],
    padding=True,
    truncation=True,
    return_tensors="pt"
)
pt_batch

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


{'input_ids': tensor([[  101,  2057,  2024,  2200,  3407,  2000,  2265,  2017,  1996,   100,
         19081,  3075,  1012,   102],
        [  101,  2057,  3246,  2017,  2123,  1005,  1056,  5223,  2009,  1012,
           102,     0,     0,     0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0]])}

In [9]:
for key, value in pt_batch.items():
    print(f"{key}: {value.numpy().tolist()}")

input_ids: [[101, 2057, 2024, 2200, 3407, 2000, 2265, 2017, 1996, 100, 19081, 3075, 1012, 102], [101, 2057, 3246, 2017, 2123, 1005, 1056, 5223, 2009, 1012, 102, 0, 0, 0]]
attention_mask: [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0]]


### Using the model

In [10]:
pt_outputs = pt_model(**pt_batch)
pt_outputs

(tensor([[-4.0833,  4.3364],
         [ 0.0818, -0.0418]], grad_fn=<AddmmBackward>),)

In [11]:
import torch.nn.functional as F
pt_predictions = F.softmax(pt_outputs[0], dim=-1)
pt_predictions

tensor([[2.2043e-04, 9.9978e-01],
        [5.3086e-01, 4.6914e-01]], grad_fn=<SoftmaxBackward>)

In [12]:
import torch
pt_outputs = pt_model(**pt_batch, labels = torch.tensor([1, 0]))
pt_outputs

(tensor(0.3167, grad_fn=<NllLossBackward>),
 tensor([[-4.0833,  4.3364],
         [ 0.0818, -0.0418]], grad_fn=<AddmmBackward>))

In [13]:
pt_outputs = pt_model(**pt_batch, output_hidden_states=True, output_attentions=True)
all_hidden_states, all_attentions = pt_outputs[-2:]

### Accessing the code

In [14]:
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
model_name = "distilbert-base-uncased-finetuned-sst-2-english"
model = DistilBertForSequenceClassification.from_pretrained(model_name)
tokenizer = DistilBertTokenizer.from_pretrained(model_name)
tokenizer

<transformers.tokenization_distilbert.DistilBertTokenizer at 0x7f764858bfa0>

### Customizing the model

In [15]:
from transformers import DistilBertConfig, DistilBertTokenizer, DistilBertForSequenceClassification
config = DistilBertConfig(n_heads=8, dim=512, hidden_dim=4*512)
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
model = DistilBertForSequenceClassification(config)
model

DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 512, padding_idx=0)
      (position_embeddings): Embedding(512, 512)
      (LayerNorm): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0): TransformerBlock(
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=512, out_features=512, bias=True)
            (k_lin): Linear(in_features=512, out_features=512, bias=True)
            (v_lin): Linear(in_features=512, out_features=512, bias=True)
            (out_lin): Linear(in_features=512, out_features=512, bias=True)
          )
          (sa_layer_norm): LayerNorm((512,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)
       

In [16]:
from transformers import DistilBertConfig, DistilBertTokenizer, DistilBertForSequenceClassification
model_name = "distilbert-base-uncased"
model = DistilBertForSequenceClassification.from_pretrained(model_name, num_labels=10)
tokenizer = DistilBertTokenizer.from_pretrained(model_name)
tokenizer

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

<transformers.tokenization_distilbert.DistilBertTokenizer at 0x7f764b2fd9a0>

In [17]:
!date "+[%F %R:%S] [EXIT] 1_QuickTour.ipynb ($CONDA_DEFAULT_ENV)"
print(f"td={time.time() - t0:.3f}")

[2020-10-23 17:57:02] [EXIT] HelloTransformers1.ipynb (trans)
td=53.053
