# BART

- https://huggingface.co/docs/transformers/model_doc/bart#mask-filling

In [4]:
import transformers

### Github Issue

우선 친절한 답변 감사드립니다!
제가 착오가 있었네요
기존의 BART 모델과 비교하여 약간 다른 점이 있는 것 같은데 맞는지 궁금합니다.

1. 영어 Bart 모델의 경우엔 아래와 같이 encoder input으로 bos 토큰도 입력하는데, KoBart에선 사용하지 않나요?
![](https://user-images.githubusercontent.com/43404665/146330976-783ab949-3401-46b1-afa9-71206ddfa062.png)

2. 아래와 같이 영어 Bart에선 decoder bos 토큰으로 eos 토큰을 사용하는데 KoBart는 bos를 사용한 건가요?

https://github.com/huggingface/transformers/blob/master/src/transformers/models/bart/configuration_bart.py#L138

In [5]:
from transformers import BartForConditionalGeneration, BartTokenizer

# Tokenize a masked sentence to see which special tokens BartTokenizer adds.
tok = BartTokenizer.from_pretrained("facebook/bart-large")
example_english_phrase = "UN Chief Says There Is No <mask> in Syria"

# Encode once and reuse the result for both printouts.
encoding = tok(example_english_phrase)
print(encoding)
print(tok.convert_ids_to_tokens(encoding["input_ids"]))

{'input_ids': [0, 4154, 1231, 15674, 345, 1534, 440, 50264, 11, 1854, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}
['<s>', 'UN', 'ĠChief', 'ĠSays', 'ĠThere', 'ĠIs', 'ĠNo', '<mask>', 'Ġin', 'ĠSyria', '</s>']


### Mask Filling

In [6]:
from transformers import BartForConditionalGeneration, BartTokenizer

# Fill the <mask> in a sentence by letting BART regenerate the whole sequence.
checkpoint = "facebook/bart-large"
tok = BartTokenizer.from_pretrained(checkpoint)
# forced_bos_token_id=0 is passed through to the model configuration.
model = BartForConditionalGeneration.from_pretrained(checkpoint, forced_bos_token_id=0)

example_english_phrase = "UN Chief Says There Is No <mask> in Syria"
batch = tok(example_english_phrase, return_tensors="pt")
generated_ids = model.generate(batch["input_ids"])

# First show the raw decoding (special tokens kept), then check the cleaned text.
print(tok.batch_decode(generated_ids, skip_special_tokens=False))
expected_texts = ['UN Chief Says There Is No Plan to Stop Chemical Weapons in Syria']
assert tok.batch_decode(generated_ids, skip_special_tokens=True) == expected_texts

2022-01-07 18:39:26.727598: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-01-07 18:39:26.727651: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


['</s><s>UN Chief Says There Is No Plan to Stop Chemical Weapons in Syria</s>']


In [7]:
# List the fields the tokenizer returned, then show each field with its tensor.
print(batch.keys())
for field_name, field_value in batch.items():
    print(field_name, field_value)

dict_keys(['input_ids', 'attention_mask'])
input_ids tensor([[    0,  4154,  1231, 15674,   345,  1534,   440, 50264,    11,  1854,
             2]])
attention_mask tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])


In [8]:
from transformers import BartModel, BartConfig

# Build a configuration from BartConfig's defaults
# (a facebook/bart-large style configuration).
default_config = BartConfig()

# Construct a model from that configuration; no checkpoint is loaded here.
model = BartModel(default_config)

# Read the configuration back from the model.
configuration = model.config

In [9]:
# Show every field of the configuration read back from the model in the previous cell.
print(configuration)

BartConfig {
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "attention_dropout": 0.0,
  "bos_token_id": 0,
  "classifier_dropout": 0.0,
  "d_model": 1024,
  "decoder_attention_heads": 16,
  "decoder_ffn_dim": 4096,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 12,
  "decoder_start_token_id": 2,
  "dropout": 0.1,
  "encoder_attention_heads": 16,
  "encoder_ffn_dim": 4096,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 12,
  "eos_token_id": 2,
  "forced_eos_token_id": 2,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "init_std": 0.02,
  "is_encoder_decoder": true,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "max_position_embeddings": 1024,
  "model_type": "bart",
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "scale_embedding": false,
  "transformers_version": "4.15.0",
  "use_cache": true,
  "vocab_size": 50265
}



### BartModel

In [10]:
from transformers import BartTokenizer, BartModel

# Run one sentence through the bare BartModel and keep its final hidden states.
checkpoint = 'facebook/bart-large'
tokenizer = BartTokenizer.from_pretrained(checkpoint)
model = BartModel.from_pretrained(checkpoint)

inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
# The tokenizer output holds exactly these two fields; pass them by name.
outputs = model(
    input_ids=inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
)

last_hidden_states = outputs.last_hidden_state

In [11]:
# Report the concrete classes in use, the encoded inputs, and every output field.
for obj in (tokenizer, model):
    print(type(obj))
print(inputs)
print(outputs.keys())
for field_name, field_value in outputs.items():
    print(field_name, field_value)

<class 'transformers.models.bart.tokenization_bart.BartTokenizer'>
<class 'transformers.models.bart.modeling_bart.BartModel'>
{'input_ids': tensor([[    0, 31414,     6,   127,  2335,    16, 11962,     2]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1]])}
odict_keys(['last_hidden_state', 'past_key_values', 'encoder_last_hidden_state'])
last_hidden_state tensor([[[ 0.5512,  0.8389, -1.4707,  ...,  1.3124, -0.2047,  0.2392],
         [ 0.5512,  0.8389, -1.4707,  ...,  1.3124, -0.2047,  0.2392],
         [ 0.9143,  0.9399, -1.2426,  ...,  0.9184, -0.1838, -0.9975],
         ...,
         [ 0.2561,  0.2253,  0.4470,  ...,  0.3447,  0.0087,  1.5508],
         [ 0.2077, -1.3086, -1.4295,  ..., -0.2998,  0.1828,  0.4700],
         [-0.4893,  2.5148, -1.5513,  ...,  0.5783,  1.0961,  0.1736]]],
       grad_fn=<NativeLayerNormBackward0>)
past_key_values ((tensor([[[[ 2.7406e-01,  4.6150e-01,  1.9901e+00,  ...,  9.9268e-01,
            2.8700e+00, -1.3778e+00],
          [-7.4523e-02,  6.4

In [12]:
# IPython introspection: show the init signature and docstring of BartModel.
?transformers.models.bart.modeling_bart.BartModel

[0;31mInit signature:[0m
[0mtransformers[0m[0;34m.[0m[0mmodels[0m[0;34m.[0m[0mbart[0m[0;34m.[0m[0mmodeling_bart[0m[0;34m.[0m[0mBartModel[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mconfig[0m[0;34m:[0m [0mtransformers[0m[0;34m.[0m[0mmodels[0m[0;34m.[0m[0mbart[0m[0;34m.[0m[0mconfiguration_bart[0m[0;34m.[0m[0mBartConfig[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m     
The bare BART Model outputting raw hidden-states without any specific head on top.
This model inherits from [`PreTrainedModel`]. Check the superclass documentation for the generic
methods the library implements for all its model (such as downloading or saving, resizing the input embeddings,
pruning heads etc.)

This model is also a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module)
subclass. Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to
general u

### Generate Summary

In [13]:
from transformers import BartTokenizer, BartForConditionalGeneration, BartConfig

# Summarize a short article with the facebook/bart-large-cnn checkpoint.
model = BartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn")
tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn")

ARTICLE_TO_SUMMARIZE = "My friends are cool but they eat too many carbs."
# `truncation=True` makes the 1024-token cap explicit; passing `max_length` alone
# only triggers a tokenizer warning and an implicit 'longest_first' fallback.
inputs = tokenizer([ARTICLE_TO_SUMMARIZE], max_length=1024, truncation=True, return_tensors="pt")

# Beam search with 4 beams; max_length=5 limits the generated sequence length,
# which is why the decoded summary is so short.
summary_ids = model.generate(inputs["input_ids"], num_beams=4, max_length=5, early_stopping=True)
print([tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False) for g in summary_ids])

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


['My friends']


### Mask Filling: top-k candidates from logits

In [14]:
from transformers import BartTokenizer, BartForConditionalGeneration 
# Predict the masked token two ways: read the logits at the mask position,
# and let `generate` rewrite the whole sentence.
tokenizer = BartTokenizer.from_pretrained("facebook/bart-large")
TXT = "My friends are <mask> but they eat too many carbs."
print("input:", TXT)

model = BartForConditionalGeneration.from_pretrained("facebook/bart-large", forced_bos_token_id=0)
input_ids = tokenizer([TXT], return_tensors="pt")["input_ids"]
outputs = model(input_ids)
logits = outputs.logits

# Position of the <mask> token; `.item()` raises unless exactly one position matches.
masked_index = (input_ids[0] == tokenizer.mask_token_id).nonzero().item()
probs = logits[0, masked_index].softmax(dim=0)
values, predictions = probs.topk(5)
print("candidates:", tokenizer.decode(predictions))   # candidate output tokens for mask token

generated_ids = model.generate(input_ids)
# Decode with this cell's `tokenizer` rather than `tok` left over from an earlier cell,
# so the cell does not depend on hidden kernel state.
print("output:", tokenizer.batch_decode(generated_ids, skip_special_tokens=False)[0])

input: My friends are <mask> but they eat too many carbs.
candidates:  good great all really very
output: </s><s>My friends are good people, but they eat too many carbs.</s>


In [15]:
# Walk through the intermediate values behind the mask-position lookup.
print("mask_token_id:", tokenizer.mask_token_id)
print("input_ids:", input_ids)
is_mask = input_ids[0] == tokenizer.mask_token_id  # boolean mask over the sequence
print(is_mask)
print("masked_index:", masked_index)

# Full model output, the logits tensor with its shape, and the logits row at the mask.
print("outputs:", outputs)
print("logits:", logits.shape, logits)
mask_logits = logits[0, masked_index]
print(mask_logits)

mask_token_id: 50264
input_ids: tensor([[    0,  2387,   964,    32, 50264,    53,    51,  3529,   350,   171,
         33237,     4,     2]])
tensor([False, False, False, False,  True, False, False, False, False, False,
        False, False, False])
masked_index: 4
outputs: Seq2SeqLMOutput(loss=None, logits=tensor([[[ 14.0215,  -1.0324,  10.7964,  ...,  -1.8337,  -2.2583,   6.7222],
         [ 14.0215,  -1.0324,  10.7965,  ...,  -1.8337,  -2.2583,   6.7222],
         [-13.3413,  -4.3637,   1.8103,  ...,  -6.0602,  -6.7870,   0.7438],
         ...,
         [ -9.9913,  -3.2930,   7.0242,  ...,  -1.7956,  -1.8567,   3.7523],
         [-18.4161,  -3.7959,   7.0068,  ...,  -2.8675,  -2.5782,   4.7485],
         [ -2.4355,  -4.5476,  16.5449,  ...,  -6.4282,  -6.0449,  -0.5252]]],
       grad_fn=<AddBackward0>), past_key_values=((tensor([[[[ 0.2741,  0.4615,  1.9901,  ...,  0.9927,  2.8700, -1.3778],
          [-0.0745,  0.6451,  1.7084,  ...,  0.9115,  2.5264, -1.6802],
          [-2.2453