In [7]:
import ecco
import json
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModel, AutoModelForSeq2SeqLM

In [8]:
# Hub identifier of the checkpoint that the rest of the notebook loads.
model_dir = "hyunwoongko/kobart"

# Pick the GPU when torch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [9]:
# Load the seq2seq model and its tokenizer from the same checkpoint (`model_dir`).
# Alternative loader kept for reference: AutoModel.from_pretrained(model_dir)
model = AutoModelForSeq2SeqLM.from_pretrained(model_dir)
tokenizer = AutoTokenizer.from_pretrained(model_dir)

## Building the Model Config

1. Find the embedding layer (encoder)

In [10]:
# Display the model's module tree so the embedding and feed-forward layer names can be read off.
model

BartForConditionalGeneration(
  (model): BartModel(
    (shared): Embedding(30000, 768, padding_idx=3)
    (encoder): BartEncoder(
      (embed_tokens): Embedding(30000, 768, padding_idx=3)
      (embed_positions): BartLearnedPositionalEmbedding(1028, 768)
      (layers): ModuleList(
        (0): BartEncoderLayer(
          (self_attn): BartAttention(
            (k_proj): Linear(in_features=768, out_features=768, bias=True)
            (v_proj): Linear(in_features=768, out_features=768, bias=True)
            (q_proj): Linear(in_features=768, out_features=768, bias=True)
            (out_proj): Linear(in_features=768, out_features=768, bias=True)
          )
          (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (activation_fn): GELUActivation()
          (fc1): Linear(in_features=768, out_features=3072, bias=True)
          (fc2): Linear(in_features=3072, out_features=768, bias=True)
          (final_layer_norm): LayerNorm((768,), eps=1e-05,

In [11]:
# Encoder token-embedding module; the ecco config below points at this path with `.weight` appended.
model.model.encoder.embed_tokens

Embedding(30000, 768, padding_idx=3)

In [12]:
# Inspect the first encoder layer to find the feed-forward sub-modules.
# Its linears are named `fc1` / `fc2`; there is no BERT-style `.layer[0].intermediate.dense` here.
model.model.encoder.layers[0]

BartEncoderLayer(
  (self_attn): BartAttention(
    (k_proj): Linear(in_features=768, out_features=768, bias=True)
    (v_proj): Linear(in_features=768, out_features=768, bias=True)
    (q_proj): Linear(in_features=768, out_features=768, bias=True)
    (out_proj): Linear(in_features=768, out_features=768, bias=True)
  )
  (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  (activation_fn): GELUActivation()
  (fc1): Linear(in_features=768, out_features=3072, bias=True)
  (fc2): Linear(in_features=3072, out_features=768, bias=True)
  (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
)

In [13]:
# Custom ecco model config for the KoBART checkpoint.
#
# https://github.com/jalammar/ecco/issues/44
# "ModuleAttributeError: 'Embedding' object has no attribute 'shape'"
# The embedding layer is "model.encoder.embed_tokens", but that path resolves to an
# nn.Embedding module -> point at its `.weight` tensor to avoid the error.

model_config = {
    # BERT-style path, not applicable here: "embeddings.word_embeddings"
    'embedding': "model.encoder.embed_tokens.weight",
    'type': 'enc-dec',
    # Regex matched against module names (raw string so `\.` / `\d` are not treated as
    # string escapes). The model printed above has no `intermediate.dense` module (that is
    # BERT naming); its feed-forward linears are `fc1` / `fc2`, and `fc1` is the analog of
    # BERT's `intermediate.dense`.
    # NOTE(review): unused while `activations=False` is passed to ecco.from_pretrained;
    # confirm whether `fc1` or `fc2` is the layer wanted before enabling activations.
    'activations': [r'layers\.\d+\.fc1'],
    'token_prefix': '▁',
    'partial_token_prefix': ''
}

In [14]:
# Wrap the checkpoint with ecco using the hand-written config; activation collection is off.
ecco_model = ecco.from_pretrained(
    model_dir,
    model_config=model_config,
    activations=False,
)

In [31]:
# Alternative decoding setup (BEAM-5), kept for reference:
# decode_params = {
#     "beam_size": 5,
#     "temperature": 0.8,
#     "do_sample": False,
#     "repetition_penalty": 1.2,
#     "no_repeat_ngram_size": 3,
#     "early_stopping": True
# }

# Top-P sampling parameters, forwarded as keyword arguments to `ecco_model.generate`.
decode_params = {
    "top_p": 0.8,
    "temperature": 0.8,
    "do_sample": True,
    "repetition_penalty": 1.2,
    "no_repeat_ngram_size": 3,
    "early_stopping": True
}

# `do_sample=True` makes generation stochastic; seed torch's RNG so that re-running
# this cell (or Restart & Run All) reproduces the same tokens and attributions.
torch.manual_seed(42)

query = "안녕하세요"
# "ig" is the attribution method that is later read back via
# `output.primary_attributions(attr_method="ig", ...)`.
output = ecco_model.generate(query, generate=5, attribution=["ig"], **decode_params)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [32]:
# Render the "ig" attributions computed during generation, using the detailed view.
output.primary_attributions(
    attr_method="ig",
    style="detailed",
)

<IPython.core.display.Javascript object>