Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pegasus variables mapping #9

Open
huseinzol05 opened this issue Jan 30, 2021 · 1 comment
Open

Pegasus variables mapping #9

huseinzol05 opened this issue Jan 30, 2021 · 1 comment

Comments

@huseinzol05
Copy link

huseinzol05 commented Jan 30, 2021

I have my own pretrained Pegasus model, and now I want to fine-tune it using BigBird. This is my variable-mapping function:

import re
import collections

def get_assignment_map_from_checkpoint(tvars, init_checkpoint):
    """Match renamed checkpoint variables against the current variables.

    Each variable name listed in ``init_checkpoint`` is prefixed with
    ``pegasus/`` and rewritten with the substring rules below. Checkpoint
    entries whose rewritten name does not belong to any variable in
    ``tvars`` are skipped, so the result is the set of checkpoint
    variables that have a counterpart in ``tvars`` (not a union).

    Relies on a module-level ``tf`` name providing
    ``tf.train.list_variables``.

    Args:
        tvars: Iterable of objects with a ``name`` attribute. A trailing
            ``:<digits>`` suffix on the name is stripped before matching.
        init_checkpoint: Value passed to ``tf.train.list_variables``
            (presumably a checkpoint path or prefix -- confirm with caller).

    Returns:
        A tuple ``(assignment_map, initialized_variable_names)`` where
        ``assignment_map`` is an ``OrderedDict`` from the original
        checkpoint variable name to the matching object from ``tvars``,
        and ``initialized_variable_names`` is a dict whose keys are the
        matched rewritten names with ``':0'`` appended (values are ``1``).
    """
    # Ordered (old, new) substring rewrites. Order matters: a specific
    # pattern ('ffn/dense_1', 'memory_attention/output') must be applied
    # before the shorter pattern that would also match it.
    renames = (
        ('embeddings/weights', 'embeddings/word_embeddings'),
        ('self/output', 'output'),
        ('ffn/dense_1', 'output/dense'),
        ('ffn', 'intermediate'),
        ('memory_attention/output', 'attention/encdec_output'),
        ('memory_attention', 'attention/encdec'),
    )

    # Index the current variables by name without the ':<n>' output suffix.
    name_to_variable = collections.OrderedDict()
    for var in tvars:
        name = var.name
        m = re.match(r'^(.*):\d+$', name)
        if m is not None:
            name = m.group(1)
        name_to_variable[name] = var

    assignment_map = collections.OrderedDict()
    initialized_variable_names = {}
    for entry in tf.train.list_variables(init_checkpoint):
        # Only the first element (the checkpoint variable name) is needed.
        ckpt_name = entry[0]

        target_name = 'pegasus/' + ckpt_name
        for old, new in renames:
            target_name = target_name.replace(old, new)

        if target_name not in name_to_variable:
            continue
        assignment_map[ckpt_name] = name_to_variable[target_name]
        initialized_variable_names[target_name + ':0'] = 1

    return (assignment_map, initialized_variable_names)

Output:

OrderedDict([('decoder/LayerNorm/beta',
              <tf.Variable 'pegasus/decoder/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>),
             ('decoder/LayerNorm/gamma',
              <tf.Variable 'pegasus/decoder/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>),
             ('decoder/layer_0/attention/self/LayerNorm/beta',
              <tf.Variable 'pegasus/decoder/layer_0/attention/self/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>),
             ('decoder/layer_0/attention/self/LayerNorm/gamma',
              <tf.Variable 'pegasus/decoder/layer_0/attention/self/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>),
             ('decoder/layer_0/attention/self/key/kernel',
              <tf.Variable 'pegasus/decoder/layer_0/attention/self/key/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('decoder/layer_0/attention/self/output/dense/kernel',
              <tf.Variable 'pegasus/decoder/layer_0/attention/output/dense/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('decoder/layer_0/attention/self/query/kernel',
              <tf.Variable 'pegasus/decoder/layer_0/attention/self/query/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('decoder/layer_0/attention/self/value/kernel',
              <tf.Variable 'pegasus/decoder/layer_0/attention/self/value/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('decoder/layer_0/ffn/LayerNorm/beta',
              <tf.Variable 'pegasus/decoder/layer_0/intermediate/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>),
             ('decoder/layer_0/ffn/LayerNorm/gamma',
              <tf.Variable 'pegasus/decoder/layer_0/intermediate/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>),
             ('decoder/layer_0/ffn/dense/bias',
              <tf.Variable 'pegasus/decoder/layer_0/intermediate/dense/bias:0' shape=(3072,) dtype=float32_ref>),
             ('decoder/layer_0/ffn/dense/kernel',
              <tf.Variable 'pegasus/decoder/layer_0/intermediate/dense/kernel:0' shape=(512, 3072) dtype=float32_ref>),
             ('decoder/layer_0/ffn/dense_1/bias',
              <tf.Variable 'pegasus/decoder/layer_0/output/dense/bias:0' shape=(512,) dtype=float32_ref>),
             ('decoder/layer_0/ffn/dense_1/kernel',
              <tf.Variable 'pegasus/decoder/layer_0/output/dense/kernel:0' shape=(3072, 512) dtype=float32_ref>),
             ('decoder/layer_0/memory_attention/LayerNorm/beta',
              <tf.Variable 'pegasus/decoder/layer_0/attention/encdec/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>),
             ('decoder/layer_0/memory_attention/LayerNorm/gamma',
              <tf.Variable 'pegasus/decoder/layer_0/attention/encdec/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>),
             ('decoder/layer_0/memory_attention/key/kernel',
              <tf.Variable 'pegasus/decoder/layer_0/attention/encdec/key/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('decoder/layer_0/memory_attention/output/dense/kernel',
              <tf.Variable 'pegasus/decoder/layer_0/attention/encdec_output/dense/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('decoder/layer_0/memory_attention/query/kernel',
              <tf.Variable 'pegasus/decoder/layer_0/attention/encdec/query/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('decoder/layer_0/memory_attention/value/kernel',
              <tf.Variable 'pegasus/decoder/layer_0/attention/encdec/value/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('decoder/layer_1/attention/self/LayerNorm/beta',
              <tf.Variable 'pegasus/decoder/layer_1/attention/self/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>),
             ('decoder/layer_1/attention/self/LayerNorm/gamma',
              <tf.Variable 'pegasus/decoder/layer_1/attention/self/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>),
             ('decoder/layer_1/attention/self/key/kernel',
              <tf.Variable 'pegasus/decoder/layer_1/attention/self/key/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('decoder/layer_1/attention/self/output/dense/kernel',
              <tf.Variable 'pegasus/decoder/layer_1/attention/output/dense/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('decoder/layer_1/attention/self/query/kernel',
              <tf.Variable 'pegasus/decoder/layer_1/attention/self/query/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('decoder/layer_1/attention/self/value/kernel',
              <tf.Variable 'pegasus/decoder/layer_1/attention/self/value/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('decoder/layer_1/ffn/LayerNorm/beta',
              <tf.Variable 'pegasus/decoder/layer_1/intermediate/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>),
             ('decoder/layer_1/ffn/LayerNorm/gamma',
              <tf.Variable 'pegasus/decoder/layer_1/intermediate/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>),
             ('decoder/layer_1/ffn/dense/bias',
              <tf.Variable 'pegasus/decoder/layer_1/intermediate/dense/bias:0' shape=(3072,) dtype=float32_ref>),
             ('decoder/layer_1/ffn/dense/kernel',
              <tf.Variable 'pegasus/decoder/layer_1/intermediate/dense/kernel:0' shape=(512, 3072) dtype=float32_ref>),
             ('decoder/layer_1/ffn/dense_1/bias',
              <tf.Variable 'pegasus/decoder/layer_1/output/dense/bias:0' shape=(512,) dtype=float32_ref>),
             ('decoder/layer_1/ffn/dense_1/kernel',
              <tf.Variable 'pegasus/decoder/layer_1/output/dense/kernel:0' shape=(3072, 512) dtype=float32_ref>),
             ('decoder/layer_1/memory_attention/LayerNorm/beta',
              <tf.Variable 'pegasus/decoder/layer_1/attention/encdec/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>),
             ('decoder/layer_1/memory_attention/LayerNorm/gamma',
              <tf.Variable 'pegasus/decoder/layer_1/attention/encdec/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>),
             ('decoder/layer_1/memory_attention/key/kernel',
              <tf.Variable 'pegasus/decoder/layer_1/attention/encdec/key/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('decoder/layer_1/memory_attention/output/dense/kernel',
              <tf.Variable 'pegasus/decoder/layer_1/attention/encdec_output/dense/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('decoder/layer_1/memory_attention/query/kernel',
              <tf.Variable 'pegasus/decoder/layer_1/attention/encdec/query/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('decoder/layer_1/memory_attention/value/kernel',
              <tf.Variable 'pegasus/decoder/layer_1/attention/encdec/value/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('decoder/layer_2/attention/self/LayerNorm/beta',
              <tf.Variable 'pegasus/decoder/layer_2/attention/self/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>),
             ('decoder/layer_2/attention/self/LayerNorm/gamma',
              <tf.Variable 'pegasus/decoder/layer_2/attention/self/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>),
             ('decoder/layer_2/attention/self/key/kernel',
              <tf.Variable 'pegasus/decoder/layer_2/attention/self/key/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('decoder/layer_2/attention/self/output/dense/kernel',
              <tf.Variable 'pegasus/decoder/layer_2/attention/output/dense/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('decoder/layer_2/attention/self/query/kernel',
              <tf.Variable 'pegasus/decoder/layer_2/attention/self/query/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('decoder/layer_2/attention/self/value/kernel',
              <tf.Variable 'pegasus/decoder/layer_2/attention/self/value/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('decoder/layer_2/ffn/LayerNorm/beta',
              <tf.Variable 'pegasus/decoder/layer_2/intermediate/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>),
             ('decoder/layer_2/ffn/LayerNorm/gamma',
              <tf.Variable 'pegasus/decoder/layer_2/intermediate/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>),
             ('decoder/layer_2/ffn/dense/bias',
              <tf.Variable 'pegasus/decoder/layer_2/intermediate/dense/bias:0' shape=(3072,) dtype=float32_ref>),
             ('decoder/layer_2/ffn/dense/kernel',
              <tf.Variable 'pegasus/decoder/layer_2/intermediate/dense/kernel:0' shape=(512, 3072) dtype=float32_ref>),
             ('decoder/layer_2/ffn/dense_1/bias',
              <tf.Variable 'pegasus/decoder/layer_2/output/dense/bias:0' shape=(512,) dtype=float32_ref>),
             ('decoder/layer_2/ffn/dense_1/kernel',
              <tf.Variable 'pegasus/decoder/layer_2/output/dense/kernel:0' shape=(3072, 512) dtype=float32_ref>),
             ('decoder/layer_2/memory_attention/LayerNorm/beta',
              <tf.Variable 'pegasus/decoder/layer_2/attention/encdec/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>),
             ('decoder/layer_2/memory_attention/LayerNorm/gamma',
              <tf.Variable 'pegasus/decoder/layer_2/attention/encdec/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>),
             ('decoder/layer_2/memory_attention/key/kernel',
              <tf.Variable 'pegasus/decoder/layer_2/attention/encdec/key/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('decoder/layer_2/memory_attention/output/dense/kernel',
              <tf.Variable 'pegasus/decoder/layer_2/attention/encdec_output/dense/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('decoder/layer_2/memory_attention/query/kernel',
              <tf.Variable 'pegasus/decoder/layer_2/attention/encdec/query/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('decoder/layer_2/memory_attention/value/kernel',
              <tf.Variable 'pegasus/decoder/layer_2/attention/encdec/value/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('decoder/layer_3/attention/self/LayerNorm/beta',
              <tf.Variable 'pegasus/decoder/layer_3/attention/self/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>),
             ('decoder/layer_3/attention/self/LayerNorm/gamma',
              <tf.Variable 'pegasus/decoder/layer_3/attention/self/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>),
             ('decoder/layer_3/attention/self/key/kernel',
              <tf.Variable 'pegasus/decoder/layer_3/attention/self/key/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('decoder/layer_3/attention/self/output/dense/kernel',
              <tf.Variable 'pegasus/decoder/layer_3/attention/output/dense/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('decoder/layer_3/attention/self/query/kernel',
              <tf.Variable 'pegasus/decoder/layer_3/attention/self/query/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('decoder/layer_3/attention/self/value/kernel',
              <tf.Variable 'pegasus/decoder/layer_3/attention/self/value/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('decoder/layer_3/ffn/LayerNorm/beta',
              <tf.Variable 'pegasus/decoder/layer_3/intermediate/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>),
             ('decoder/layer_3/ffn/LayerNorm/gamma',
              <tf.Variable 'pegasus/decoder/layer_3/intermediate/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>),
             ('decoder/layer_3/ffn/dense/bias',
              <tf.Variable 'pegasus/decoder/layer_3/intermediate/dense/bias:0' shape=(3072,) dtype=float32_ref>),
             ('decoder/layer_3/ffn/dense/kernel',
              <tf.Variable 'pegasus/decoder/layer_3/intermediate/dense/kernel:0' shape=(512, 3072) dtype=float32_ref>),
             ('decoder/layer_3/ffn/dense_1/bias',
              <tf.Variable 'pegasus/decoder/layer_3/output/dense/bias:0' shape=(512,) dtype=float32_ref>),
             ('decoder/layer_3/ffn/dense_1/kernel',
              <tf.Variable 'pegasus/decoder/layer_3/output/dense/kernel:0' shape=(3072, 512) dtype=float32_ref>),
             ('decoder/layer_3/memory_attention/LayerNorm/beta',
              <tf.Variable 'pegasus/decoder/layer_3/attention/encdec/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>),
             ('decoder/layer_3/memory_attention/LayerNorm/gamma',
              <tf.Variable 'pegasus/decoder/layer_3/attention/encdec/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>),
             ('decoder/layer_3/memory_attention/key/kernel',
              <tf.Variable 'pegasus/decoder/layer_3/attention/encdec/key/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('decoder/layer_3/memory_attention/output/dense/kernel',
              <tf.Variable 'pegasus/decoder/layer_3/attention/encdec_output/dense/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('decoder/layer_3/memory_attention/query/kernel',
              <tf.Variable 'pegasus/decoder/layer_3/attention/encdec/query/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('decoder/layer_3/memory_attention/value/kernel',
              <tf.Variable 'pegasus/decoder/layer_3/attention/encdec/value/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('decoder/layer_4/attention/self/LayerNorm/beta',
              <tf.Variable 'pegasus/decoder/layer_4/attention/self/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>),
             ('decoder/layer_4/attention/self/LayerNorm/gamma',
              <tf.Variable 'pegasus/decoder/layer_4/attention/self/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>),
             ('decoder/layer_4/attention/self/key/kernel',
              <tf.Variable 'pegasus/decoder/layer_4/attention/self/key/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('decoder/layer_4/attention/self/output/dense/kernel',
              <tf.Variable 'pegasus/decoder/layer_4/attention/output/dense/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('decoder/layer_4/attention/self/query/kernel',
              <tf.Variable 'pegasus/decoder/layer_4/attention/self/query/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('decoder/layer_4/attention/self/value/kernel',
              <tf.Variable 'pegasus/decoder/layer_4/attention/self/value/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('decoder/layer_4/ffn/LayerNorm/beta',
              <tf.Variable 'pegasus/decoder/layer_4/intermediate/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>),
             ('decoder/layer_4/ffn/LayerNorm/gamma',
              <tf.Variable 'pegasus/decoder/layer_4/intermediate/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>),
             ('decoder/layer_4/ffn/dense/bias',
              <tf.Variable 'pegasus/decoder/layer_4/intermediate/dense/bias:0' shape=(3072,) dtype=float32_ref>),
             ('decoder/layer_4/ffn/dense/kernel',
              <tf.Variable 'pegasus/decoder/layer_4/intermediate/dense/kernel:0' shape=(512, 3072) dtype=float32_ref>),
             ('decoder/layer_4/ffn/dense_1/bias',
              <tf.Variable 'pegasus/decoder/layer_4/output/dense/bias:0' shape=(512,) dtype=float32_ref>),
             ('decoder/layer_4/ffn/dense_1/kernel',
              <tf.Variable 'pegasus/decoder/layer_4/output/dense/kernel:0' shape=(3072, 512) dtype=float32_ref>),
             ('decoder/layer_4/memory_attention/LayerNorm/beta',
              <tf.Variable 'pegasus/decoder/layer_4/attention/encdec/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>),
             ('decoder/layer_4/memory_attention/LayerNorm/gamma',
              <tf.Variable 'pegasus/decoder/layer_4/attention/encdec/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>),
             ('decoder/layer_4/memory_attention/key/kernel',
              <tf.Variable 'pegasus/decoder/layer_4/attention/encdec/key/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('decoder/layer_4/memory_attention/output/dense/kernel',
              <tf.Variable 'pegasus/decoder/layer_4/attention/encdec_output/dense/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('decoder/layer_4/memory_attention/query/kernel',
              <tf.Variable 'pegasus/decoder/layer_4/attention/encdec/query/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('decoder/layer_4/memory_attention/value/kernel',
              <tf.Variable 'pegasus/decoder/layer_4/attention/encdec/value/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('decoder/layer_5/attention/self/LayerNorm/beta',
              <tf.Variable 'pegasus/decoder/layer_5/attention/self/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>),
             ('decoder/layer_5/attention/self/LayerNorm/gamma',
              <tf.Variable 'pegasus/decoder/layer_5/attention/self/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>),
             ('decoder/layer_5/attention/self/key/kernel',
              <tf.Variable 'pegasus/decoder/layer_5/attention/self/key/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('decoder/layer_5/attention/self/output/dense/kernel',
              <tf.Variable 'pegasus/decoder/layer_5/attention/output/dense/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('decoder/layer_5/attention/self/query/kernel',
              <tf.Variable 'pegasus/decoder/layer_5/attention/self/query/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('decoder/layer_5/attention/self/value/kernel',
              <tf.Variable 'pegasus/decoder/layer_5/attention/self/value/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('decoder/layer_5/ffn/LayerNorm/beta',
              <tf.Variable 'pegasus/decoder/layer_5/intermediate/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>),
             ('decoder/layer_5/ffn/LayerNorm/gamma',
              <tf.Variable 'pegasus/decoder/layer_5/intermediate/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>),
             ('decoder/layer_5/ffn/dense/bias',
              <tf.Variable 'pegasus/decoder/layer_5/intermediate/dense/bias:0' shape=(3072,) dtype=float32_ref>),
             ('decoder/layer_5/ffn/dense/kernel',
              <tf.Variable 'pegasus/decoder/layer_5/intermediate/dense/kernel:0' shape=(512, 3072) dtype=float32_ref>),
             ('decoder/layer_5/ffn/dense_1/bias',
              <tf.Variable 'pegasus/decoder/layer_5/output/dense/bias:0' shape=(512,) dtype=float32_ref>),
             ('decoder/layer_5/ffn/dense_1/kernel',
              <tf.Variable 'pegasus/decoder/layer_5/output/dense/kernel:0' shape=(3072, 512) dtype=float32_ref>),
             ('decoder/layer_5/memory_attention/LayerNorm/beta',
              <tf.Variable 'pegasus/decoder/layer_5/attention/encdec/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>),
             ('decoder/layer_5/memory_attention/LayerNorm/gamma',
              <tf.Variable 'pegasus/decoder/layer_5/attention/encdec/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>),
             ('decoder/layer_5/memory_attention/key/kernel',
              <tf.Variable 'pegasus/decoder/layer_5/attention/encdec/key/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('decoder/layer_5/memory_attention/output/dense/kernel',
              <tf.Variable 'pegasus/decoder/layer_5/attention/encdec_output/dense/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('decoder/layer_5/memory_attention/query/kernel',
              <tf.Variable 'pegasus/decoder/layer_5/attention/encdec/query/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('decoder/layer_5/memory_attention/value/kernel',
              <tf.Variable 'pegasus/decoder/layer_5/attention/encdec/value/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('embeddings/weights',
              <tf.Variable 'pegasus/embeddings/word_embeddings:0' shape=(32128, 512) dtype=float32_ref>),
             ('encoder/LayerNorm/beta',
              <tf.Variable 'pegasus/encoder/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>),
             ('encoder/LayerNorm/gamma',
              <tf.Variable 'pegasus/encoder/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>),
             ('encoder/layer_0/attention/self/LayerNorm/beta',
              <tf.Variable 'pegasus/encoder/layer_0/attention/self/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>),
             ('encoder/layer_0/attention/self/LayerNorm/gamma',
              <tf.Variable 'pegasus/encoder/layer_0/attention/self/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>),
             ('encoder/layer_0/attention/self/key/kernel',
              <tf.Variable 'pegasus/encoder/layer_0/attention/self/key/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('encoder/layer_0/attention/self/output/dense/kernel',
              <tf.Variable 'pegasus/encoder/layer_0/attention/output/dense/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('encoder/layer_0/attention/self/query/kernel',
              <tf.Variable 'pegasus/encoder/layer_0/attention/self/query/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('encoder/layer_0/attention/self/value/kernel',
              <tf.Variable 'pegasus/encoder/layer_0/attention/self/value/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('encoder/layer_0/ffn/LayerNorm/beta',
              <tf.Variable 'pegasus/encoder/layer_0/intermediate/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>),
             ('encoder/layer_0/ffn/LayerNorm/gamma',
              <tf.Variable 'pegasus/encoder/layer_0/intermediate/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>),
             ('encoder/layer_0/ffn/dense/bias',
              <tf.Variable 'pegasus/encoder/layer_0/intermediate/dense/bias:0' shape=(3072,) dtype=float32_ref>),
             ('encoder/layer_0/ffn/dense/kernel',
              <tf.Variable 'pegasus/encoder/layer_0/intermediate/dense/kernel:0' shape=(512, 3072) dtype=float32_ref>),
             ('encoder/layer_0/ffn/dense_1/bias',
              <tf.Variable 'pegasus/encoder/layer_0/output/dense/bias:0' shape=(512,) dtype=float32_ref>),
             ('encoder/layer_0/ffn/dense_1/kernel',
              <tf.Variable 'pegasus/encoder/layer_0/output/dense/kernel:0' shape=(3072, 512) dtype=float32_ref>),
             ('encoder/layer_1/attention/self/LayerNorm/beta',
              <tf.Variable 'pegasus/encoder/layer_1/attention/self/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>),
             ('encoder/layer_1/attention/self/LayerNorm/gamma',
              <tf.Variable 'pegasus/encoder/layer_1/attention/self/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>),
             ('encoder/layer_1/attention/self/key/kernel',
              <tf.Variable 'pegasus/encoder/layer_1/attention/self/key/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('encoder/layer_1/attention/self/output/dense/kernel',
              <tf.Variable 'pegasus/encoder/layer_1/attention/output/dense/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('encoder/layer_1/attention/self/query/kernel',
              <tf.Variable 'pegasus/encoder/layer_1/attention/self/query/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('encoder/layer_1/attention/self/value/kernel',
              <tf.Variable 'pegasus/encoder/layer_1/attention/self/value/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('encoder/layer_1/ffn/LayerNorm/beta',
              <tf.Variable 'pegasus/encoder/layer_1/intermediate/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>),
             ('encoder/layer_1/ffn/LayerNorm/gamma',
              <tf.Variable 'pegasus/encoder/layer_1/intermediate/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>),
             ('encoder/layer_1/ffn/dense/bias',
              <tf.Variable 'pegasus/encoder/layer_1/intermediate/dense/bias:0' shape=(3072,) dtype=float32_ref>),
             ('encoder/layer_1/ffn/dense/kernel',
              <tf.Variable 'pegasus/encoder/layer_1/intermediate/dense/kernel:0' shape=(512, 3072) dtype=float32_ref>),
             ('encoder/layer_1/ffn/dense_1/bias',
              <tf.Variable 'pegasus/encoder/layer_1/output/dense/bias:0' shape=(512,) dtype=float32_ref>),
             ('encoder/layer_1/ffn/dense_1/kernel',
              <tf.Variable 'pegasus/encoder/layer_1/output/dense/kernel:0' shape=(3072, 512) dtype=float32_ref>),
             ('encoder/layer_2/attention/self/LayerNorm/beta',
              <tf.Variable 'pegasus/encoder/layer_2/attention/self/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>),
             ('encoder/layer_2/attention/self/LayerNorm/gamma',
              <tf.Variable 'pegasus/encoder/layer_2/attention/self/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>),
             ('encoder/layer_2/attention/self/key/kernel',
              <tf.Variable 'pegasus/encoder/layer_2/attention/self/key/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('encoder/layer_2/attention/self/output/dense/kernel',
              <tf.Variable 'pegasus/encoder/layer_2/attention/output/dense/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('encoder/layer_2/attention/self/query/kernel',
              <tf.Variable 'pegasus/encoder/layer_2/attention/self/query/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('encoder/layer_2/attention/self/value/kernel',
              <tf.Variable 'pegasus/encoder/layer_2/attention/self/value/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('encoder/layer_2/ffn/LayerNorm/beta',
              <tf.Variable 'pegasus/encoder/layer_2/intermediate/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>),
             ('encoder/layer_2/ffn/LayerNorm/gamma',
              <tf.Variable 'pegasus/encoder/layer_2/intermediate/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>),
             ('encoder/layer_2/ffn/dense/bias',
              <tf.Variable 'pegasus/encoder/layer_2/intermediate/dense/bias:0' shape=(3072,) dtype=float32_ref>),
             ('encoder/layer_2/ffn/dense/kernel',
              <tf.Variable 'pegasus/encoder/layer_2/intermediate/dense/kernel:0' shape=(512, 3072) dtype=float32_ref>),
             ('encoder/layer_2/ffn/dense_1/bias',
              <tf.Variable 'pegasus/encoder/layer_2/output/dense/bias:0' shape=(512,) dtype=float32_ref>),
             ('encoder/layer_2/ffn/dense_1/kernel',
              <tf.Variable 'pegasus/encoder/layer_2/output/dense/kernel:0' shape=(3072, 512) dtype=float32_ref>),
             ('encoder/layer_3/attention/self/LayerNorm/beta',
              <tf.Variable 'pegasus/encoder/layer_3/attention/self/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>),
             ('encoder/layer_3/attention/self/LayerNorm/gamma',
              <tf.Variable 'pegasus/encoder/layer_3/attention/self/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>),
             ('encoder/layer_3/attention/self/key/kernel',
              <tf.Variable 'pegasus/encoder/layer_3/attention/self/key/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('encoder/layer_3/attention/self/output/dense/kernel',
              <tf.Variable 'pegasus/encoder/layer_3/attention/output/dense/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('encoder/layer_3/attention/self/query/kernel',
              <tf.Variable 'pegasus/encoder/layer_3/attention/self/query/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('encoder/layer_3/attention/self/value/kernel',
              <tf.Variable 'pegasus/encoder/layer_3/attention/self/value/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('encoder/layer_3/ffn/LayerNorm/beta',
              <tf.Variable 'pegasus/encoder/layer_3/intermediate/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>),
             ('encoder/layer_3/ffn/LayerNorm/gamma',
              <tf.Variable 'pegasus/encoder/layer_3/intermediate/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>),
             ('encoder/layer_3/ffn/dense/bias',
              <tf.Variable 'pegasus/encoder/layer_3/intermediate/dense/bias:0' shape=(3072,) dtype=float32_ref>),
             ('encoder/layer_3/ffn/dense/kernel',
              <tf.Variable 'pegasus/encoder/layer_3/intermediate/dense/kernel:0' shape=(512, 3072) dtype=float32_ref>),
             ('encoder/layer_3/ffn/dense_1/bias',
              <tf.Variable 'pegasus/encoder/layer_3/output/dense/bias:0' shape=(512,) dtype=float32_ref>),
             ('encoder/layer_3/ffn/dense_1/kernel',
              <tf.Variable 'pegasus/encoder/layer_3/output/dense/kernel:0' shape=(3072, 512) dtype=float32_ref>),
             ('encoder/layer_4/attention/self/LayerNorm/beta',
              <tf.Variable 'pegasus/encoder/layer_4/attention/self/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>),
             ('encoder/layer_4/attention/self/LayerNorm/gamma',
              <tf.Variable 'pegasus/encoder/layer_4/attention/self/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>),
             ('encoder/layer_4/attention/self/key/kernel',
              <tf.Variable 'pegasus/encoder/layer_4/attention/self/key/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('encoder/layer_4/attention/self/output/dense/kernel',
              <tf.Variable 'pegasus/encoder/layer_4/attention/output/dense/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('encoder/layer_4/attention/self/query/kernel',
              <tf.Variable 'pegasus/encoder/layer_4/attention/self/query/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('encoder/layer_4/attention/self/value/kernel',
              <tf.Variable 'pegasus/encoder/layer_4/attention/self/value/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('encoder/layer_4/ffn/LayerNorm/beta',
              <tf.Variable 'pegasus/encoder/layer_4/intermediate/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>),
             ('encoder/layer_4/ffn/LayerNorm/gamma',
              <tf.Variable 'pegasus/encoder/layer_4/intermediate/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>),
             ('encoder/layer_4/ffn/dense/bias',
              <tf.Variable 'pegasus/encoder/layer_4/intermediate/dense/bias:0' shape=(3072,) dtype=float32_ref>),
             ('encoder/layer_4/ffn/dense/kernel',
              <tf.Variable 'pegasus/encoder/layer_4/intermediate/dense/kernel:0' shape=(512, 3072) dtype=float32_ref>),
             ('encoder/layer_4/ffn/dense_1/bias',
              <tf.Variable 'pegasus/encoder/layer_4/output/dense/bias:0' shape=(512,) dtype=float32_ref>),
             ('encoder/layer_4/ffn/dense_1/kernel',
              <tf.Variable 'pegasus/encoder/layer_4/output/dense/kernel:0' shape=(3072, 512) dtype=float32_ref>),
             ('encoder/layer_5/attention/self/LayerNorm/beta',
              <tf.Variable 'pegasus/encoder/layer_5/attention/self/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>),
             ('encoder/layer_5/attention/self/LayerNorm/gamma',
              <tf.Variable 'pegasus/encoder/layer_5/attention/self/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>),
             ('encoder/layer_5/attention/self/key/kernel',
              <tf.Variable 'pegasus/encoder/layer_5/attention/self/key/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('encoder/layer_5/attention/self/output/dense/kernel',
              <tf.Variable 'pegasus/encoder/layer_5/attention/output/dense/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('encoder/layer_5/attention/self/query/kernel',
              <tf.Variable 'pegasus/encoder/layer_5/attention/self/query/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('encoder/layer_5/attention/self/value/kernel',
              <tf.Variable 'pegasus/encoder/layer_5/attention/self/value/kernel:0' shape=(512, 512) dtype=float32_ref>),
             ('encoder/layer_5/ffn/LayerNorm/beta',
              <tf.Variable 'pegasus/encoder/layer_5/intermediate/LayerNorm/beta:0' shape=(512,) dtype=float32_ref>),
             ('encoder/layer_5/ffn/LayerNorm/gamma',
              <tf.Variable 'pegasus/encoder/layer_5/intermediate/LayerNorm/gamma:0' shape=(512,) dtype=float32_ref>),
             ('encoder/layer_5/ffn/dense/bias',
              <tf.Variable 'pegasus/encoder/layer_5/intermediate/dense/bias:0' shape=(3072,) dtype=float32_ref>),
             ('encoder/layer_5/ffn/dense/kernel',
              <tf.Variable 'pegasus/encoder/layer_5/intermediate/dense/kernel:0' shape=(512, 3072) dtype=float32_ref>),
             ('encoder/layer_5/ffn/dense_1/bias',
              <tf.Variable 'pegasus/encoder/layer_5/output/dense/bias:0' shape=(512,) dtype=float32_ref>),
             ('encoder/layer_5/ffn/dense_1/kernel',
              <tf.Variable 'pegasus/encoder/layer_5/output/dense/kernel:0' shape=(3072, 512) dtype=float32_ref>)])

My Pegasus config, copied from https://github.com/google-research/bigbird/blob/master/bigbird/summarization/pegasus_large.sh:

# Configuration dictionary for the model built under the 'pegasus' scope.
# Built with keyword arguments; key order matches the original literal.
bert_config = dict(
    # transformer basic configs
    attention_probs_dropout_prob=0.1,
    hidden_act='relu',
    hidden_dropout_prob=0.1,
    hidden_size=512,
    initializer_range=0.02,
    intermediate_size=3072,
    max_position_embeddings=4096,
    max_encoder_length=2048,
    max_decoder_length=512,
    num_attention_heads=8,
    num_hidden_layers=6,
    type_vocab_size=2,
    scope='pegasus',
    use_bias=False,
    rescale_embedding=True,
    vocab_model_file=None,
    # sparse mask configs
    attention_type='block_sparse',
    norm_type='prenorm',
    block_size=64,
    num_rand_blocks=3,
    vocab_size=32128,
    beam_size=1,
    alpha=0.0,
    couple_encoder_decoder=False,
    num_warmup_steps=10000,
    learning_rate=0.1,
    label_smoothing=0.1,
    optimizer='Adafactor',
    use_tpu=True,
)

I am not sure this mapping is correct, and fine-tuning is really slow, so any guidance on variable mapping would be really helpful.

@huseinzol05
Copy link
Author

@manzilz

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant