In [20]:
import torch
import torch.nn as nn
from torch.quantization import quantize_dynamic
from transformers import AutoTokenizer
from transformers import AutoModelForSeq2SeqLM
from transformers import AutoConfig
from torchinfo import summary

import performance_test

In [21]:
# checkpoint = 'kobart-summarization-finetuned-paper-sample-size-1000/checkpoint-1000'
# checkpoint = 'gogamza/kobart-summarization'
checkpoint = 'encoder_decoder_pruned_last_3'
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
config = AutoConfig.from_pretrained(checkpoint)

Some weights of BartForConditionalGeneration were not initialized from the model checkpoint at encoder_decoder_pruned_last_3 and are newly initialized: ['model.decoder.layers.5.fc1.bias', 'model.encoder.layers.3.self_attn.v_proj.bias', 'model.encoder.layers.4.final_layer_norm.weight', 'model.decoder.layers.3.final_layer_norm.bias', 'model.decoder.layers.4.encoder_attn.k_proj.weight', 'model.encoder.layers.4.final_layer_norm.bias', 'model.decoder.layers.3.self_attn.k_proj.weight', 'model.encoder.layers.4.fc1.weight', 'model.decoder.layers.4.encoder_attn.out_proj.weight', 'model.decoder.layers.3.encoder_attn.out_proj.weight', 'model.decoder.layers.3.fc2.weight', 'model.encoder.layers.3.self_attn.v_proj.weight', 'model.decoder.layers.4.encoder_attn_layer_norm.weight', 'model.decoder.layers.4.encoder_attn.v_proj.bias', 'model.decoder.layers.5.final_layer_norm.bias', 'model.decoder.layers.3.self_attn.v_proj.bias', 'model.encoder.layers.4.self_attn.k_proj.weight', 'model.decoder.layers.5.enc

In [17]:
summary(model)

Layer (type:depth-idx)                                  Param #
BartForConditionalGeneration                            --
├─BartModel: 1-1                                        --
│    └─Embedding: 2-1                                   23,040,000
│    └─BartEncoder: 2-2                                 --
│    │    └─Embedding: 3-1                              (recursive)
│    │    └─BartLearnedPositionalEmbedding: 3-2         789,504
│    │    └─ModuleList: 3-3                             42,527,232
│    │    └─LayerNorm: 3-4                              1,536
│    └─BartDecoder: 2-3                                 --
│    │    └─Embedding: 3-5                              (recursive)
│    │    └─BartLearnedPositionalEmbedding: 3-6         789,504
│    │    └─ModuleList: 3-7                             56,710,656
│    │    └─LayerNorm: 3-8                              1,536
├─Linear: 1-2                                           23,040,000
Total params: 146,899,968
Trainable params: 

In [18]:
model

BartForConditionalGeneration(
  (model): BartModel(
    (shared): Embedding(30000, 768, padding_idx=3)
    (encoder): BartEncoder(
      (embed_tokens): Embedding(30000, 768, padding_idx=3)
      (embed_positions): BartLearnedPositionalEmbedding(1028, 768)
      (layers): ModuleList(
        (0): BartEncoderLayer(
          (self_attn): BartAttention(
            (k_proj): Linear(in_features=768, out_features=768, bias=True)
            (v_proj): Linear(in_features=768, out_features=768, bias=True)
            (q_proj): Linear(in_features=768, out_features=768, bias=True)
            (out_proj): Linear(in_features=768, out_features=768, bias=True)
          )
          (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (fc1): Linear(in_features=768, out_features=3072, bias=True)
          (fc2): Linear(in_features=3072, out_features=768, bias=True)
          (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        )
        

In [6]:
nn.Sequential(*list(model.model.encoder.layers.children())[:-3])
nn.Sequential(*list(model.model.decoder.layers.children())[:-3])

Sequential(
  (0): BartDecoderLayer(
    (self_attn): BartAttention(
      (k_proj): Linear(in_features=768, out_features=768, bias=True)
      (v_proj): Linear(in_features=768, out_features=768, bias=True)
      (q_proj): Linear(in_features=768, out_features=768, bias=True)
      (out_proj): Linear(in_features=768, out_features=768, bias=True)
    )
    (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
    (encoder_attn): BartAttention(
      (k_proj): Linear(in_features=768, out_features=768, bias=True)
      (v_proj): Linear(in_features=768, out_features=768, bias=True)
      (q_proj): Linear(in_features=768, out_features=768, bias=True)
      (out_proj): Linear(in_features=768, out_features=768, bias=True)
    )
    (encoder_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
    (fc1): Linear(in_features=768, out_features=3072, bias=True)
    (fc2): Linear(in_features=3072, out_features=768, bias=True)
    (final_layer_norm): LayerNo

# Structured Pruning

In [22]:
model.model.encoder.layers = nn.Sequential(*list(model.model.encoder.layers.children())[:-3])
model.model.decoder.layers = nn.Sequential(*list(model.model.decoder.layers.children())[:-3])

In [23]:
model

BartForConditionalGeneration(
  (model): BartModel(
    (shared): Embedding(30000, 768, padding_idx=3)
    (encoder): BartEncoder(
      (embed_tokens): Embedding(30000, 768, padding_idx=3)
      (embed_positions): BartLearnedPositionalEmbedding(1028, 768)
      (layers): Sequential(
        (0): BartEncoderLayer(
          (self_attn): BartAttention(
            (k_proj): Linear(in_features=768, out_features=768, bias=True)
            (v_proj): Linear(in_features=768, out_features=768, bias=True)
            (q_proj): Linear(in_features=768, out_features=768, bias=True)
            (out_proj): Linear(in_features=768, out_features=768, bias=True)
          )
          (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (fc1): Linear(in_features=768, out_features=3072, bias=True)
          (fc2): Linear(in_features=3072, out_features=768, bias=True)
          (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        )
        

In [25]:
config.encoder_layers=3
config.decoder_layers=3

# Saving the pruned model

In [26]:
model.save_pretrained('encoder_decoder_pruned_last_3')



In [27]:
tokenizer.save_pretrained('encoder_decoder_pruned_last_3')

('encoder_decoder_pruned_last_3/tokenizer_config.json',
 'encoder_decoder_pruned_last_3/special_tokens_map.json',
 'encoder_decoder_pruned_last_3/vocab.json',
 'encoder_decoder_pruned_last_3/merges.txt',
 'encoder_decoder_pruned_last_3/added_tokens.json',
 'encoder_decoder_pruned_last_3/tokenizer.json')

In [28]:
config.save_pretrained('encoder_decoder_pruned_last_3')



In [12]:
performance_test.performance_test(model=model, cpu_flag=False)

Reusing dataset paper_summarization (/opt/ml/.cache/huggingface/datasets/metamong1___paper_summarization/Paper Summarization/1.4.0/24bb09528ebb04fdc6aafb6e110202e52fbb818c0f204839bc833d8ce1e86a5f)


  0%|          | 0/2 [00:00<?, ?it/s]

Loading cached shuffled indices for dataset at /opt/ml/.cache/huggingface/datasets/metamong1___paper_summarization/Paper Summarization/1.4.0/24bb09528ebb04fdc6aafb6e110202e52fbb818c0f204839bc833d8ce1e86a5f/cache-c257b57580cd482c.arrow


  0%|          | 0/19 [00:00<?, ?ba/s]

====ROUGE score====
{'rouge1': 0.0753, 'rouge2': 0.0, 'rougeL': 0.0753, 'rougeLsum': 0.0753}
Model size (MB) = 283.36870288848877




Average time took(ms) 7.1e+01 +\- 2.38


In [16]:
performance_test.performance_test(model=model, cpu_flag=False)

Reusing dataset paper_summarization (/opt/ml/.cache/huggingface/datasets/metamong1___paper_summarization/Paper Summarization/2.2.0/46d835d4e22daa3a5a46d13de39e3d75f6c2eaef5ead153d48cbe8d7cd3bec9c)


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/19 [00:00<?, ?ba/s]

====ROUGE score====
{'rouge1': 0.0909, 'rouge2': 0.0, 'rougeL': 0.0909, 'rougeLsum': 0.0909}
Model size (MB) = 436.6297426223755
Average time took(ms) 1e+02 +\- 0.64
