In [1]:
!pip freeze | grep tensorflow
!pip freeze | grep sentencepiece
!pip freeze | grep transformers

tensorflow==2.6.2
tensorflow-estimator==2.6.0
sentencepiece==0.1.96
transformers==4.15.0


In [2]:
import json
import pandas as pd
import numpy as np
import os
import sys
import tensorflow as tf
from time import time
import io
import re

import pickle
from csv import reader
import matplotlib.pyplot as plt

import matplotlib.pyplot as plt
from matplotlib import colors
from matplotlib.ticker import PercentFormatter

from tensorflow.keras import layers
from tensorflow.keras.backend import sparse_categorical_crossentropy
from tensorflow.keras.layers import Dense, Flatten

from datetime import datetime

from collections import defaultdict

import sentencepiece

In [3]:
from transformers import BertTokenizer, TFBertModel, T5Tokenizer, TFT5ForConditionalGeneration

# T5<a id="T5" />


Let us now lay the foundations for another useful model: **T5**. 

T5 is a pre-trained transformer-based text-to-text model introduced by C. Raffel et al in  ["Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer"](https://arxiv.org/pdf/1910.10683.pdf) , that is also available from Huggingface.  The idea is to view/rephrase tasks as 'text-to-text' problems:   

T5 has performed very well on a variety of tasks.

In this spirit, let us approach the NER classification discussed above in a completely different may: **as a translation problem**. This may certainly lead to less good results than the BERT model, as phrasing it as a translation problem is not very natural. But it is instructive nevertheless.

(**Note:** this is pretty cutting-edge as there is very little information available on fine-tuning of T5 with TensorFlow/Keras. So this notebook should be viewed as work in progress, and mistakes may be present.)

T5 is available in various sizes. Here, we use the small size with about 60m parameters.

### T5 as a Black Box

Let us first play with Huggingface's T5 model. We start with the T5ForConditionalGeneration model imported above to verify some pre-training claims. This model uses a source sentence AND the task as an input and then generates the output token by token.

Here are some examples:

In [4]:
t5_model = 't5-small'

t5_tokenizer = T5Tokenizer.from_pretrained(t5_model)
t5 = TFT5ForConditionalGeneration.from_pretrained(t5_model)

All model checkpoint layers were used when initializing TFT5ForConditionalGeneration.

All the layers of TFT5ForConditionalGeneration were initialized from the model checkpoint at t5-small.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFT5ForConditionalGeneration for predictions without further training.


In [5]:
df = pd.read_csv('cleaned_lyrics.csv')
df.head(2)

Unnamed: 0,artist,title,lyrics,description,release date,year
0,ray charles,hit the road jack,\nhit the road jack and doncha come back\nno m...,This tongue and cheek verbal duel of a couple ...,August 1961,1961
1,ray charles,georgia on my mind,\ngeorgia\ngeorgia\nthe whole day through\n(th...,Written by Hoagy Carmichael and Stuart Gorrell...,September 1960,1960


In [6]:
df = df[df['artist'] == 'the beatles'].reset_index()
df

Unnamed: 0,index,artist,title,lyrics,description,release date,year
0,189,the beatles,let it be,\nwhen i find myself in times of trouble mothe...,"One of The Beatles' many iconic ballads, writt...","May 8, 1970",1970
1,190,the beatles,yesterday,\nyesterday\nall my troubles seemed so far awa...,“Yesterday” is the most covered song in histor...,"September 13, 1965",1965
2,191,the beatles,come together,\nshoot me\nshoot me\nshoot me\nshoot me\n\nhe...,“Come Together” is the opening track to Abbey ...,"September 26, 1969",1969
3,192,the beatles,hey jude,\nhey jude dont make it bad\ntake a sad song a...,"Originally titled “Hey Jules,” named after Joh...","August 26, 1968",1968


In [7]:
text = df['lyrics'][3].replace('\n', ' ')


encoding = t5_tokenizer.encode("""summarize: """ + text, return_tensors='tf')

Token indices sequence length is longer than the specified maximum sequence length for this model (1181 > 512). Running this sequence through the model will result in indexing errors


In [8]:
encoding

<tf.Tensor: shape=(1, 1181), dtype=int32, numpy=array([[21603,    10,     3, ..., 14312,    15,     1]], dtype=int32)>

In [54]:
outputs = t5.generate(encoding,
                      num_beams=4, 
                      no_repeat_ngram_size=2,
                      min_length=30,
                      max_length=100,
                      early_stopping=True)

In [55]:
outputs

<tf.Tensor: shape=(1, 78), dtype=int32, numpy=
array([[    0,     3, 13133,     3, 14312,    15,  2483,   143,    34,
         1282,   240,     3,     9,  6819,  2324,   258,    25,    54,
          456,    12,   143,     8,  2324,   394,    78,   752,    34,
           91,    11,   752,   160,   139,    39,   842,     3,     5,
           25,   214,    24,    34,    31,     7,   131,    25,     6,
           25,   195,   103,     8,  2426,    25,   174,    19,    30,
           39,  8173,     6,     3,    29,     9,     9,     3,     7,
           29,    32,    32,    40,    11,     3,    32,   107, 17945,
        17945,     3,    31, 10070, 17945,    31]], dtype=int32)>

In [56]:
summarization = t5_tokenizer.decode(outputs[0], skip_special_tokens=True, clean_up_tokenization_spaces=False)

In [57]:
summarization

"hey jude dont make it bad take a sad song then you can start to make the song better so let it out and let her into your heart . you know that it's just you, youll do the movement you need is on your shoulder, naa snool and oh yeah yeah 'yes yeah'"

In [58]:
print([t5_tokenizer.decode(g, skip_special_tokens=True, 
                           clean_up_tokenization_spaces=False) for g in outputs])

["hey jude dont make it bad take a sad song then you can start to make the song better so let it out and let her into your heart . you know that it's just you, youll do the movement you need is on your shoulder, naa snool and oh yeah yeah 'yes yeah'"]


In [59]:
text

' hey jude dont make it bad take a sad song and make it better remember to let her into your heart then you can start to make it better  hey jude dont be afraid you were made to go out and get her the minute you let her under your skin then you begin to make it better  and anytime you feel the pain hey jude refrain dont carry the world upon your shoulders for well you know that its a fool who plays it cool by making his world a little colder na na na na na na na na na na  hey jude dont let me down you have found her now go and get her (let it out and let it in) remember (hey jude) to let her into your heart then you can start to make it better so let it out and let it in hey jude begin youre waiting for someone to perform with and dont you know that its just you hey jude youll do the movement you need is on your shoulder na na na na na na na na na yeah  hey jude dont make it bad take a sad song and make it better remember to let her under your skin then youll begin to make it (whoa fuc

In [60]:
summarization

"hey jude dont make it bad take a sad song then you can start to make the song better so let it out and let her into your heart . you know that it's just you, youll do the movement you need is on your shoulder, naa snool and oh yeah yeah 'yes yeah'"

## Evaluation Attempt

In [8]:
pip install bert_score -q

You should consider upgrading via the '/home/ccal0507/anaconda3/bin/python -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.


In [9]:
import bert_score
bert_score.__version__

'0.3.11'

In [10]:
from bert_score import score

In [32]:
text

' when i find myself in times of trouble mother mary comes to me speaking words of wisdom "let it be" and in my hour of darkness she is standing right in front of me speaking words of wisdom "let it be"  let it be let it be let it be let it be whisper words of wisdom let it be  and when the brokenhearted people living in the world agree there will be an answer let it be for though they may be parted there is still a chance that they will see there will be an answer let it be  let it be let it be let it be let it be yeah there will be an answer let it be let it be let it be let it be let it be whisper words of wisdom let it be  let it be let it be let it be yeah let it be whisper words of wisdom let it be and when the night is cloudy there is still a light that shines on me shine on til tomorrow let it be i wake up to the sound of music mother mary comes to me speaking words of wisdom "let it be"  let it be let it be let it be yeah let it be oh there will be an answer let it be let it b

In [33]:
summarization

'mother mary comes to me speaking words of wisdom "let it be" let the answer be let it go yeah . when the brokenhearted people may be parted there will be a answer for if they will see there is still an answer letting it get letit be whisper words oh i wake up to the sound of music mother she is standing right in front of me talking words about wisdom \'let the be\''

In [39]:
score(summarization, text)

AssertionError: Different number of candidates and references

In [2]:
#!pip install transformers==2.2.0
!pip install bert-extractive-summarizer
#!pip install spacy==2.0.12

Collecting bert-extractive-summarizer
  Downloading bert_extractive_summarizer-0.10.1-py3-none-any.whl (25 kB)
Installing collected packages: bert-extractive-summarizer
Successfully installed bert-extractive-summarizer-0.10.1
You should consider upgrading via the '/home/ccal0507/anaconda3/bin/python -m pip install --upgrade pip' command.[0m


In [3]:
from summarizer import Summarizer,TransformerSummarizer

In [41]:
text

"One of The Beatles' many iconic ballads, written by McCartney. The level of repetition in the lyrics suggests this song was written quickly and through emotional inspiration — similar patterns can be seen in songs like “Hey Jude”."

In [32]:
bert_model = Summarizer()
bert_summary = ''.join(bert_model(text, min_length=60))

Some weights of the model checkpoint at bert-large-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7f

In [33]:
print(bert_summary)

One of The Beatles' many iconic ballads, written by McCartney. The level of repetition in the lyrics suggests this song was written quickly and through emotional inspiration — similar patterns can be seen in songs like “Hey Jude”.


In [34]:
text

"One of The Beatles' many iconic ballads, written by McCartney. The level of repetition in the lyrics suggests this song was written quickly and through emotional inspiration — similar patterns can be seen in songs like “Hey Jude”."

# BERT

In [6]:
pip install pytorch_transformers

Collecting pytorch_transformers
  Downloading pytorch_transformers-1.2.0-py3-none-any.whl (176 kB)
     |████████████████████████████████| 176 kB 7.6 MB/s            
Collecting boto3
  Downloading boto3-1.21.27-py3-none-any.whl (132 kB)
     |████████████████████████████████| 132 kB 46.3 MB/s            
Collecting s3transfer<0.6.0,>=0.5.0
  Downloading s3transfer-0.5.2-py3-none-any.whl (79 kB)
     |████████████████████████████████| 79 kB 966 kB/s             
[?25hCollecting jmespath<2.0.0,>=0.7.1
  Downloading jmespath-1.0.0-py3-none-any.whl (23 kB)
Collecting botocore<1.25.0,>=1.24.27
  Downloading botocore-1.24.27-py3-none-any.whl (8.6 MB)
     |████████████████████████████████| 8.6 MB 48.1 MB/s            
Installing collected packages: jmespath, botocore, s3transfer, boto3, pytorch-transformers
Successfully installed boto3-1.21.27 botocore-1.24.27 jmespath-1.0.0 pytorch-transformers-1.2.0 s3transfer-0.5.2
You should consider upgrading via the '/home/ccal0507/anaconda3/bin/pyth

In [7]:
import copy

import torch
import torch.nn as nn
from pytorch_transformers import BertModel, BertConfig
from torch.nn.init import xavier_uniform_

from models.decoder import TransformerDecoder
from models.encoder import Classifier, ExtTransformerEncoder
from models.optimizers import Optimizer

ModuleNotFoundError: No module named 'models'

In [1]:
class Bert(nn.Module):
    def __init__(self, large, temp_dir, finetune=False):
        super(Bert, self).__init__()
        if(large):
            self.model = BertModel.from_pretrained('bert-large-uncased', cache_dir=temp_dir)
        else:
            self.model = BertModel.from_pretrained('bert-base-uncased', cache_dir=temp_dir)

        self.finetune = finetune

    def forward(self, x, segs, mask):
        if(self.finetune):
            top_vec, _ = self.model(x, segs, attention_mask=mask)
        else:
            self.eval()
            with torch.no_grad():
                top_vec, _ = self.model(x, segs, attention_mask=mask)
        return top_vec


class ExtSummarizer(nn.Module):
    def __init__(self, args, device, checkpoint):
        super(ExtSummarizer, self).__init__()
        self.args = args
        self.device = device
        self.bert = Bert(args.large, args.temp_dir, args.finetune_bert)

        self.ext_layer = ExtTransformerEncoder(self.bert.model.config.hidden_size, args.ext_ff_size, args.ext_heads,
                                               args.ext_dropout, args.ext_layers)
        if (args.encoder == 'baseline'):
            bert_config = BertConfig(self.bert.model.config.vocab_size, hidden_size=args.ext_hidden_size,
                                     num_hidden_layers=args.ext_layers, num_attention_heads=args.ext_heads, intermediate_size=args.ext_ff_size)
            self.bert.model = BertModel(bert_config)
            self.ext_layer = Classifier(self.bert.model.config.hidden_size)

        if(args.max_pos>512):
            my_pos_embeddings = nn.Embedding(args.max_pos, self.bert.model.config.hidden_size)
            my_pos_embeddings.weight.data[:512] = self.bert.model.embeddings.position_embeddings.weight.data
            my_pos_embeddings.weight.data[512:] = self.bert.model.embeddings.position_embeddings.weight.data[-1][None,:].repeat(args.max_pos-512,1)
            self.bert.model.embeddings.position_embeddings = my_pos_embeddings


        if checkpoint is not None:
            self.load_state_dict(checkpoint['model'], strict=True)
        else:
            if args.param_init != 0.0:
                for p in self.ext_layer.parameters():
                    p.data.uniform_(-args.param_init, args.param_init)
            if args.param_init_glorot:
                for p in self.ext_layer.parameters():
                    if p.dim() > 1:
                        xavier_uniform_(p)

        self.to(device)

    def forward(self, src, segs, clss, mask_src, mask_cls):
        top_vec = self.bert(src, segs, mask_src)
        sents_vec = top_vec[torch.arange(top_vec.size(0)).unsqueeze(1), clss]
        sents_vec = sents_vec * mask_cls[:, :, None].float()
        sent_scores = self.ext_layer(sents_vec, mask_cls).squeeze(-1)
        return sent_scores, mask_cls


class AbsSummarizer(nn.Module):
    def __init__(self, args, device, checkpoint=None, bert_from_extractive=None):
        super(AbsSummarizer, self).__init__()
        self.args = args
        self.device = device
        self.bert = Bert(args.large, args.temp_dir, args.finetune_bert)

        if bert_from_extractive is not None:
            self.bert.model.load_state_dict(
                dict([(n[11:], p) for n, p in bert_from_extractive.items() if n.startswith('bert.model')]), strict=True)

        if (args.encoder == 'baseline'):
            bert_config = BertConfig(self.bert.model.config.vocab_size, hidden_size=args.enc_hidden_size,
                                     num_hidden_layers=args.enc_layers, num_attention_heads=8,
                                     intermediate_size=args.enc_ff_size,
                                     hidden_dropout_prob=args.enc_dropout,
                                     attention_probs_dropout_prob=args.enc_dropout)
            self.bert.model = BertModel(bert_config)

        if(args.max_pos>512):
            my_pos_embeddings = nn.Embedding(args.max_pos, self.bert.model.config.hidden_size)
            my_pos_embeddings.weight.data[:512] = self.bert.model.embeddings.position_embeddings.weight.data
            my_pos_embeddings.weight.data[512:] = self.bert.model.embeddings.position_embeddings.weight.data[-1][None,:].repeat(args.max_pos-512,1)
            self.bert.model.embeddings.position_embeddings = my_pos_embeddings
        self.vocab_size = self.bert.model.config.vocab_size
        tgt_embeddings = nn.Embedding(self.vocab_size, self.bert.model.config.hidden_size, padding_idx=0)
        if (self.args.share_emb):
            tgt_embeddings.weight = copy.deepcopy(self.bert.model.embeddings.word_embeddings.weight)

        self.decoder = TransformerDecoder(
            self.args.dec_layers,
            self.args.dec_hidden_size, heads=self.args.dec_heads,
            d_ff=self.args.dec_ff_size, dropout=self.args.dec_dropout, embeddings=tgt_embeddings)

        self.generator = get_generator(self.vocab_size, self.args.dec_hidden_size, device)
        self.generator[0].weight = self.decoder.embeddings.weight


        if checkpoint is not None:
            self.load_state_dict(checkpoint['model'], strict=True)
        else:
            for module in self.decoder.modules():
                if isinstance(module, (nn.Linear, nn.Embedding)):
                    module.weight.data.normal_(mean=0.0, std=0.02)
                elif isinstance(module, nn.LayerNorm):
                    module.bias.data.zero_()
                    module.weight.data.fill_(1.0)
                if isinstance(module, nn.Linear) and module.bias is not None:
                    module.bias.data.zero_()
            for p in self.generator.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)
                else:
                    p.data.zero_()
            if(args.use_bert_emb):
                tgt_embeddings = nn.Embedding(self.vocab_size, self.bert.model.config.hidden_size, padding_idx=0)
                tgt_embeddings.weight = copy.deepcopy(self.bert.model.embeddings.word_embeddings.weight)
                self.decoder.embeddings = tgt_embeddings
                self.generator[0].weight = self.decoder.embeddings.weight

        self.to(device)

NameError: name 'nn' is not defined