# Model builder

This notebook serves to build and test the models for song interpretation. We store and load them from the Hugging Face Hub repository. Note that building the models requires a considerable amount of resources.

# Imports and installs

Installs and imports the necessary libraries and packages. Also mounts Google Drive and logs in to Hugging Face.

In [None]:
! pip install -U transformers
!pip install transformers[sentencepiece]
!pip install -U huggingface_hub
!pip install tokenizers
! pip install -U accelerate
!pip install datasets
!pip install nltk
!pip install bert-score
!pip install rouge

Collecting transformers
  Downloading transformers-4.34.1-py3-none-any.whl (7.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.7/7.7 MB[0m [31m22.3 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.16.4 (from transformers)
  Downloading huggingface_hub-0.18.0-py3-none-any.whl (301 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.0/302.0 kB[0m [31m30.2 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers<0.15,>=0.14 (from transformers)
  Downloading tokenizers-0.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.8/3.8 MB[0m [31m57.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors>=0.3.1 (from transformers)
  Downloading safetensors-0.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m55.0 MB/s[0m eta [36m0:00:00[0m
Col

In [None]:
# general
import pickle
import numpy as np

# dataset
!pip install ftfy
!pip install datasets

from datasets import Dataset, DatasetDict
from ftfy import fix_text
import pandas as pd
import json
from sklearn.model_selection import train_test_split
import re

from transformers import AutoTokenizer

# metrics
from rouge import Rouge
from bert_score import score

# model
import nltk
nltk.download("punkt")
nltk.download('wordnet')
from nltk import sent_tokenize, word_tokenize
from transformers import TFAutoModelForSeq2SeqLM
from transformers import create_optimizer
import tensorflow as tf
from transformers import DataCollatorForSeq2Seq
from transformers import Seq2SeqTrainingArguments
from transformers import Seq2SeqTrainer
from tqdm import tqdm
import numpy as np

Collecting ftfy
  Downloading ftfy-6.1.1-py3-none-any.whl (53 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/53.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m53.1/53.1 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: ftfy
Successfully installed ftfy-6.1.1


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


In [None]:
# Mount Google Drive at /content/drive (Colab only); the dataset paths
# configured further down point into this mount.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


Please paste your own Hugging Face access token (one with write access) when prompted, and uncheck "Add token as git credential?". This allows us to save the models at each epoch to our Hugging Face repo. Never store a token in the notebook itself.

In [None]:
from huggingface_hub import notebook_login

# SECURITY: an access token used to be pasted here as a comment. Tokens must
# never be stored in the notebook: anyone who can read the file can push to the
# account. Revoke any token that was ever committed, create a new one at
# https://huggingface.co/settings/tokens (write access) and paste it into the
# prompt that notebook_login() opens.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

# Dataset functions

Utility functions for loading, saving and pre-processing the dataset. When building a model, you can use the slider to select the number of rows (song-lyric pairs) of the dataset to keep.

In [None]:
# Number of rows (song/comment pairs) that load_unclean_dataset samples from
# the raw data; exposed as a Colab form slider.
songs_to_load = 3000 # @param {type:"slider", min:500, max:3000, step:500}

def load_clean_dataset(load_path):
  """Load a previously pickled dataset from ``load_path`` and return it.

  The unpickled object is printed, followed by a "Loaded DATASET" banner.
  Only point this at files you created yourself: unpickling untrusted data
  can execute arbitrary code.
  """
  banner = "=======================\nLoaded DATASET\n=======================\n"
  with open(load_path, "rb") as pickle_file:
    loaded = pickle.load(pickle_file)
  print(loaded)
  print(banner)
  return loaded

def save_dataset(save_path, dataset):
  """Pickle ``dataset`` into the file at ``save_path`` (overwriting it)."""
  serialized = pickle.dumps(dataset)
  with open(save_path, "wb") as sink:
    sink.write(serialized)

# Scraped comments end with a signature such as "xxletsxrockon January 17, 2007"
# followed by non-breaking spaces: a username glued to the word "on", then the
# posting date. The signature is removed by dropping the last word that ends
# with "on" together with everything after it.

def clean_comment(comment):
  """Remove the trailing "<username>on <Month D, YYYY>" signature from a comment.

  Args:
    comment: raw comment text.

  Returns:
    ``comment`` unchanged when it contains no "Month D, YYYY" date or when no
    word ends with "on". Otherwise the words preceding the last word that ends
    with "on", re-joined with single spaces (so whitespace is normalized in
    that case).

  Note:
    The cut happens at the last word ending in "on" wherever it occurs, so a
    comment that merely mentions a date in its text is truncated as well.
  """
  date_pattern = r'\b(?:January|February|March|April|May|June|July|August|September|October|November|December)\s\d{1,2},\s\d{4}\b'

  # Only an existence check is needed, so search instead of collecting matches.
  if re.search(date_pattern, comment) is None:
    return comment

  words = comment.split()
  # Walk backwards to the last word ending in "on"; this replaces the former
  # pop-until-IndexError loop and its bare except.
  for index in range(len(words) - 1, -1, -1):
    if words[index].endswith("on"):
      return " ".join(words[:index])

  # A date is present but no word ends with "on": leave the comment untouched.
  return comment


def load_unclean_dataset(load_file_path, save_file_path, num_songs=None):
  """Build a cleaned train/validation DatasetDict from the raw JSON dump.

  Steps: read the JSON file, repair encoding problems with ftfy, strip the
  username/date signature from every comment, drop entries whose lyrics are
  2048 characters or longer, sample the requested number of rows, drop the id
  columns, split 90/10 into train/validation and pickle the result.

  Args:
    load_file_path: path of the raw JSON file.
    save_file_path: path the resulting DatasetDict is pickled to.
    num_songs: number of rows to sample. Defaults to the notebook-level
      ``songs_to_load`` slider value; it is clamped to the rows available.

  Returns:
    DatasetDict with "train" and "validation" splits.
  """
  if num_songs is None:
    num_songs = songs_to_load

  # load json file (explicit encoding so the result does not depend on the locale)
  with open(load_file_path, encoding="utf-8") as f:
    text = f.read()

  # clean text / fix encoding errors
  fixed_text = fix_text(text)
  data = json.loads(fixed_text)

  # convert to pandas dataframe to easily clean comments
  df = pd.json_normalize(data)

  # clean comments
  df['comment'] = df['comment'].apply(clean_comment)

  # remove long entries
  df = df[df['lyrics'].map(len) < 2048]

  # Limit df as the dataset is way too big; never ask for more rows than exist,
  # which would make DataFrame.sample raise.
  df = df.sample(n=min(num_songs, len(df)), random_state=42)

  # remove useless columns
  df = df.drop(['music4all_id', 'songmeanings_id'], axis=1)

  # split into sets
  train, val = train_test_split(df, test_size=0.1, random_state=42, shuffle=True)

  # create dataset
  dataset_train = Dataset.from_dict(train)
  dataset_validation = Dataset.from_dict(val)
  dataset_dict = DatasetDict({"train": dataset_train, "validation": dataset_validation})

  with open(save_file_path, "wb") as f:
    pickle.dump(dataset_dict, f)

  print(dataset_dict)
  # Show one cleaned comment as a spot check (skipped for very small samples).
  if len(dataset_dict['train']) > 3:
    print(dataset_dict['train']['comment'][3])
  print("=======================\nLoaded DATASET\n=======================\n")
  return dataset_dict

# Tokenizer class

A specialized tokenizer class is used to simplify interacting with a pre-trained tokenizer.

In [None]:
class TokenizerManager:
    """Convenience wrapper around a pre-trained Hugging Face tokenizer."""

    def __init__(self, name):
        """Load the tokenizer published under ``name`` and print a banner."""
        self.tokenizer = AutoTokenizer.from_pretrained(name)
        print("=======================\nLoaded TOKENIZER\n=======================\n")

    def get_tokenizer(self):
        """Return the wrapped tokenizer object."""
        return self.tokenizer

    def preprocess_function(self, dataset):
        """Tokenize a batch: lyrics become the inputs, comments the labels.

        ``dataset`` must provide "lyrics" and "comment" entries. Both are
        truncated to 2048 tokens. The encoded lyrics are returned with the
        comments' input ids stored under "labels".
        """
        tokenize = self.tokenizer
        encoded_lyrics = tokenize(dataset["lyrics"], max_length=2048, truncation=True)
        encoded_comments = tokenize(dataset["comment"], max_length=2048, truncation=True)
        encoded_lyrics["labels"] = encoded_comments["input_ids"]
        return encoded_lyrics

    def tokenize_dataset(self, dataset):
        """Apply ``preprocess_function`` to ``dataset`` in batches and return the result."""
        encoded_dataset = dataset.map(self.preprocess_function, batched=True)
        print(encoded_dataset)
        print("=======================\nTokenized DATASET\n=======================\n")
        return encoded_dataset

    def tokenize_sentence(self, sen):
        """Print the encoding of ``sen`` and the tokens its input ids map back to."""
        encoding = self.tokenizer(sen)
        print("Inputs to the model would be : ", encoding)
        round_trip = self.tokenizer.convert_ids_to_tokens(encoding.input_ids)
        print("Converting input_ids back into words : ", round_trip)

# Model functions

Utility functions for loading, training and testing models.

In [None]:
def load_pretrained_model(name):
  """Return the TensorFlow seq2seq model loaded from the checkpoint ``name``."""
  return TFAutoModelForSeq2SeqLM.from_pretrained(name)

# Learning rate schedule handed to the Keras LearningRateScheduler callback
def scheduler(epoch, lr):
  """Return the learning rate to use for ``epoch``.

  ``epoch`` is the zero-based index Keras passes in: indices 0-6 (the first
  seven epochs) train at 6e-5, every later epoch at 6e-6. The current rate
  ``lr`` is accepted because the callback supplies it, but it is not used.
  """
  return 0.000006 if epoch > 6 else 0.00006


# XLA applies various optimizations to the model’s computation graph,
# and results in significant improvements to speed and memory usage
@tf.function(jit_compile=True)
def generate_with_xla(batch):
    """Generate output token ids for ``batch`` with XLA compilation enabled.

    ``batch`` must provide "input_ids" and "attention_mask". Generation is
    capped at 60 new tokens. The model used is the notebook-level ``my_model``,
    so that global must be defined before this function is called.
    """
    return my_model.generate(
        input_ids=batch["input_ids"],
        attention_mask=batch["attention_mask"],
        max_new_tokens = 60
    )

def generate_without_xla(batch):
    """Generate output token ids for ``batch`` without XLA compilation.

    Same call as ``generate_with_xla`` (notebook-level ``my_model``, at most
    60 new tokens), executed as a plain Python function.
    """
    generation_kwargs = {
        "input_ids": batch["input_ids"],
        "attention_mask": batch["attention_mask"],
        "max_new_tokens": 60,
    }
    return my_model.generate(**generation_kwargs)

def train_model(model, tokenizer, tokenized_datasets, hf_save, num_train_epochs=10, batch_size=2):
  """Fine-tune ``model`` on the tokenized data and push checkpoints to the Hub.

  Args:
    model: TF seq2seq model to train (compiled in place).
    tokenizer: tokenizer matching ``model``; used for collation and uploaded
      alongside the model.
    tokenized_datasets: DatasetDict with "train" and "validation" splits that
      still contain the raw "lyrics" and "comment" columns.
    hf_save: output directory handed to PushToHubCallback.
    num_train_epochs: number of epochs to train for (default 10).
    batch_size: batch size for both splits (default 2).

  Returns:
    The History object returned by ``model.fit``.
  """
  from transformers.keras_callbacks import PushToHubCallback

  data_collator = DataCollatorForSeq2Seq(tokenizer, model=model, return_tensors="tf")

  # Remove the text-based columns because the model only deals with the input ids
  tokenized_datasets = tokenized_datasets.remove_columns(["lyrics", "comment"])
  print(tokenized_datasets)

  # Sanity check: show what the data collator produces for the first examples
  sample_count = min(2, len(tokenized_datasets["train"]))
  features = [tokenized_datasets["train"][i] for i in range(sample_count)]
  print(features)
  print(data_collator(features))

  # Convert the datasets to tf.data pipelines using the data collator
  tf_train_dataset = model.prepare_tf_dataset(
    tokenized_datasets["train"],
    collate_fn=data_collator,
    shuffle=True,
    batch_size=batch_size,
  )

  tf_eval_dataset = model.prepare_tf_dataset(
    tokenized_datasets["validation"],
    collate_fn=data_collator,
    shuffle=False,
    batch_size=batch_size,
  )

  # The starting rate matches scheduler(0, ...); the LearningRateScheduler
  # callback below overrides the rate at the start of every epoch anyway.
  opt = tf.keras.optimizers.Adafactor(
    learning_rate=0.00006,
    jit_compile=True,
    name="Adafactor",
  )

  model.compile(optimizer=opt)

  # Per-epoch learning rate schedule
  lr_callback = tf.keras.callbacks.LearningRateScheduler(scheduler, verbose=1)

  # Train in mixed-precision float16
  # NOTE(review): the policy is set after the model was built and compiled.
  # Keras applies the dtype policy when layers are constructed, so this
  # presumably does not switch the already-loaded model to float16, while it
  # does affect Keras layers created later in the session. Confirm, and set
  # the policy before loading the model if mixed precision is really wanted.
  tf.keras.mixed_precision.set_global_policy("mixed_float16")

  # Save to the Hugging Face repo (requires a prior login and a working connection)
  hf_callback = PushToHubCallback(output_dir=hf_save, tokenizer=tokenizer)

  # Train the model
  history = model.fit(
    tf_train_dataset,
    validation_data=tf_eval_dataset,
    callbacks=[hf_callback, lr_callback],
    epochs=num_train_epochs
  )
  return history


def test_model(model, tokenizer, tokenized_datasets, batch_size=2, max_new_tokens=60):
  """Generate interpretations for the "test" split and score them.

  Args:
    model: TF seq2seq model used for generation.
    tokenizer: tokenizer matching ``model``.
    tokenized_datasets: DatasetDict containing a tokenized "test" split.
    batch_size: generation batch size (default 2); an incomplete final batch
      is dropped.
    max_new_tokens: cap on generated tokens per example (default 60).

  Returns:
    ``[rouge, meteor, [P, R, F1]]`` where ``rouge`` is the averaged ROUGE
    dict, ``meteor`` the mean METEOR score and ``P``/``R``/``F1`` the
    per-example BERTScore tensors. All three are computed over every
    generated example.

  Raises:
    ValueError: if the test split yields no complete batch.
  """
  from nltk.translate.meteor_score import meteor_score

  # Padding to a multiple of 320 keeps the number of distinct input shapes low
  generation_data_collator = DataCollatorForSeq2Seq(
      tokenizer, model=model, return_tensors="tf", pad_to_multiple_of=320
      )

  tf_generate_dataset = model.prepare_tf_dataset(
      tokenized_datasets["test"],
      collate_fn=generation_data_collator,
      shuffle=False,
      batch_size=batch_size,
      drop_remainder=True,
  )

  # Generate with the model that was passed in rather than a notebook global
  def _generate(batch):
    return model.generate(
        input_ids=batch["input_ids"],
        attention_mask=batch["attention_mask"],
        max_new_tokens=max_new_tokens,
    )

  # XLA-compiled variant; the plain function is the fallback
  generate_xla = tf.function(_generate, jit_compile=True)

  all_preds = []
  all_labels = []
  for batch, labels in tqdm(tf_generate_dataset):
      try:
        predictions = generate_xla(batch)
      except Exception:
        # XLA compilation/execution failed for this batch: run it eagerly
        predictions = _generate(batch)
      decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
      labels = labels.numpy()
      # -100 marks ignored label positions; swap in the pad id so they decode
      labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
      decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
      all_preds.extend("\n".join(sent_tokenize(pred.strip())) for pred in decoded_preds)
      all_labels.extend("\n".join(sent_tokenize(label.strip())) for label in decoded_labels)

  if not all_preds:
    raise ValueError("The test split produced no complete batch to evaluate.")

  # Score ALL examples (previously only the final batch was scored)
  rouge_result = Rouge().get_scores(all_preds, all_labels, avg=True)

  # meteor_score takes the list of references first, then the hypothesis
  meteor_total = sum(
      meteor_score([word_tokenize(ref)], word_tokenize(pred))
      for pred, ref in zip(all_preds, all_labels)
  )
  meteor_result = meteor_total / len(all_labels)

  P, R, F1 = score(all_preds, all_labels, lang="en", verbose=True)

  return [rouge_result, meteor_result, [P, R, F1]]

# Model builder

This process is meant to be as smooth as possible:
  - Load either an unclean or clean dataset (the latter requires no pre-processing)
  - The model checkpoint is the pre-trained model to load from the Hugging Face Hub (you can load any from https://huggingface.co/models - by simply pasting their name). We have only used "facebook/bart-large-cnn" and "google/pegasus-large"
  - If you have logged into the Hugging Face account, the value of `hf_directory` will be the model's name. This can be used to easily load it later (either for testing or directly to interpret songs)


In [None]:
# @title Enter parameters
load_clean = False # @param {type:"boolean"}
dataset_load_path = "/content/drive/My Drive/Comp 700/datasets/unclean/dataset_full_256.json" # @param {type:"string"}
dataset_save_path = "/content/drive/My Drive/Comp 700/datasets/clean/dataset_dict.pkl" # @param {type:"string"}

load_saved_model = True # @param {type:"boolean"}
model_checkpoint = "facebook/bart-large-cnn" # @param {type:"string"}
hf_directory = "fblc-3000" # @param {type:"string"}


# Dataset: either an already cleaned pickle, or the raw JSON that gets cleaned
# and saved to dataset_save_path.
if load_clean:
  dataset = load_clean_dataset(dataset_load_path)
else:
  dataset = load_unclean_dataset(dataset_load_path, dataset_save_path)

# Tokenize with the tokenizer that belongs to the chosen checkpoint.
my_tk = TokenizerManager(model_checkpoint)
my_tk_dataset = my_tk.tokenize_dataset(dataset)

# Model: either the fine-tuned model stored on the Hub, or the pre-trained
# checkpoint, which is then fine-tuned here.
if load_saved_model:
  my_model = TFAutoModelForSeq2SeqLM.from_pretrained("s-man2099/" + hf_directory)
else:
  my_model = load_pretrained_model(model_checkpoint)
  train_model(my_model, my_tk.get_tokenizer(), my_tk_dataset, hf_directory)


DatasetDict({
    train: Dataset({
        features: ['lyrics', 'comment'],
        num_rows: 2700
    })
    validation: Dataset({
        features: ['lyrics', 'comment'],
        num_rows: 300
    })
})
Garden State soundtrack. I bought it for The Shins and Iron and Wine, not exactly expecting such an incredible song. You know how in the movie, before giving him the Shins song, she says it will change his life? This song changed my life. Beauty changed my life.
Loaded DATASET



Downloading (…)lve/main/config.json:   0%|          | 0.00/1.58k [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Loaded TOKENIZER



Map:   0%|          | 0/2700 [00:00<?, ? examples/s]

Map:   0%|          | 0/300 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['lyrics', 'comment', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 2700
    })
    validation: Dataset({
        features: ['lyrics', 'comment', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 300
    })
})
Tokenized DATASET



Downloading (…)lve/main/config.json:   0%|          | 0.00/1.63k [00:00<?, ?B/s]

Downloading tf_model.h5:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

All model checkpoint layers were used when initializing TFBartForConditionalGeneration.

All the layers of TFBartForConditionalGeneration were initialized from the model checkpoint at s-man2099/fblc-3000.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBartForConditionalGeneration for predictions without further training.


Downloading (…)neration_config.json:   0%|          | 0.00/358 [00:00<?, ?B/s]

# Model tester

Utility function for loading an unclean test set.

In [None]:
def load_unclean_test_dataset(load_file_path, save_file_path, num_rows_to_keep=100):
  """Build a cleaned "test" DatasetDict from a raw JSON dump.

  Steps: read the JSON file, repair encoding problems with ftfy, strip the
  username/date signature from every comment (as the training loader does),
  sample the requested number of rows, drop the id columns and optionally
  pickle the result.

  Args:
    load_file_path: path of the raw JSON file.
    save_file_path: path the DatasetDict is pickled to; saving is skipped
      when this is empty.
    num_rows_to_keep: number of rows to sample (default 100), clamped to the
      rows available.

  Returns:
    DatasetDict with a single "test" split.
  """
  # load json file (explicit encoding so the result does not depend on the locale)
  with open(load_file_path, encoding="utf-8") as f:
    text = f.read()

  # clean text / fix encoding errors
  fixed_text = fix_text(text)
  data = json.loads(fixed_text)

  # convert to pandas dataframe to easily clean comments
  df = pd.json_normalize(data)

  # clean comments so the references match the format used for training
  df['comment'] = df['comment'].apply(clean_comment)

  # Limit df because the dataset is way too big; never ask for more rows than
  # exist, which would make DataFrame.sample raise.
  df = df.sample(n=min(num_rows_to_keep, len(df)), random_state=42)

  # remove useless columns
  df = df.drop(['music4all_id', 'songmeanings_id'], axis=1)

  # Create a Hugging Face Dataset from the pandas DataFrame; dropping the index
  # here avoids the extra "__index_level_0__" column altogether.
  dataset = Dataset.from_pandas(df, preserve_index=False)

  # create dataset
  dataset_dict = DatasetDict({"test": dataset})

  # The notebook's default save path is empty, which open() cannot handle.
  if save_file_path:
    with open(save_file_path, "wb") as f:
      pickle.dump(dataset_dict, f)

  print(dataset_dict)
  print("=======================\nLoaded DATASET\n=======================\n")
  return dataset_dict

The following is used to load a test set (either clean or unclean) and to test the model.

In [None]:

load_clean_test = True # @param {type:"boolean"}
test_dataset_load_path = "/content/drive/My Drive/Comp 700/datasets/clean/dataset_test.pkl"# @param {type:"string"}
test_dataset_save_path = ""# @param {type:"string"}

# Test set: raw JSON that still needs processing, or an already prepared pickle.
if not load_clean_test:
  test_dataset = load_unclean_test_dataset(test_dataset_load_path, test_dataset_save_path)
else:
  test_dataset = load_clean_dataset(test_dataset_load_path)

# Tokenize with the tokenizer loaded in the model builder cell.
test_tk_dataset = my_tk.tokenize_dataset(test_dataset)

# Generate and score; my_model comes from the model builder cell.
results = test_model(my_model, my_tk.get_tokenizer(), test_tk_dataset)

DatasetDict({
    test: Dataset({
        features: ['lyrics', 'comment'],
        num_rows: 100
    })
})
Loaded DATASET



Map:   0%|          | 0/100 [00:00<?, ? examples/s]

You're using a BartTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


DatasetDict({
    test: Dataset({
        features: ['lyrics', 'comment', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 100
    })
})
Tokenized DATASET



100%|██████████| 50/50 [31:01<00:00, 37.23s/it]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/1 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/1 [00:00<?, ?it/s]

done in 0.09 seconds, 22.79 sentences/sec


Run the following to view the results.

In [None]:
# Display the list returned by test_model:
# [ROUGE scores, mean METEOR, [BERTScore P, R, F1]]
results

[{'rouge-1': {'r': 0.17743764172335602,
   'p': 0.4225563909774436,
   'f': 0.24900486099130287},
  'rouge-2': {'r': 0.04455128205128205,
   'p': 0.11736111111111111,
   'f': 0.06452380557278939},
  'rouge-l': {'r': 0.17126480221718315,
   'p': 0.40939849624060154,
   'f': 0.24060149964676505}},
 0.28509573388607945,
 [tensor([0.8596, 0.8433]),
  tensor([0.8255, 0.8194]),
  tensor([0.8422, 0.8312])]]

# Model train times and captured results

The following are simply text blocks for storing the results and training times of the models.

# google/pegasus-large

<hr>
Loading the whole dataset:
Epoch 1: LearningRateScheduler setting learning rate to 6e-05.
Epoch 1/20
   156/125536 [..............................] - ETA: 22:50:30 - loss: 4.2857
<hr>

Loading half the dataset:
Epoch 1: LearningRateScheduler setting learning rate to 6e-05.
Epoch 1/20
   13/62788 [..............................] - ETA: 12:49:32 - loss: 4.5145
<hr>
Loading 40%
Epoch 1: LearningRateScheduler setting learning rate to 6e-05.
Epoch 1/20
   32/50233 [..............................] - ETA: 9:45:38 - loss: 4.5979
<hr>
Loading 30%
Epoch 1: LearningRateScheduler setting learning rate to 6e-05.
Epoch 1/20
   28/37655 [..............................] - ETA: 7:53:14 - loss: 4.3459
<hr>
Loading 10%
Epoch 1: LearningRateScheduler setting learning rate to 6e-05.
Epoch 1/20
  141/12564 [..............................] - ETA: 2:15:11 - loss: 4.1844
<hr><br>

# <u>500 pairs of songs</u><br>
<hr>
Epoch 1: LearningRateScheduler setting learning rate to 6e-05.
Epoch 1/10
225/225 [==============================] - 491s 1s/step - loss: 4.1677 - val_loss: 3.5365 - lr: 6.0000e-05

Epoch 2: LearningRateScheduler setting learning rate to 6e-05.
Epoch 2/10
225/225 [==============================] - 203s 901ms/step - loss: 3.8816 - val_loss: 3.4822 - lr: 6.0000e-05

Epoch 3: LearningRateScheduler setting learning rate to 6e-05.
Epoch 3/10
225/225 [==============================] - 284s 1s/step - loss: 3.7387 - val_loss: 3.4553 - lr: 6.0000e-05

Epoch 4: LearningRateScheduler setting learning rate to 6e-05.
Epoch 4/10
225/225 [==============================] - 286s 1s/step - loss: 3.6480 - val_loss: 3.4469 - lr: 6.0000e-05

Epoch 5: LearningRateScheduler setting learning rate to 6e-05.
Epoch 5/10
225/225 [==============================] - 282s 1s/step - loss: 3.5652 - val_loss: 3.4379 - lr: 6.0000e-05

Epoch 6: LearningRateScheduler setting learning rate to 6e-05.
Epoch 6/10
225/225 [==============================] - 282s 1s/step - loss: 3.4882 - val_loss: 3.4368 - lr: 6.0000e-05

Epoch 7: LearningRateScheduler setting learning rate to 6e-05.
Epoch 7/10
225/225 [==============================] - 281s 1s/step - loss: 3.4169 - val_loss: 3.4429 - lr: 6.0000e-05

Epoch 8: LearningRateScheduler setting learning rate to 6e-06.
Epoch 8/10
225/225 [==============================] - 285s 1s/step - loss: 3.3414 - val_loss: 3.4427 - lr: 6.0000e-06

Epoch 9: LearningRateScheduler setting learning rate to 6e-06.
Epoch 9/10
225/225 [==============================] - 197s 876ms/step - loss: 3.3382 - val_loss: 3.4421 - lr: 6.0000e-06

Epoch 10: LearningRateScheduler setting learning rate to 6e-06.
Epoch 10/10
225/225 [==============================] - 280s 1s/step - loss: 3.3443 - val_loss: 3.4426 - lr: 6.0000e-06
ROUGE, METEOR, BERTScore (P, R, F1)
[{'rouge-1': {'r': 0.01020408163265306, 'p': 0.2, 'f': 0.01941747526628335},
  'rouge-2': {'r': 0.0, 'p': 0.0, 'f': 0.0},
  'rouge-l': {'r': 0.01020408163265306, 'p': 0.2, 'f': 0.01941747526628335}},
 0.05859221777673178,
 [tensor([0.7319, 0.7708]),
  tensor([0.7736, 0.7973]),
  tensor([0.7522, 0.7838])]]

<hr>

# <u>Loading 1000 songs:</u><br>
Epoch 1: LearningRateScheduler setting learning rate to 6e-05.
Epoch 1/10
450/450 [==============================] - 597s 826ms/step - loss: 3.9851 - val_loss: 3.6010 - lr: 6.0000e-05

Epoch 2: LearningRateScheduler setting learning rate to 6e-05.
Epoch 2/10
450/450 [==============================] - 280s 623ms/step - loss: 3.7143 - val_loss: 3.5460 - lr: 6.0000e-05

Epoch 3: LearningRateScheduler setting learning rate to 6e-05.
Epoch 3/10
450/450 [==============================] - 277s 615ms/step - loss: 3.5973 - val_loss: 3.5299 - lr: 6.0000e-05

Epoch 4: LearningRateScheduler setting learning rate to 6e-05.
Epoch 4/10
450/450 [==============================] - 271s 603ms/step - loss: 3.5158 - val_loss: 3.5266 - lr: 6.0000e-05

Epoch 5: LearningRateScheduler setting learning rate to 6e-05.
Epoch 5/10
450/450 [==============================] - 276s 613ms/step - loss: 3.4439 - val_loss: 3.5331 - lr: 6.0000e-05

Epoch 6: LearningRateScheduler setting learning rate to 6e-05.
Epoch 6/10
450/450 [==============================] - 269s 599ms/step - loss: 3.3673 - val_loss: 3.5419 - lr: 6.0000e-05

Epoch 7: LearningRateScheduler setting learning rate to 6e-05.
Epoch 7/10
450/450 [==============================] - 270s 600ms/step - loss: 3.2942 - val_loss: 3.5538 - lr: 6.0000e-05

Epoch 8: LearningRateScheduler setting learning rate to 6e-06.
Epoch 8/10
450/450 [==============================] - 269s 598ms/step - loss: 3.2253 - val_loss: 3.5580 - lr: 6.0000e-06

Epoch 9: LearningRateScheduler setting learning rate to 6e-06.
Epoch 9/10
450/450 [==============================] - 268s 596ms/step - loss: 3.2226 - val_loss: 3.5617 - lr: 6.0000e-06

Epoch 10: LearningRateScheduler setting learning rate to 6e-06.
Epoch 10/10
450/450 [==============================] - 268s 597ms/step - loss: 3.2314 - val_loss: 3.5643 - lr: 6.0000e-06


[{'rouge-1': {'r': 0.018518518518518517, 'p': 0.25, 'f': 0.03448275797859692},
  'rouge-2': {'r': 0.004166666666666667,
   'p': 0.08333333333333333,
   'f': 0.007936507482993222},
  'rouge-l': {'r': 0.018518518518518517, 'p': 0.25, 'f': 0.03448275797859692}},
 0.06892781743239168,
 [tensor([0.8137, 0.7673]),
  tensor([0.7890, 0.7770]),
  tensor([0.8012, 0.7721])]]
<hr>

# <u>Loading 1500 songs:</u><br>
  Epoch 1: LearningRateScheduler setting learning rate to 6e-05.
Epoch 1/10
675/675 [==============================] - 609s 594ms/step - loss: 3.9497 - val_loss: 3.5189 - lr: 6.0000e-05

Epoch 2: LearningRateScheduler setting learning rate to 6e-05.
Epoch 2/10
675/675 [==============================] - 363s 538ms/step - loss: 3.6923 - val_loss: 3.4731 - lr: 6.0000e-05

Epoch 3: LearningRateScheduler setting learning rate to 6e-05.
Epoch 3/10
675/675 [==============================] - 353s 524ms/step - loss: 3.5513 - val_loss: 3.4603 - lr: 6.0000e-05

Epoch 4: LearningRateScheduler setting learning rate to 6e-05.
Epoch 4/10
675/675 [==============================] - 355s 527ms/step - loss: 3.4677 - val_loss: 3.4661 - lr: 6.0000e-05

Epoch 5: LearningRateScheduler setting learning rate to 6e-05.
Epoch 5/10
675/675 [==============================] - 356s 528ms/step - loss: 3.3892 - val_loss: 3.4678 - lr: 6.0000e-05

Epoch 6: LearningRateScheduler setting learning rate to 6e-05.
Epoch 6/10
675/675 [==============================] - 353s 523ms/step - loss: 3.3222 - val_loss: 3.4794 - lr: 6.0000e-05

Epoch 7: LearningRateScheduler setting learning rate to 6e-05.
Epoch 7/10
675/675 [==============================] - 352s 522ms/step - loss: 3.2575 - val_loss: 3.4887 - lr: 6.0000e-05

Epoch 8: LearningRateScheduler setting learning rate to 6e-06.
Epoch 8/10
675/675 [==============================] - 352s 521ms/step - loss: 3.1904 - val_loss: 3.4914 - lr: 6.0000e-06

Epoch 9: LearningRateScheduler setting learning rate to 6e-06.
Epoch 9/10
675/675 [==============================] - 352s 521ms/step - loss: 3.1848 - val_loss: 3.4940 - lr: 6.0000e-06

Epoch 10: LearningRateScheduler setting learning rate to 6e-06.
Epoch 10/10
675/675 [==============================] - 350s 519ms/step - loss: 3.1674 - val_loss: 3.4953 - lr: 6.0000e-06
[{'rouge-1': {'r': 0.028722600151171576,
   'p': 0.3875,
   'f': 0.05312533961674416},
  'rouge-2': {'r': 0.004166666666666667,
   'p': 0.0625,
   'f': 0.007812499414062544},
  'rouge-l': {'r': 0.028722600151171576,
   'p': 0.3875,
   'f': 0.05312533961674416}},
 0.09982092084754013,
 [tensor([0.7949, 0.7708]),
  tensor([0.7947, 0.7973]),
  tensor([0.7948, 0.7838])]]
<hr>

# <u>Loading 2000 songs:</u><br>
Epoch 1: LearningRateScheduler setting learning rate to 6e-05.
Epoch 1/10
900/900 [==============================] - 759s 586ms/step - loss: 3.9156 - val_loss: 3.4538 - lr: 6.0000e-05

Epoch 2: LearningRateScheduler setting learning rate to 6e-05.
Epoch 2/10
900/900 [==============================] - 472s 525ms/step - loss: 3.6384 - val_loss: 3.4017 - lr: 6.0000e-05

Epoch 3: LearningRateScheduler setting learning rate to 6e-05.
Epoch 3/10
900/900 [==============================] - 467s 519ms/step - loss: 3.5181 - val_loss: 3.3878 - lr: 6.0000e-05

Epoch 4: LearningRateScheduler setting learning rate to 6e-05.
Epoch 4/10
900/900 [==============================] - 464s 516ms/step - loss: 3.4227 - val_loss: 3.3802 - lr: 6.0000e-05

Epoch 5: LearningRateScheduler setting learning rate to 6e-05.
Epoch 5/10
900/900 [==============================] - 470s 523ms/step - loss: 3.3500 - val_loss: 3.3862 - lr: 6.0000e-05

Epoch 6: LearningRateScheduler setting learning rate to 6e-05.
Epoch 6/10
900/900 [==============================] - 468s 520ms/step - loss: 3.2821 - val_loss: 3.3914 - lr: 6.0000e-05

Epoch 7: LearningRateScheduler setting learning rate to 6e-05.
Epoch 7/10
900/900 [==============================] - 461s 512ms/step - loss: 3.2122 - val_loss: 3.4035 - lr: 6.0000e-05

Epoch 8: LearningRateScheduler setting learning rate to 6e-06.
Epoch 8/10
900/900 [==============================] - 461s 512ms/step - loss: 3.1475 - val_loss: 3.4099 - lr: 6.0000e-06

Epoch 9: LearningRateScheduler setting learning rate to 6e-06.
Epoch 9/10
900/900 [==============================] - 461s 513ms/step - loss: 3.1328 - val_loss: 3.4132 - lr: 6.0000e-06

Epoch 10: LearningRateScheduler setting learning rate to 6e-06.
Epoch 10/10
900/900 [==============================] - 458s 509ms/step - loss: 3.1254 - val_loss: 3.4150 - lr: 6.0000e-06
[{'rouge-1': {'r': 0.07086167800453515,
   'p': 0.4825174825174825,
   'f': 0.12326761453183363},
  'rouge-2': {'r': 0.020833333333333332,
   'p': 0.16666666666666666,
   'f': 0.03703703604938274},
  'rouge-l': {'r': 0.06575963718820861,
   'p': 0.4370629370629371,
   'f': 0.11409330260522814}},
 0.10804279255366213,
 [tensor([0.8073, 0.7865]),
  tensor([0.8105, 0.7979]),
  tensor([0.8089, 0.7921])]]
<hr>

# Loading 2500 songs:
  Epoch 1: LearningRateScheduler setting learning rate to 6e-05.
Epoch 1/10
1125/1125 [==============================] - 913s 608ms/step - loss: 3.8496 - val_loss: 3.4449 - lr: 6.0000e-05

Epoch 2: LearningRateScheduler setting learning rate to 6e-05.
Epoch 2/10
1125/1125 [==============================] - 561s 499ms/step - loss: 3.5895 - val_loss: 3.4074 - lr: 6.0000e-05

Epoch 3: LearningRateScheduler setting learning rate to 6e-05.
Epoch 3/10
1125/1125 [==============================] - 549s 488ms/step - loss: 3.4644 - val_loss: 3.3950 - lr: 6.0000e-05

Epoch 4: LearningRateScheduler setting learning rate to 6e-05.
Epoch 4/10
1125/1125 [==============================] - 554s 493ms/step - loss: 3.3848 - val_loss: 3.3998 - lr: 6.0000e-05

Epoch 5: LearningRateScheduler setting learning rate to 6e-05.
Epoch 5/10
1125/1125 [==============================] - 558s 496ms/step - loss: 3.3088 - val_loss: 3.4040 - lr: 6.0000e-05

Epoch 6: LearningRateScheduler setting learning rate to 6e-05.
Epoch 6/10
1125/1125 [==============================] - 563s 500ms/step - loss: 3.2404 - val_loss: 3.4086 - lr: 6.0000e-05

Epoch 7: LearningRateScheduler setting learning rate to 6e-05.
Epoch 7/10
1125/1125 [==============================] - 556s 494ms/step - loss: 3.1828 - val_loss: 3.4154 - lr: 6.0000e-05

Epoch 8: LearningRateScheduler setting learning rate to 6e-06.
Epoch 8/10
1125/1125 [==============================] - 560s 498ms/step - loss: 3.1157 - val_loss: 3.4240 - lr: 6.0000e-06

Epoch 9: LearningRateScheduler setting learning rate to 6e-06.
Epoch 9/10
1125/1125 [==============================] - 564s 501ms/step - loss: 3.1111 - val_loss: 3.4267 - lr: 6.0000e-06

Epoch 10: LearningRateScheduler setting learning rate to 6e-06.
Epoch 10/10
1125/1125 [==============================] - 565s 502ms/step - loss: 3.0929 - val_loss: 3.4296 - lr: 6.0000e-06

[{'rouge-1': {'r': 0.07086167800453515,
   'p': 0.4343891402714932,
   'f': 0.12183163492135196},
  'rouge-2': {'r': 0.020833333333333332,
   'p': 0.17857142857142858,
   'f': 0.03731343190020051},
  'rouge-l': {'r': 0.06468883849836231,
   'p': 0.3959276018099548,
   'f': 0.11119333704901155}},
 0.11153267947247295,
 [tensor([0.8052, 0.7871]),
  tensor([0.8125, 0.7991]),
  tensor([0.8088, 0.7930])]]

<hr>
# Loading 3000 songs:

Epoch 1: LearningRateScheduler setting learning rate to 6e-05.
Epoch 1/10
1350/1350 [==============================] - 1048s 596ms/step - loss: 3.8518 - val_loss: 3.4450 - lr: 6.0000e-05

Epoch 2: LearningRateScheduler setting learning rate to 6e-05.
Epoch 2/10
1350/1350 [==============================] - 656s 486ms/step - loss: 3.5976 - val_loss: 3.4109 - lr: 6.0000e-05

Epoch 3: LearningRateScheduler setting learning rate to 6e-05.
Epoch 3/10
1350/1350 [==============================] - 653s 483ms/step - loss: 3.4730 - val_loss: 3.3992 - lr: 6.0000e-05

Epoch 4: LearningRateScheduler setting learning rate to 6e-05.
Epoch 4/10
1350/1350 [==============================] - 650s 482ms/step - loss: 3.3789 - val_loss: 3.3987 - lr: 6.0000e-05

Epoch 5: LearningRateScheduler setting learning rate to 6e-05.
Epoch 5/10
1350/1350 [==============================] - 652s 483ms/step - loss: 3.3083 - val_loss: 3.4055 - lr: 6.0000e-05

Epoch 6: LearningRateScheduler setting learning rate to 6e-05.
Epoch 6/10
1350/1350 [==============================] - 653s 484ms/step - loss: 3.2449 - val_loss: 3.4183 - lr: 6.0000e-05

Epoch 7: LearningRateScheduler setting learning rate to 6e-05.
Epoch 7/10
1350/1350 [==============================] - 652s 483ms/step - loss: 3.1811 - val_loss: 3.4343 - lr: 6.0000e-05

Epoch 8: LearningRateScheduler setting learning rate to 6e-06.
Epoch 8/10
1350/1350 [==============================] - 641s 475ms/step - loss: 3.1092 - val_loss: 3.4363 - lr: 6.0000e-06

Epoch 9: LearningRateScheduler setting learning rate to 6e-06.
Epoch 9/10
1350/1350 [==============================] - 638s 472ms/step - loss: 3.0990 - val_loss: 3.4389 - lr: 6.0000e-06

Epoch 10: LearningRateScheduler setting learning rate to 6e-06.
Epoch 10/10
1350/1350 [==============================] - 640s 474ms/step - loss: 3.0904 - val_loss: 3.4411 - lr: 6.0000e-06

[{'rouge-1': {'r': 0.11407155454774502,
   'p': 0.2933634992458522,
   'f': 0.1594202864015872},
  'rouge-2': {'r': 0.025, 'p': 0.06976744186046512, 'f': 0.036809814008807354},
  'rouge-l': {'r': 0.10789871504157218,
   'p': 0.28054298642533937,
   'f': 0.15108695306825387}},
 0.18277770549823882,
 [tensor([0.8404, 0.7871]),
  tensor([0.8208, 0.7991]),
  tensor([0.8305, 0.7930])]]

# Results:

Songs_loaded = [500, 1000, 1500, 2000, 2500, 3000]

'rouge-1':
  'r': [0.01020408163265306,  0.018518518518518517, 0.028722600151171576, 0.07086167800453515, 0.07086167800453515, 0.11407155454774502]
  'p': [0.2, 0.25, 0.3875, 0.4825174825174825, 0.4343891402714932, 0.2933634992458522]
  'f': [0.01941747526628335, 0.03448275797859692, 0.05312533961674416, 0.12326761453183363, 0.12183163492135196, 0.1594202864015872]

'rouge-2':
  'r': [0.0, 0.004166666666666667, 0.004166666666666667, 0.020833333333333332,  0.020833333333333332, 0.025]
  'p': [0.0, 0.08333333333333333, 0.0625, 0.16666666666666666, 0.17857142857142858, 0.06976744186046512]
  'f': [0.0, 0.007936507482993222, 0.007812499414062544, 0.03703703604938274, 0.03731343190020051, 0.036809814008807354]

'rouge-l':
  'r': [0.01020408163265306, 0.018518518518518517, 0.028722600151171576, 0.06575963718820861, 0.06468883849836231, 0.10789871504157218]
  'p': [0.2, 0.25, 0.3875, 0.4370629370629371, 0.3959276018099548, 0.28054298642533937]
  'f': [0.01941747526628335, 0.03448275797859692, 0.05312533961674416, 0.11409330260522814, 0.11119333704901155, 0.15108695306825387]
  
'meteor':
[0.05859221777673178, 0.06892781743239168, 0.09982092084754013, 0.10804279255366213, 0.11153267947247295, 0.18277770549823882]

'BERTScore':
 'p': [0.7319, 0.7673, 0.7949, 0.8073, 0.8052, 0.8404]
 'r': [0.7736, 0.7770, 0.7947, 0.8105, 0.8125, 0.8208]
 'f': [0.7522, 0.7721, 0.7948, 0.8089, 0.8088, 0.8305]


# facebook/bart-large-cnn

## Loading 500 songs:
Epoch 1: LearningRateScheduler setting learning rate to 6e-05.
Epoch 1/10
225/225 [==============================] - 351s 770ms/step - loss: 3.7792 - val_loss: 3.4643 - lr: 6.0000e-05

Epoch 2: LearningRateScheduler setting learning rate to 6e-05.
Epoch 2/10
225/225 [==============================] - 153s 682ms/step - loss: 3.5354 - val_loss: 3.4117 - lr: 6.0000e-05

Epoch 3: LearningRateScheduler setting learning rate to 6e-05.
Epoch 3/10
225/225 [==============================] - 73s 322ms/step - loss: 3.3964 - val_loss: 3.3825 - lr: 6.0000e-05

Epoch 4: LearningRateScheduler setting learning rate to 6e-05.
Epoch 4/10
225/225 [==============================] - 139s 619ms/step - loss: 3.2850 - val_loss: 3.3765 - lr: 6.0000e-05

Epoch 5: LearningRateScheduler setting learning rate to 6e-05.
Epoch 5/10
225/225 [==============================] - 139s 617ms/step - loss: 3.1830 - val_loss: 3.3616 - lr: 6.0000e-05

Epoch 6: LearningRateScheduler setting learning rate to 6e-05.
Epoch 6/10
225/225 [==============================] - 141s 626ms/step - loss: 3.0837 - val_loss: 3.3737 - lr: 6.0000e-05

Epoch 7: LearningRateScheduler setting learning rate to 6e-05.
Epoch 7/10
225/225 [==============================] - 143s 638ms/step - loss: 2.9746 - val_loss: 3.3838 - lr: 6.0000e-05

Epoch 8: LearningRateScheduler setting learning rate to 6e-06.
Epoch 8/10
225/225 [==============================] - 70s 310ms/step - loss: 2.8861 - val_loss: 3.3932 - lr: 6.0000e-06

Epoch 9: LearningRateScheduler setting learning rate to 6e-06.
Epoch 9/10
225/225 [==============================] - 143s 639ms/step - loss: 2.8652 - val_loss: 3.3984 - lr: 6.0000e-06

Epoch 10: LearningRateScheduler setting learning rate to 6e-06.
Epoch 10/10
225/225 [==============================] - 71s 315ms/step - loss: 2.8554 - val_loss: 3.4011 - lr: 6.0000e-06

[{'rouge-1': {'r': 0.14978584026203073,
   'p': 0.40714285714285714,
   'f': 0.21729611001257618},
  'rouge-2': {'r': 0.03205128205128205,
   'p': 0.09017223910840932,
   'f': 0.04720790589397701},
  'rouge-l': {'r': 0.1436130007558579,
   'p': 0.39285714285714285,
   'f': 0.20867542035740375}},
 0.26145411346736447,
 [tensor([0.8574, 0.8323]),
  tensor([0.8222, 0.8123]),
  tensor([0.8394, 0.8222])]]

## Loading 1000 songs:
Epoch 1: LearningRateScheduler setting learning rate to 6e-05.
Epoch 1/10
450/450 [==============================] - 429s 594ms/step - loss: 3.6412 - val_loss: 3.5800 - lr: 6.0000e-05

Epoch 2: LearningRateScheduler setting learning rate to 6e-05.
Epoch 2/10
450/450 [==============================] - 217s 483ms/step - loss: 3.4101 - val_loss: 3.5235 - lr: 6.0000e-05

Epoch 3: LearningRateScheduler setting learning rate to 6e-05.
Epoch 3/10
450/450 [==============================] - 205s 456ms/step - loss: 3.2777 - val_loss: 3.5013 - lr: 6.0000e-05

Epoch 4: LearningRateScheduler setting learning rate to 6e-05.
Epoch 4/10
450/450 [==============================] - 203s 451ms/step - loss: 3.1688 - val_loss: 3.4972 - lr: 6.0000e-05

Epoch 5: LearningRateScheduler setting learning rate to 6e-05.
Epoch 5/10
450/450 [==============================] - 202s 448ms/step - loss: 3.0803 - val_loss: 3.4901 - lr: 6.0000e-05

Epoch 6: LearningRateScheduler setting learning rate to 6e-05.
Epoch 6/10
450/450 [==============================] - 202s 450ms/step - loss: 2.9866 - val_loss: 3.5084 - lr: 6.0000e-05

Epoch 7: LearningRateScheduler setting learning rate to 6e-05.
Epoch 7/10
450/450 [==============================] - 202s 449ms/step - loss: 2.8833 - val_loss: 3.5218 - lr: 6.0000e-05

Epoch 8: LearningRateScheduler setting learning rate to 6e-06.
Epoch 8/10
450/450 [==============================] - 200s 445ms/step - loss: 2.8039 - val_loss: 3.5290 - lr: 6.0000e-06

Epoch 9: LearningRateScheduler setting learning rate to 6e-06.
Epoch 9/10
450/450 [==============================] - 201s 446ms/step - loss: 2.7741 - val_loss: 3.5326 - lr: 6.0000e-06

Epoch 10: LearningRateScheduler setting learning rate to 6e-06.
Epoch 10/10
450/450 [==============================] - 201s 447ms/step - loss: 2.7780 - val_loss: 3.5367 - lr: 6.0000e-06

[{'rouge-1': {'r': 0.1283068783068783,
   'p': 0.30030959752321984,
   'f': 0.17908071911249016},
  'rouge-2': {'r': 0.029166666666666667, 'p': 0.07, 'f': 0.041176468512110834},
  'rouge-l': {'r': 0.1283068783068783,
   'p': 0.30030959752321984,
   'f': 0.17908071911249016}},
 0.23785767209540679,
 [tensor([0.8598, 0.8114]),
  tensor([0.8236, 0.8036]),
  tensor([0.8413, 0.8075])]]

## Loading 1500 songs:
Epoch 1: LearningRateScheduler setting learning rate to 6e-05.
Epoch 1/10
675/675 [==============================] - 524s 531ms/step - loss: 3.6098 - val_loss: 3.4504 - lr: 6.0000e-05

Epoch 2: LearningRateScheduler setting learning rate to 6e-05.
Epoch 2/10
675/675 [==============================] - 283s 420ms/step - loss: 3.3773 - val_loss: 3.4033 - lr: 6.0000e-05

Epoch 3: LearningRateScheduler setting learning rate to 6e-05.
Epoch 3/10
675/675 [==============================] - 278s 412ms/step - loss: 3.2586 - val_loss: 3.3806 - lr: 6.0000e-05

Epoch 4: LearningRateScheduler setting learning rate to 6e-05.
Epoch 4/10
675/675 [==============================] - 275s 408ms/step - loss: 3.1587 - val_loss: 3.3878 - lr: 6.0000e-05

Epoch 5: LearningRateScheduler setting learning rate to 6e-05.
Epoch 5/10
675/675 [==============================] - 279s 413ms/step - loss: 3.0531 - val_loss: 3.3933 - lr: 6.0000e-05

Epoch 6: LearningRateScheduler setting learning rate to 6e-05.
Epoch 6/10
675/675 [==============================] - 269s 399ms/step - loss: 2.9694 - val_loss: 3.4048 - lr: 6.0000e-05

Epoch 7: LearningRateScheduler setting learning rate to 6e-05.
Epoch 7/10
675/675 [==============================] - 273s 404ms/step - loss: 2.8831 - val_loss: 3.4272 - lr: 6.0000e-05

Epoch 8: LearningRateScheduler setting learning rate to 6e-06.
Epoch 8/10
675/675 [==============================] - 273s 404ms/step - loss: 2.7836 - val_loss: 3.4340 - lr: 6.0000e-06

Epoch 9: LearningRateScheduler setting learning rate to 6e-06.
Epoch 9/10
675/675 [==============================] - 274s 406ms/step - loss: 2.7754 - val_loss: 3.4389 - lr: 6.0000e-06

Epoch 10: LearningRateScheduler setting learning rate to 6e-06.
Epoch 10/10
675/675 [==============================] - 277s 410ms/step - loss: 2.7564 - val_loss: 3.4405 - lr: 6.0000e-06
[{'rouge-1': {'r': 0.18764172335600907,
   'p': 0.4291497975708502,
   'f': 0.2607495510866594},
  'rouge-2': {'r': 0.03685897435897436,
   'p': 0.09040816326530612,
   'f': 0.05234965088157531},
  'rouge-l': {'r': 0.1599899218946838,
   'p': 0.3643724696356275,
   'f': 0.2220450181786057}},
 0.2875325940215777,
 [tensor([0.8537, 0.8435]),
  tensor([0.8230, 0.8212]),
  tensor([0.8381, 0.8322])]]

## Loading 2000 songs:

Epoch 1: LearningRateScheduler setting learning rate to 6e-05.
Epoch 1/10
900/900 [==============================] - 605s 488ms/step - loss: 3.5721 - val_loss: 3.3704 - lr: 6.0000e-05

Epoch 2: LearningRateScheduler setting learning rate to 6e-05.
Epoch 2/10
900/900 [==============================] - 351s 390ms/step - loss: 3.3599 - val_loss: 3.3262 - lr: 6.0000e-05

Epoch 3: LearningRateScheduler setting learning rate to 6e-05.
Epoch 3/10
900/900 [==============================] - 344s 382ms/step - loss: 3.2316 - val_loss: 3.3114 - lr: 6.0000e-05

Epoch 4: LearningRateScheduler setting learning rate to 6e-05.
Epoch 4/10
900/900 [==============================] - 341s 379ms/step - loss: 3.1259 - val_loss: 3.3045 - lr: 6.0000e-05

Epoch 5: LearningRateScheduler setting learning rate to 6e-05.
Epoch 5/10
900/900 [==============================] - 348s 386ms/step - loss: 3.0387 - val_loss: 3.3135 - lr: 6.0000e-05

Epoch 6: LearningRateScheduler setting learning rate to 6e-05.
Epoch 6/10
900/900 [==============================] - 348s 387ms/step - loss: 2.9528 - val_loss: 3.3179 - lr: 6.0000e-05

Epoch 7: LearningRateScheduler setting learning rate to 6e-05.
Epoch 7/10
900/900 [==============================] - 346s 384ms/step - loss: 2.8733 - val_loss: 3.3447 - lr: 6.0000e-05

Epoch 8: LearningRateScheduler setting learning rate to 6e-06.
Epoch 8/10
900/900 [==============================] - 345s 384ms/step - loss: 2.7908 - val_loss: 3.3482 - lr: 6.0000e-06

Epoch 9: LearningRateScheduler setting learning rate to 6e-06.
Epoch 9/10
900/900 [==============================] - 346s 384ms/step - loss: 2.7709 - val_loss: 3.3513 - lr: 6.0000e-06

Epoch 10: LearningRateScheduler setting learning rate to 6e-06.
Epoch 10/10
900/900 [==============================] - 345s 384ms/step - loss: 2.7650 - val_loss: 3.3547 - lr: 6.0000e-06

[{'rouge-1': {'r': 0.1599899218946838,
   'p': 0.4285714285714286,
   'f': 0.23168103058145825},
  'rouge-2': {'r': 0.04455128205128205,
   'p': 0.11956521739130435,
   'f': 0.06489594349234781},
  'rouge-l': {'r': 0.15381708238851094,
   'p': 0.41428571428571426,
   'f': 0.22306034092628582}},
 0.26823947764628797,
 [tensor([0.8675, 0.8411]),
  tensor([0.8298, 0.8154]),
  tensor([0.8482, 0.8281])]]

## Loading 2500 songs:
Epoch 1: LearningRateScheduler setting learning rate to 6e-05.
Epoch 1/10
1125/1125 [==============================] - 642s 441ms/step - loss: 3.5482 - val_loss: 3.3770 - lr: 6.0000e-05

Epoch 2: LearningRateScheduler setting learning rate to 6e-05.
Epoch 2/10
1125/1125 [==============================] - 405s 360ms/step - loss: 3.3327 - val_loss: 3.3388 - lr: 6.0000e-05

Epoch 3: LearningRateScheduler setting learning rate to 6e-05.
Epoch 3/10
1125/1125 [==============================] - 398s 354ms/step - loss: 3.2120 - val_loss: 3.3209 - lr: 6.0000e-05

Epoch 4: LearningRateScheduler setting learning rate to 6e-05.
Epoch 4/10
1125/1125 [==============================] - 394s 351ms/step - loss: 3.1087 - val_loss: 3.3167 - lr: 6.0000e-05

Epoch 5: LearningRateScheduler setting learning rate to 6e-05.
Epoch 5/10
1125/1125 [==============================] - 397s 353ms/step - loss: 3.0174 - val_loss: 3.3238 - lr: 6.0000e-05

Epoch 6: LearningRateScheduler setting learning rate to 6e-05.
Epoch 6/10
1125/1125 [==============================] - 394s 350ms/step - loss: 2.9282 - val_loss: 3.3296 - lr: 6.0000e-05

Epoch 7: LearningRateScheduler setting learning rate to 6e-05.
Epoch 7/10
1125/1125 [==============================] - 394s 351ms/step - loss: 2.8477 - val_loss: 3.3483 - lr: 6.0000e-05

Epoch 8: LearningRateScheduler setting learning rate to 6e-06.
Epoch 8/10
1125/1125 [==============================] - 396s 352ms/step - loss: 2.7597 - val_loss: 3.3612 - lr: 6.0000e-06

Epoch 9: LearningRateScheduler setting learning rate to 6e-06.
Epoch 9/10
1125/1125 [==============================] - 397s 353ms/step - loss: 2.7482 - val_loss: 3.3646 - lr: 6.0000e-06

Epoch 10: LearningRateScheduler setting learning rate to 6e-06.
Epoch 10/10
1125/1125 [==============================] - 399s 355ms/step - loss: 2.7399 - val_loss: 3.3678 - lr: 6.0000e-06

[{'rouge-1': {'r': 0.17126480221718315,
   'p': 0.4328547297297297,
   'f': 0.2440677926021495},
  'rouge-2': {'r': 0.04871794871794872,
   'p': 0.13041314086610253,
   'f': 0.07072908709047165},
  'rouge-l': {'r': 0.17126480221718315,
   'p': 0.4328547297297297,
   'f': 0.2440677926021495}},
 0.2764248119116917,
 [tensor([0.8539, 0.8199]),
  tensor([0.8249, 0.8142]),
  tensor([0.8392, 0.8170])]]

## Loading 3000 songs:
Epoch 1: LearningRateScheduler setting learning rate to 6e-05.
Epoch 1/10
1350/1350 [==============================] - 903s 529ms/step - loss: 3.5358 - val_loss: 3.4114 - lr: 6.0000e-05

Epoch 2: LearningRateScheduler setting learning rate to 6e-05.
Epoch 2/10
1350/1350 [==============================] - 844s 625ms/step - loss: 3.3241 - val_loss: 3.3655 - lr: 6.0000e-05

Epoch 3: LearningRateScheduler setting learning rate to 6e-05.
Epoch 3/10
1350/1350 [==============================] - 841s 623ms/step - loss: 3.2070 - val_loss: 3.3545 - lr: 6.0000e-05

Epoch 4: LearningRateScheduler setting learning rate to 6e-05.
Epoch 4/10
1350/1350 [==============================] - 862s 639ms/step - loss: 3.1091 - val_loss: 3.3574 - lr: 6.0000e-05

Epoch 5: LearningRateScheduler setting learning rate to 6e-05.
Epoch 5/10
1350/1350 [==============================] - 869s 644ms/step - loss: 3.0267 - val_loss: 3.3572 - lr: 6.0000e-05

Epoch 6: LearningRateScheduler setting learning rate to 6e-05.
Epoch 6/10
1350/1350 [==============================] - 858s 635ms/step - loss: 2.9431 - val_loss: 3.3675 - lr: 6.0000e-05

Epoch 7: LearningRateScheduler setting learning rate to 6e-05.
Epoch 7/10
1350/1350 [==============================] - 824s 611ms/step - loss: 2.8697 - val_loss: 3.3808 - lr: 6.0000e-05

Epoch 8: LearningRateScheduler setting learning rate to 6e-06.
Epoch 8/10
1350/1350 [==============================] - 896s 664ms/step - loss: 2.7708 - val_loss: 3.3960 - lr: 6.0000e-06

Epoch 9: LearningRateScheduler setting learning rate to 6e-06.
Epoch 9/10
1350/1350 [==============================] - 878s 650ms/step - loss: 2.7665 - val_loss: 3.3999 - lr: 6.0000e-06

Epoch 10: LearningRateScheduler setting learning rate to 6e-06.
Epoch 10/10
1350/1350 [==============================] - 849s 629ms/step - loss: 2.7552 - val_loss: 3.3997 - lr: 6.0000e-06

[{'rouge-1': {'r': 0.17743764172335602,
   'p': 0.4225563909774436,
   'f': 0.24900486099130287},
  'rouge-2': {'r': 0.04455128205128205,
   'p': 0.11736111111111111,
   'f': 0.06452380557278939},
  'rouge-l': {'r': 0.17126480221718315,
   'p': 0.40939849624060154,
   'f': 0.24060149964676505}},
 0.28509573388607945,
 [tensor([0.8596, 0.8433]),
  tensor([0.8255, 0.8194]),
  tensor([0.8422, 0.8312])]]

# Results:

# Final evaluation scores of the facebook/bart-large-cnn runs logged above.
# Position i of every list belongs to the i-th run in the order the runs are
# listed above: 500, 1000, 1500, 2000, 2500 and 3000 songs loaded.
# The 'r' / 'p' / 'f' keys mirror the keys of the ROUGE output printed after
# each run.
# NOTE(review): the variable is named `bert_dict` although this section is
# about BART; the name is kept so any later cell that refers to it still works.
bert_dict = {
    "rouge-1" : {
        'r': [0.14978584026203073, 0.1283068783068783, 0.18764172335600907, 0.1599899218946838, 0.17126480221718315, 0.17743764172335602],
        'p': [0.40714285714285714, 0.30030959752321984, 0.4291497975708502, 0.4285714285714286,  0.4328547297297297, 0.4225563909774436],
        'f': [0.21729611001257618, 0.17908071911249016, 0.2607495510866594, 0.23168103058145825, 0.2440677926021495, 0.24900486099130287]
        },  # this closing brace was missing, which made the whole literal a SyntaxError
    "rouge-2" : {
        'r': [0.03205128205128205, 0.029166666666666667, 0.03685897435897436, 0.04455128205128205, 0.04871794871794872, 0.04455128205128205],
        'p': [0.09017223910840932, 0.07, 0.09040816326530612, 0.11956521739130435, 0.13041314086610253, 0.11736111111111111],
        'f': [0.04720790589397701, 0.041176468512110834, 0.05234965088157531, 0.06489594349234781, 0.07072908709047165, 0.06452380557278939]
        },
    "rouge-l" : {
        'r': [0.1436130007558579, 0.1283068783068783, 0.1599899218946838, 0.15381708238851094, 0.17126480221718315, 0.17126480221718315],
        'p': [0.39285714285714285, 0.30030959752321984, 0.3643724696356275, 0.41428571428571426, 0.4328547297297297, 0.40939849624060154],
        'f': [ 0.20867542035740375, 0.17908071911249016, 0.2220450181786057, 0.22306034092628582, 0.2440677926021495, 0.24060149964676505]
        },
    # One METEOR value per run (the bare float printed after the ROUGE dict).
    "meteor" : [0.26145411346736447, 0.23785767209540679, 0.2875325940215777, 0.26823947764628797, 0.2764248119116917, 0.28509573388607945],
    # Each value is the first element of the 1st / 2nd / 3rd tensor printed
    # for the run, stored under 'r' / 'p' / 'f' respectively.
    # NOTE(review): bert_score.score returns (P, R, F1) in that order; if the
    # tensors above are its raw return value, 'r' and 'p' are swapped here.
    # Confirm against the evaluation cell before relying on these labels.
    "BERTScore" : {
        'r': [0.8574, 0.8598, 0.8537, 0.8675, 0.8539, 0.8596],
        'p': [0.8222, 0.8236, 0.8230, 0.8298, 0.8249, 0.8255],
        'f': [0.8394, 0.8413, 0.8381, 0.8482, 0.8392, 0.8422]
        }
    }
