# Read in dependencies

In [None]:
# import needed dependencies for testing PoemSum model
!pip install pytorch-lightning transformers torch

Collecting pytorch-lightning
  Downloading pytorch_lightning-2.4.0-py3-none-any.whl.metadata (21 kB)
Collecting torchmetrics>=0.7.0 (from pytorch-lightning)
  Downloading torchmetrics-1.6.0-py3-none-any.whl.metadata (20 kB)
Collecting lightning-utilities>=0.10.0 (from pytorch-lightning)
  Downloading lightning_utilities-0.11.9-py3-none-any.whl.metadata (5.2 kB)
Downloading pytorch_lightning-2.4.0-py3-none-any.whl (815 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m815.2/815.2 kB[0m [31m11.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading lightning_utilities-0.11.9-py3-none-any.whl (28 kB)
Downloading torchmetrics-1.6.0-py3-none-any.whl (926 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m926.4/926.4 kB[0m [31m34.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: lightning-utilities, torchmetrics, pytorch-lightning
Successfully installed lightning-utilities-0.11.9 pytorch-lightning-2.4.0 torchmetrics-1.6.0


In [None]:
# Import needed dependencies while avoiding conflicts
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger
from transformers import (
    T5ForConditionalGeneration,
    T5TokenizerFast as T5Tokenizer,
    AdamW
)
import re
import os
from sklearn.model_selection import train_test_split

In [None]:
import textwrap
def print_summary(text, width=70):
    """Print ``text`` re-flowed so that no line is longer than ``width`` characters."""
    wrapped = textwrap.fill(text, width=width)
    print(wrapped)


# Class Modules

In [None]:
# Custom Dataset class from PoemSum model
class LyricsSummaryDataset(Dataset):
    """Map-style dataset that tokenizes (text, summary) pairs on access.

    Every row of ``data`` must provide a ``"text"`` column (model input) and a
    ``"summary"`` column (target). Both strings are padded/truncated to a fixed
    token length, and padding positions in the target are replaced with
    ``IGNORE_INDEX`` so they can be excluded from the loss.

    Args:
        data: DataFrame with ``"text"`` and ``"summary"`` columns.
        tokenizer: Tokenizer applied to both the input and the target.
        text_max_token_len: Fixed token length of the encoded input.
        summary_max_token_len: Fixed token length of the encoded target.

    Note:
        The defaults (2000 / 10000 tokens) are far larger than the 512 / 256
        that ``LyricsSummaryDataModule`` passes. Because ``padding="max_length"``
        is used, every item is padded to these lengths, so callers should pass
        explicit limits rather than rely on the defaults.
    """

    # Value written over padding positions in ``labels``.
    IGNORE_INDEX = -100

    def __init__(
        self,
        data: pd.DataFrame,
        tokenizer: T5Tokenizer,
        text_max_token_len: int = 2000,
        summary_max_token_len: int = 10000
    ):
        self.tokenizer = tokenizer
        self.data = data
        self.text_max_token_len = text_max_token_len
        self.summary_max_token_len = summary_max_token_len

    def __len__(self):
        """Return the number of rows in the underlying DataFrame."""
        return len(self.data)

    def _encode(self, text, max_length: int):
        """Tokenize ``text`` to exactly ``max_length`` tokens as PyTorch tensors."""
        return self.tokenizer(
            text,
            max_length=max_length,
            padding="max_length",
            truncation=True,
            return_attention_mask=True,
            add_special_tokens=True,
            return_tensors="pt"
        )

    def __getitem__(self, index: int):
        """Return the raw strings plus flattened input/label tensors for row ``index``."""
        data_row = self.data.iloc[index]

        text_encoding = self._encode(data_row["text"], self.text_max_token_len)
        summary_encoding = self._encode(data_row["summary"], self.summary_max_token_len)

        # Ask the tokenizer for its pad id instead of assuming it is 0; fall
        # back to 0 (the previous hard-coded value) when it is not defined.
        pad_token_id = getattr(self.tokenizer, "pad_token_id", None)
        if pad_token_id is None:
            pad_token_id = 0

        # Clone so the tokenizer's own output tensor is not modified in place.
        labels = summary_encoding["input_ids"].clone()
        labels[labels == pad_token_id] = self.IGNORE_INDEX

        return dict(
            text=data_row["text"],
            summary=data_row["summary"],
            text_input_ids=text_encoding["input_ids"].flatten(),
            text_attention_mask=text_encoding["attention_mask"].flatten(),
            labels=labels.flatten(),
            labels_attention_mask=summary_encoding["attention_mask"].flatten()
        )

# Lightning Data Module from Poem Sum
class LyricsSummaryDataModule(pl.LightningDataModule):
    """Lightning data module that serves train/validation ``LyricsSummaryDataset`` loaders.

    Args:
        train_df: Training rows (``"text"`` / ``"summary"`` columns).
        val_df: Validation rows with the same columns.
        tokenizer: Tokenizer handed to each dataset.
        batch_size: Batch size used by both loaders.
        text_max_token_len: Token length the inputs are padded/truncated to.
        summary_max_token_len: Token length the targets are padded/truncated to.
        num_workers: Worker processes per DataLoader (previously fixed at 2).
    """

    def __init__(
        self,
        train_df: pd.DataFrame,
        val_df: pd.DataFrame,
        tokenizer: T5Tokenizer,
        batch_size: int = 8,
        text_max_token_len: int = 512,
        summary_max_token_len: int = 256,
        num_workers: int = 2
    ):
        super().__init__()
        self.train_df = train_df
        self.val_df = val_df
        self.batch_size = batch_size
        self.tokenizer = tokenizer
        self.text_max_token_len = text_max_token_len
        self.summary_max_token_len = summary_max_token_len
        self.num_workers = num_workers

    def _build_dataset(self, frame: pd.DataFrame):
        """Wrap ``frame`` in a dataset using this module's tokenizer and length limits."""
        return LyricsSummaryDataset(
            frame,
            self.tokenizer,
            self.text_max_token_len,
            self.summary_max_token_len
        )

    def setup(self, stage=None):
        """Create the train and validation datasets (``stage`` is not used)."""
        self.train_dataset = self._build_dataset(self.train_df)
        self.val_dataset = self._build_dataset(self.val_df)

    def train_dataloader(self):
        """Return the shuffled training loader."""
        return DataLoader(
            self.train_dataset,
            batch_size=self.batch_size,
            shuffle=True,
            num_workers=self.num_workers
        )

    def val_dataloader(self):
        """Return the validation loader (fixed order)."""
        return DataLoader(
            self.val_dataset,
            batch_size=self.batch_size,
            shuffle=False,
            num_workers=self.num_workers
        )

# Model Class
class LyricsSummaryModel(pl.LightningModule):
    """LightningModule wrapping ``T5ForConditionalGeneration`` for summary fine-tuning.

    Args:
        model_name: Name or path of the pretrained checkpoint to load.
    """

    def __init__(self, model_name='t5-small'):
        super().__init__()
        self.model = T5ForConditionalGeneration.from_pretrained(model_name, return_dict=True)
        # from_pretrained() hands the network back in eval mode (dropout off).
        # Switch it to train mode explicitly so fine-tuning does not run with
        # every submodule in eval mode.
        self.model.train()

    def forward(self, input_ids, attention_mask, decoder_attention_mask, labels=None):
        """Run the wrapped model and return ``(loss, logits)``."""
        output = self.model(
            input_ids,
            attention_mask=attention_mask,
            labels=labels,
            decoder_attention_mask=decoder_attention_mask
        )
        return output.loss, output.logits

    def _shared_step(self, batch, stage: str):
        """Compute the loss for ``batch`` and log it as ``"<stage>_loss"``."""
        loss, _ = self(
            batch["text_input_ids"],
            batch["text_attention_mask"],
            batch["labels_attention_mask"],
            batch["labels"]
        )
        self.log(f"{stage}_loss", loss, prog_bar=True, logger=True)
        return loss

    def training_step(self, batch, batch_idx):
        """Return the training loss for one batch (logged as ``train_loss``)."""
        return self._shared_step(batch, "train")

    def validation_step(self, batch, batch_idx):
        """Return the validation loss for one batch (logged as ``val_loss``)."""
        return self._shared_step(batch, "val")

    def configure_optimizers(self):
        """Return the AdamW optimizer used for fine-tuning."""
        # transformers.AdamW is deprecated in favour of torch.optim.AdamW.
        # eps and weight_decay are pinned to the values the transformers
        # implementation used by default, so the update rule stays the same.
        return torch.optim.AdamW(
            self.parameters(),
            lr=0.00001,
            eps=1e-6,
            weight_decay=0.0
        )

In [None]:
# Mount Google Drive at /content/drive (Colab only); the data and model paths
# used later in the notebook live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Data Preparation

Load the cleaned song files for all artists and combine them into a single DataFrame.

In [None]:
# Train on all artists: read every cleaned CSV and combine them into one DataFrame.
folder_path = "/content/drive/My Drive/266 Final Project/Cleaned Song Files"

# os.listdir() returns entries in arbitrary order. Sorting keeps the row order
# of the combined frame stable between runs, which the seeded train/test split
# relies on to be reproducible.
csv_files = sorted(
    filename for filename in os.listdir(folder_path) if filename.endswith('.csv')
)
if not csv_files:
    raise FileNotFoundError(f"No CSV files found in {folder_path}")

# One DataFrame per artist file.
df_list = [pd.read_csv(os.path.join(folder_path, filename)) for filename in csv_files]

# Concatenate all DataFrames in the list into a single DataFrame
df = pd.concat(df_list, ignore_index=True)

In [None]:
# Character length of every lyric, computed once and reused for each statistic
lyric_lengths = df['Lyrics'].str.len()
average_length = lyric_lengths.mean()
min_length = lyric_lengths.min()
max_length = lyric_lengths.max()

# Report mean, shortest and longest lyric length
for label, value in (("Average", average_length), ("Min", min_length), ("Max", max_length)):
    print(f"{label} string length of lyrics:", value)

Average string length of lyrics: 1456.5506551613935
Min string length of lyrics: 12.0
Max string length of lyrics: 5686.0


# Create and Train t5 model

In [None]:
# Do this before calling create_baseline_model: hold out 20% of the rows as
# test_df; train_val_df (the other 80%) is what gets passed to training.
# random_state fixes the shuffle used for the split.
train_val_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

In [None]:
def _prepare_training_frame(df):
  """Build the ``text`` / ``summary`` frame used for fine-tuning from raw song rows."""
  # Fill missing values BEFORE casting to str: astype(str) turns NaN into the
  # literal string 'nan', after which fillna('') has nothing left to fill.
  # Working on derived Series also leaves the caller's DataFrame untouched.
  lyrics = df['Lyrics'].fillna('').astype(str)
  annotations = df['Combined Annotations'].fillna('').astype(str)

  return pd.DataFrame({
      'text': "summarize lyrics and capture meaning: " + lyrics,
      # Targets are capped at the first 100 whitespace-separated words.
      'summary': annotations.apply(
          lambda x: f"Meaning and themes: {' '.join(x.split()[:100])}"
      )
  })


def create_baseline_model(
    df,
    save_dir="checkpoints",
    drive_path='/content/drive/MyDrive/266 Final Project/Our Models/Lyrics + Genius'
):
  """Fine-tune t5-small on lyrics -> annotation pairs and save the result.

  Args:
    df: DataFrame with 'Lyrics' and 'Combined Annotations' columns. It is not
      modified.
    save_dir: Root directory handed to the Lightning Trainer.
    drive_path: Directory the fine-tuned model and tokenizer are written to.

  Returns:
    Tuple ``(model, tokenizer, trainer)``.
  """
  # 1. Initialize model and tokenizer
  print("Initializing model and tokenizer...")
  MODEL_NAME = 't5-small'
  tokenizer = T5Tokenizer.from_pretrained(MODEL_NAME)
  model = LyricsSummaryModel(MODEL_NAME)

  # 2. Prepare data
  print("Preparing data...")
  prepared_data = _prepare_training_frame(df)

  # 3. Split data: first 80% of rows for training, remainder for validation
  train_size = int(0.8 * len(prepared_data))
  train_data = prepared_data[:train_size]
  val_data = prepared_data[train_size:]

  # 4. Set up data module
  data_module = LyricsSummaryDataModule(
      train_df=train_data,
      val_df=val_data,
      tokenizer=tokenizer,
      batch_size=2
  )

  # 5. Set up trainer
  trainer = pl.Trainer(
      max_epochs=2,
      accumulate_grad_batches=2,
      gradient_clip_val=1.0,
      precision=16 if torch.cuda.is_available() else 32,
      enable_checkpointing=True,
      default_root_dir=save_dir
  )

  # 6. Train model
  print("Starting training...")
  trainer.fit(model, data_module)

  # 7. Save model and tokenizer
  print("Saving model and tokenizer...")
  os.makedirs(drive_path, exist_ok=True)

  # Saving is best-effort: a failure is reported but the trained objects are
  # still returned to the caller.
  try:
      model.model.save_pretrained(drive_path)
      tokenizer.save_pretrained(drive_path)
      print(f"Model and tokenizer successfully saved to {drive_path}")
  except Exception as e:
      print(f"Failed to save model and tokenizer: {e}")

  return model, tokenizer, trainer


In [None]:
# Fine-tune on train_val_df only; test_df is not passed to training.
model, tokenizer, trainer = create_baseline_model(train_val_df)

Initializing model and tokenizer...


INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.callbacks.model_summary:
  | Name  | Type                       | Params | Mode
------------------------------------------------------------
0 | model | T5ForConditionalGeneration | 60.5 M | eval
------------------------------------------------------------
60.5 M    Trainable params
0         Non-trainable params
60.5 M    Total params
242.026   Total estimated model params size (MB)
0         Modules in train mode
277       Modules in eval mode


Preparing data...
Starting training...


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=2` reached.


Saving model and tokenizer...
Model and tokenizer successfully saved to /content/drive/MyDrive/266 Final Project/Our Models/Lyrics + Genius


# Generate Song Summary

In [None]:
def generate_song_summary(model, tokenizer, data, song_index, max_length=150, max_input_length=512):
    """Generate a summary for a single song.

    Args:
        model: Model exposing ``generate`` and ``device``.
        tokenizer: Tokenizer matching ``model``.
        data: DataFrame with a ``'Lyrics'`` column.
        song_index: Positional (``iloc``) index of the song in ``data``.
        max_length: Maximum number of tokens in the generated summary.
        max_input_length: Token limit the prompt is truncated to. Defaults to
            512, the input length used elsewhere in this notebook.

    Returns:
        The decoded summary string.
    """
    # Same prompt format as used for training
    input_text = f"summarize lyrics and capture meaning: {data.iloc[song_index]['Lyrics']}"

    # Truncate but do not pad: a single sequence needs no padding, and padding
    # it to a large fixed length only makes generation slower.
    inputs = tokenizer(
        input_text,
        max_length=max_input_length,
        truncation=True,
        return_tensors="pt"
    ).to(model.device)

    # Inference only, so no gradients are needed.
    with torch.no_grad():
        summary_ids = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            # Honour the caller's limit (it was previously ignored).
            max_length=max_length,
            # Keep the 100-token floor, but never above max_length.
            min_length=min(100, max_length),
            num_beams=5,
            length_penalty=0.5,
            early_stopping=True,
            no_repeat_ngram_size=2
        )

    # Decode the best beam
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

In [None]:
# Reload the fine-tuned model and tokenizer from Drive for inference.
# NOTE: this import rebinds `T5Tokenizer` to the class of that name; the import
# cell at the top of the notebook had aliased `T5Tokenizer` to T5TokenizerFast.
from transformers import T5ForConditionalGeneration, T5Tokenizer

# NOTE(review): training saves under '/content/drive/MyDrive/...' while this
# reads from '/content/drive/My Drive/...' - confirm both resolve to the same folder.
model_path = '/content/drive/My Drive/266 Final Project/Our Models/Lyrics + Genius'
tokenizer = T5Tokenizer.from_pretrained(model_path)
model = T5ForConditionalGeneration.from_pretrained(model_path)

print("Model and tokenizer loaded successfully!")

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


Model and tokenizer loaded successfully!


In [None]:
# Usage example: the bare string below is only a reminder of the call shape and
# has no effect when the cell runs.
"""
generate_song_summary(model, tokenizer, df, song_index=0)
"""

# Summarize the song at position 1 of the full DataFrame
summary = generate_song_summary(model, tokenizer, df, song_index=1)

# Show the song title followed by the wrapped summary
print(df.iloc[1]['Title'])
print_summary(summary)


KeyboardInterrupt: 

In [None]:
# List the column names of the combined DataFrame
print(df.columns.tolist())

['Song ID', 'Title', 'Lyrics URL', 'Combined Annotations', 'Wikipedia Annotation', 'Lyrics']


In [None]:
# Print the annotation text of the song at position 1, wrapped for readability
print_summary(df.iloc[1]['Combined Annotations'])


This song unfolds as a poignant reflection on a troubled relationship.
The song’s title immediately conjures images of unfulfilled dreams and
broken commitments. As the track progresses, it becomes evident that
Drake, is addressing a woman named Hailey.  The song’s opening lines,
“Hailey, it’s sad that I know all the tea,” set a tone of
disappointment and disillusionment. Drake references “broken pinky
promises” and recounts how a trip to the Bahamas was marred by
Hailey’s actions. There’s a palpable sense of betrayal and a
realization that the relationship isn’t working.  Throughout the song,
the theme of disappointment and broken trust resurfaces. Drake
expresses weariness with Hailey’s apologies, indicating that he’s
reached a breaking point. The wordplay involving “No” in monogamy
suggests that the relationship has been marked by infidelity or a lack
of commitment.  As the song unfolds, the emotional weight becomes more
pronounced. Drake laments that Hailey lives in his mind, rent-

# Evaluation

The main differences from the self-supervised approach used in the Lyrics Only model are:

* Evaluation against reference annotations instead of between multiple generations
* Simplified ROUGE score calculation (comparing to reference instead of between generations)
* Removed consistency scoring between multiple generations
* Added reference annotations to the examples output

If I run into memory issues:

* Reduce batch_size
* Add more aggressive memory cleanup

In [None]:
# Load the saved model and tokenizer from Drive (same steps as the earlier
# loading cell), so `model`, `tokenizer` and `model_path` are defined here.
from transformers import T5ForConditionalGeneration, T5Tokenizer

model_path = '/content/drive/My Drive/266 Final Project/Our Models/Lyrics + Genius'
tokenizer = T5Tokenizer.from_pretrained(model_path)
model = T5ForConditionalGeneration.from_pretrained(model_path)

print("Model and tokenizer loaded successfully!")

Model and tokenizer loaded successfully!


In [None]:
# Check that test_df was correctly initialized: print its (rows, columns)
# shape, its column names and its first rows.
print(test_df.shape)
print(test_df.columns)
print(test_df.head())

(638, 7)
Index(['Song ID', 'Title', 'Lyrics URL', 'Combined Annotations',
       'Wikipedia Annotation', 'Lyrics', 'generated_annotation'],
      dtype='object')
      Song ID                       Title  \
1029   328891              Someone for Me   
1001   141615  Saving All My Love for You   
785   8827806      What It Is (Block Boy)   
411   3037193             I’m Good (Blue)   
1105  1342410             The Way You Are   

                                             Lyrics URL  \
1029  https://genius.com/Whitney-houston-someone-for...   
1001  https://genius.com/Whitney-houston-saving-all-...   
785   https://genius.com/Doechii-what-it-is-block-bo...   
411   https://genius.com/David-guetta-and-bebe-rexha...   
1105  https://genius.com/Tears-for-fears-the-way-you...   

                                   Combined Annotations  \
1029  “Someone for Me” is the third track from Whitn...   
1001  “Saving All My Love for You” is a song written...   
785                                

In [None]:
# Check for NaN values
print("NaN values in test_df:")
print(test_df.isna().sum())

# Check data types
print("\nData types:")
print(test_df.dtypes)

# Work on an explicit copy: test_df was produced by train_test_split, and
# assigning columns on such a derived frame can trigger SettingWithCopyWarning.
test_df = test_df.copy()

# Clean the data: replace missing values with '' first, then cast to str
# (casting first would turn NaN into the literal string 'nan').
for column in ('Lyrics', 'Combined Annotations'):
    test_df[column] = test_df[column].fillna('').astype(str)

# Verify no empty strings that might cause issues
print("\nNumber of empty lyrics:", len(test_df[test_df['Lyrics'] == '']))
print("Number of empty annotations:", len(test_df[test_df['Combined Annotations'] == '']))

NaN values in test_df:
Song ID                   3
Title                     9
Lyrics URL               12
Combined Annotations      9
Wikipedia Annotation      9
Lyrics                    9
generated_annotation    635
dtype: int64

Data types:
Song ID                 object
Title                   object
Lyrics URL              object
Combined Annotations    object
Wikipedia Annotation    object
Lyrics                  object
generated_annotation    object
dtype: object

Number of empty lyrics: 9
Number of empty annotations: 9


In [None]:
# Install required package
!pip install bert-score
!pip install rouge-score

import torch
import numpy as np
import pandas as pd
from typing import List, Dict, Tuple
from transformers import T5Tokenizer
from rouge_score import rouge_scorer
from bert_score import score
from tqdm import tqdm

from bert_score import score
import torch
from sklearn.model_selection import train_test_split




In [None]:
def evaluate_supervised_model(
    model: T5ForConditionalGeneration,
    tokenizer: T5Tokenizer,
    test_data: pd.DataFrame,
    batch_size: int = 16
) -> Tuple[Dict[str, float], List[Dict]]:
    """
    Evaluate supervised lyrics model comparing against Genius annotations

    Summaries are generated batch by batch from the 'Lyrics' column and scored
    against the lyrics (content coverage, token-overlap similarity) and against
    the 'Combined Annotations' column (ROUGE-1/2/L and BERTScore F1).

    Args:
        model: Fine-tuned model; it is switched to eval mode and moved to the
            GPU when one is available.
        tokenizer: Tokenizer matching ``model``.
        test_data: DataFrame with 'Lyrics' and 'Combined Annotations' columns.
        batch_size: Number of songs generated and scored per step.

    Returns:
        ``(metrics, examples)``: ``metrics`` maps ``avg_*`` names to the mean of
        each score over all songs; ``examples`` holds the first five songs with
        their texts and per-song scores.
    """
    # Local import: bert_score is already a dependency of this notebook, and the
    # scorer class lets the BERT model be loaded a single time.
    from bert_score import BERTScorer

    model.eval()
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)

    # bert_score.score() reloads its model on every call; build it once here.
    bert_scorer = BERTScorer(lang='en')

    evaluation_results = {
        'content_coverage': [],
        'semantic_similarity': [],
        'rouge1_scores': [],
        'rouge2_scores': [],
        'rougeL_scores': [],
        'bert_scores': []
    }

    examples = []

    for idx in tqdm(range(0, len(test_data), batch_size)):
        batch = test_data.iloc[idx:idx + batch_size]
        # Coerce missing values to '' so every item is a real string.
        batch_lyrics = batch['Lyrics'].fillna('').astype(str).tolist()
        batch_annotations = batch['Combined Annotations'].fillna('').astype(str).tolist()

        # Generate summaries
        inputs = tokenizer(
            [f"summarize lyrics and capture meaning: {lyric}" for lyric in batch_lyrics],
            padding=True,
            truncation=True,
            max_length=512,
            return_tensors="pt"
        ).to(device)

        with torch.no_grad():
            outputs = model.generate(
                input_ids=inputs['input_ids'],
                attention_mask=inputs['attention_mask'],
                max_length=150,
                min_length=50,
                num_beams=4,
                do_sample=True,
                temperature=0.7,
                top_k=50,
                no_repeat_ngram_size=3,
                length_penalty=1.0,
                repetition_penalty=1.2
            )

        generated_summaries = tokenizer.batch_decode(outputs, skip_special_tokens=True)

        # BERTScore for every (summary, reference) pair of the batch in one
        # call, so each song is credited with its own score instead of reusing
        # the score of an earlier song.
        _, _, bert_f1 = bert_scorer.score(generated_summaries, batch_annotations, verbose=False)
        batch_bert_scores = bert_f1.tolist()

        # Evaluate each summary against its lyrics and its reference annotation
        per_song = zip(batch_lyrics, generated_summaries, batch_annotations, batch_bert_scores)
        for original_lyric, generated_summary, reference_annotation, bert_score_f1 in per_song:
            # Content Coverage (between summary and lyrics)
            coverage_score = calculate_content_coverage(original_lyric, generated_summary)
            evaluation_results['content_coverage'].append(coverage_score)

            # Semantic Similarity (between summary and lyrics)
            semantic_score = calculate_semantic_similarity(original_lyric, generated_summary)
            evaluation_results['semantic_similarity'].append(semantic_score)

            # ROUGE Scores (between generated summary and reference annotation)
            rouge_scores = calculate_rouge_scores([generated_summary, reference_annotation])
            evaluation_results['rouge1_scores'].append(rouge_scores['rouge1'])
            evaluation_results['rouge2_scores'].append(rouge_scores['rouge2'])
            evaluation_results['rougeL_scores'].append(rouge_scores['rougeL'])

            # BERTScore (between generated summary and reference annotation)
            evaluation_results['bert_scores'].append(bert_score_f1)

            # Store the first five songs as examples
            if len(examples) < 5:
                examples.append({
                    'lyrics': original_lyric,
                    'reference_annotation': reference_annotation,
                    'generated_summary': generated_summary,
                    'metrics': {
                        'content_coverage': coverage_score,
                        'semantic_similarity': semantic_score,
                        'rouge1': rouge_scores['rouge1'],
                        'rouge2': rouge_scores['rouge2'],
                        'rougeL': rouge_scores['rougeL'],
                        'bert_score': bert_score_f1
                    }
                })

        # Memory cleanup. idx advances by batch_size, so this only fires on
        # batches whose start offset is a multiple of 5.
        if idx % 5 == 0:
            torch.cuda.empty_cache()

    # Aggregate results
    metrics = {
        'avg_content_coverage': np.mean(evaluation_results['content_coverage']),
        'avg_semantic_similarity': np.mean(evaluation_results['semantic_similarity']),
        'avg_rouge1': np.mean(evaluation_results['rouge1_scores']),
        'avg_rouge2': np.mean(evaluation_results['rouge2_scores']),
        'avg_rougeL': np.mean(evaluation_results['rougeL_scores']),
        'avg_bert_score': np.mean(evaluation_results['bert_scores'])
    }

    return metrics, examples

def calculate_rouge_scores(texts: List[str]) -> Dict[str, float]:
    """Return the ROUGE-1, ROUGE-2 and ROUGE-L F-measures for a pair of texts.

    ``texts[0]`` and ``texts[1]`` are handed, in that order, to
    ``RougeScorer.score``; stemming is enabled.
    """
    metric_names = ['rouge1', 'rouge2', 'rougeL']
    scorer = rouge_scorer.RougeScorer(metric_names, use_stemmer=True)

    # For supervised evaluation the pair is (generated summary, reference)
    first_text, second_text = texts[0], texts[1]
    pair_scores = scorer.score(first_text, second_text)

    return {name: pair_scores[name].fmeasure for name in metric_names}

def calculate_content_coverage(lyrics: str, summary: str) -> float:
    """Return the share of distinct lyric words that also occur in the summary.

    Words are lower-cased, whitespace-separated tokens. The result is 0.0 when
    either argument is a float (e.g. NaN) or when the lyrics contain no words.
    """
    # Float inputs (such as NaN cells) are scored as zero coverage
    if any(isinstance(value, float) for value in (lyrics, summary)):
        return 0.0

    try:
        lyric_words = set(str(lyrics).lower().split())
        summary_words = set(str(summary).lower().split())
        if not lyric_words:
            return 0.0
        shared = lyric_words & summary_words
        return len(shared) / len(lyric_words)
    except Exception as e:
        print(f"Error processing lyrics/summary: {e}")
        return 0.0

def calculate_semantic_similarity(lyrics: str, summary: str) -> float:
    """Return the token-overlap similarity of lyrics and summary.

    The score is |shared words| / |all distinct words| over lower-cased,
    whitespace-separated tokens. The result is 0.0 when either argument is a
    float (e.g. NaN) or when both texts contain no words.
    """
    # Float inputs (such as NaN cells) are scored as zero similarity
    if any(isinstance(value, float) for value in (lyrics, summary)):
        return 0.0

    try:
        lyric_words = set(str(lyrics).lower().split())
        summary_words = set(str(summary).lower().split())
        all_words = lyric_words | summary_words
        if len(all_words) == 0:
            return 0.0
        return len(lyric_words & summary_words) / len(all_words)
    except Exception as e:
        print(f"Error processing lyrics/summary: {e}")
        return 0.0

def print_evaluation_results(metrics: Dict[str, float], examples: List[Dict]):
    """Print the aggregate scores followed by each stored example.

    Args:
        metrics: Mapping holding the six ``avg_*`` scores.
        examples: Dicts with 'lyrics', 'reference_annotation',
            'generated_summary' and a 'metrics' mapping of per-song scores.
    """
    # (printed label, key in ``metrics``), in display order
    headline_rows = (
        ("Average Content Coverage", 'avg_content_coverage'),
        ("Average Semantic Similarity", 'avg_semantic_similarity'),
        ("Average ROUGE-1", 'avg_rouge1'),
        ("Average ROUGE-2", 'avg_rouge2'),
        ("Average ROUGE-L", 'avg_rougeL'),
        ("Average BERTScore", 'avg_bert_score'),
    )

    print("\nEvaluation Results:")
    for label, key in headline_rows:
        print(f"{label}: {metrics[key]:.3f}")

    print("\nExample Generations:")
    for number, sample in enumerate(examples, start=1):
        # Only the first 200 characters of the lyrics are shown
        lyrics_preview = sample['lyrics'][:200]
        print(f"\nExample {number}:")
        print(f"Original Lyrics (truncated): {lyrics_preview}...")
        print(f"\nReference Annotation: {sample['reference_annotation']}")
        print(f"\nGenerated Summary: {sample['generated_summary']}")
        print("\nMetrics:")
        sample_metrics = sample['metrics']
        for name in sample_metrics:
            print(f"{name}: {sample_metrics[name]:.3f}")

# Usage example:
def run_evaluation(model_path: str, test_df: pd.DataFrame, batch_size: int = 16):
    """Run complete evaluation pipeline

    Loads the model and tokenizer from ``model_path``, evaluates them on
    ``test_df`` and prints the results.

    Args:
        model_path: Directory containing the saved model and tokenizer.
        test_df: DataFrame passed to ``evaluate_supervised_model`` as test data.
        batch_size: Songs per evaluation step; lower it if memory runs out.

    Returns:
        The ``(metrics, examples)`` pair from ``evaluate_supervised_model``.
    """
    tokenizer = T5Tokenizer.from_pretrained(model_path)
    model = T5ForConditionalGeneration.from_pretrained(model_path)
    print("Model and tokenizer loaded successfully!")

    metrics, examples = evaluate_supervised_model(
        model,
        tokenizer,
        test_data=test_df,
        batch_size=batch_size
    )

    print_evaluation_results(metrics, examples)
    return metrics, examples

In [None]:
# Run the evaluation on the held-out test_df, loading the model from model_path;
# the aggregate metrics and example generations are printed and also returned.
metrics, examples = run_evaluation(model_path, test_df)

Model and tokenizer loaded successfully!


  0%|          | 0/40 [00:00<?, ?it/s]Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  2%|▎         | 1/40 [01:56<1:15:27, 116.09s/it]Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaModel were no


Evaluation Results:
Average Content Coverage: 0.121
Average Semantic Similarity: 0.095
Average ROUGE-1: 0.213
Average ROUGE-2: 0.043
Average ROUGE-L: 0.135
Average BERTScore: 0.810

Example Generations:

Example 1:
Original Lyrics (truncated): (Someone for me) 
(Someone for me) 
I'm here alone on a Friday night 
Waiting here beside the phone 
The TV, radio, and me 
Really ain't been getting along 

I wish that I could find a way 

To party ...

Reference Annotation: “Someone for Me” is the third track from Whitney Houston’s debut self-titled studio album,  , written by Raymond Jones & Freddie Washington. It appeared as the B-side to Houston’s 1985 single, “Thinking About You”.

Generated Summary: Meaning and themes: “Curricus” is a song about a woman who has been dating for a while. The song is based on a relationship with a man who is not just a singer, but a person who can be a part of a dream. It’s the first time this song has been re-released.

Metrics:
content_coverage: 0.102
sem


