# Install and Import Libraries

In [1]:
!pip install --upgrade transformers --quiet
!pip install evaluate --quiet
!pip install --upgrade sentencepiece --quiet

import pandas as pd
import numpy as np

import evaluate

from transformers import T5ForConditionalGeneration, T5Tokenizer

# Instantiate T5 Models & Tokenizers

In [2]:
# Checkpoints under comparison: the "t5-large" checkpoint name and a
# fine-tuned copy read from a local directory.
# NOTE(review): the fine-tuned path is an absolute path on one machine and
# has to be edited before this cell can run anywhere else.
baseline_checkpoint = "t5-large"
finetuned_checkpoint = "/Users/mikelu/extssd/git/ucb_mids_W266_project/t5-large-lyric-generation/"

# Handed to both tokenizers as their model_max_length.
max_length = 512


def _load_model_and_tokenizer(checkpoint):
    """Load the T5 model, then its tokenizer, from `checkpoint`.

    Returns a `(model, tokenizer)` tuple. The tokenizer is created with
    `model_max_length=max_length`.
    """
    model = T5ForConditionalGeneration.from_pretrained(checkpoint)
    tokenizer = T5Tokenizer.from_pretrained(
        checkpoint,
        model_max_length=max_length
    )
    return model, tokenizer


baseline_model, baseline_tokenizer = _load_model_and_tokenizer(baseline_checkpoint)
finetuned_model, finetuned_tokenizer = _load_model_and_tokenizer(finetuned_checkpoint)

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thouroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


# Load Test Data

Each record represents a song, with lyrics stored as multi-line text in the "lyrics" column.

In [3]:
# Read the test set and discard the "Unnamed: 0" column that the CSV carries
# (chained drop instead of an in-place mutation, so the cell is idempotent).
df = pd.read_csv("test_no_markers.csv").drop(columns=["Unnamed: 0"])
df

Unnamed: 0,id,title,tag,artist,year,lyrics
0,5465961,Be Where We Are,country,Julia Cole,2021,Cash only bar with a juke box\nPut a dollar in...
1,1781039,Only Daddy That’ll Walk the Line,country,Waylon Jennings,1968,Everybody knows you've been stepping on my toe...
2,672903,I Cant Be Your Sweetheart,country,The Carter Family,1998,Last night I told my heart's love\nAll under t...
3,934006,Fire Line Road,country,James McMurtry,2008,"My name is Alice Walker, they never told me wh..."
4,1962549,Run Around,country,Austin Lucas,2011,You want answers; I don't have any\nJust more ...
...,...,...,...,...,...,...
19995,2391145,Life,rap,Criss Lyric,2015,My niggas all about cash\nMy niggas tryna rack...
19996,7456521,Dumptruck,rap,Full Tac,2021,"Dump, dump, dump, dump, dump\nDump it\nBack it..."
19997,2412522,Real Nigga Files,rap,Kodak Black,2015,Real nigga files (Hell yeah)\nProject files (Y...
19998,7799558,Science Project,rap,Fly Anakin,2018,My bitch a choosy lover never fuck without a r...


In [4]:
# Number of songs (non-null ids) per genre tag in the full test set.
df.groupby("tag")[["id"]].count()

Unnamed: 0_level_0,id
tag,Unnamed: 1_level_1
country,5000
rap,5000
rb,5000
rock,5000


Running both models against all 20,000 records would be too time-consuming, so we take a stratified sample of 250 songs from each of the 4 genres (1,000 songs in total).

In [5]:
# Stratified sample: draw 250 songs from every genre, reusing the same seed
# for each genre so the draw is reproducible.
#
# The per-genre samples are concatenated explicitly instead of going through
# `groupby(...).apply(...)`: applying a function that operates on the grouping
# column is deprecated in pandas 2.2 and is slated to exclude that column,
# which would lose "tag". Iterating the groups yields them in sorted key
# order, so the selected rows and their order match the `apply` version.
df = pd.concat(
    [songs.sample(250, random_state=1868) for _, songs in df.groupby("tag")]
)

# Replace the original row labels with a fresh 0..n-1 index.
df = df.reset_index(drop=True)
df

Unnamed: 0,id,title,tag,artist,year,lyrics
0,6728186,Live and Forget,country,FRIEDRIICH,2020,Pick one of my random memories\nI can precisel...
1,5057485,October,country,Kody West,2019,"It's October\nThe leaves are falling down, fal..."
2,7128594,Something Goods Gonna Happen,country,The Wolfe Brothers,2021,"Well, you don't like my smoking\nYou don't lik..."
3,194843,Fine Line,country,Little Big Town,2007,Completely complacent\nSo excitedly vacant\nI ...
4,7764202,Best Of Me,country,Josh Ramsay,2022,Feels like home doesn’t look right\nThe truth ...
...,...,...,...,...,...,...
995,5636396,State Of Mind,rock,Silence the Crow,2020,Take your hands off me\nI'm beyond your touch\...
996,7333221,Rising Seas,rock,Midnight Oil,2021,Every child put down your toys\nAnd come insid...
997,1024796,Lights Camera Action,rock,Nonpoint,2012,Light up the room with a little more light\nI ...
998,6785137,Whats Going On,rock,Metal Orizon,2001,I don’t what the world is coming to\nEverythin...


In [6]:
# Number of songs (non-null ids) per genre tag after sampling.
df.groupby("tag")[["id"]].count()

Unnamed: 0_level_0,id
tag,Unnamed: 1_level_1
country,250
rap,250
rb,250
rock,250


# Generate Lyrics

For each song in the test dataset:

1. Construct a prompt consisting of a task prefix followed by the first 8 lines of the song's lyrics (for songs with 8 or fewer lines, all lines except the last one)
2. Tokenize the prompt
3. Have the baseline model generate tokens representing the next line of song lyrics
4. Repeat steps 1-3 for the finetuned model, which uses a different task prefix
5. Store both models' results in a dictionary (to be converted to a DataFrame later)

The reference for each model prediction is the next line from the song's lyrics.



In [7]:
def generate_next_line(model, tokenizer, prompt, max_new_tokens):
    """Run one prompt through `model` and return the generated text.

    Parameters
    ----------
    model : model object exposing `generate`
    tokenizer : tokenizer paired with `model`
    prompt : str
        Task prefix plus lyric snippet.
    max_new_tokens : int
        Upper bound on the number of tokens to generate.

    Returns
    -------
    str
        Decoded output with special tokens removed.
    """
    # Prompts longer than `max_length` tokens are truncated.
    encoded = tokenizer(
        prompt,
        max_length=max_length,
        truncation=True,
        return_tensors="pt"
    )

    # Forward the attention mask along with the ids so generate() does not
    # have to infer it from the input.
    generated = model.generate(
        encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
        max_new_tokens=max_new_tokens,
        num_beams=2,
        repetition_penalty=1.2
    )

    # convert the output to a text string
    return "".join(
        tokenizer.batch_decode(
            generated,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False
        )
    )


# One result record per song (a list of dicts, despite the name).
output_dict = [None] * len(df)

max_input_lyrics = 8        # number of lines of the song's lyrics to pass to the model

# Enumerate by position so the slot written in `output_dict` never depends on
# the DataFrame's index labels.
for i, (_, row) in enumerate(df.iterrows()):

    print(f"{i} of {len(df)}")

    lyric_lines = row.lyrics.split("\n")

    # Use the first `max_input_lyrics` lines as the prompt; if the song is
    # too short for that, use every line except the last so that one line
    # is always left over to serve as the reference.
    num_lyric_lines = min(max_input_lyrics, len(lyric_lines) - 1)

    # Generation budget: the longest line of the whole song, measured in
    # space-separated words, with a floor of 20.
    # NOTE(review): this is a word count used as a token budget, and it is
    # computed over all lines, including those after the prompt.
    max_words = max(20, max(len(line.split(" ")) for line in lyric_lines))

    snippet = "\n".join(lyric_lines[:num_lyric_lines])
    reference = lyric_lines[num_lyric_lines]

    # the baseline T5 model is prompted with the "summarize:" task prefix
    baseline_prompt = "summarize: " + snippet
    baseline_output = generate_next_line(
        baseline_model, baseline_tokenizer, baseline_prompt, max_words
    )

    # the finetuned model uses a different task prefix
    finetuned_prompt = "write next line for: " + snippet
    finetuned_output = generate_next_line(
        finetuned_model, finetuned_tokenizer, finetuned_prompt, max_words
    )

    # store the results to the output dictionary array
    output_dict[i] = {
        "song_id" : row.id,
        "input" : snippet,
        "reference" : reference,
        "baseline_output" : baseline_output,
        "finetuned_output" : finetuned_output
    }

0 of 1000
1 of 1000
2 of 1000
3 of 1000
4 of 1000
5 of 1000
6 of 1000
7 of 1000
8 of 1000
9 of 1000
10 of 1000
11 of 1000
12 of 1000
13 of 1000
14 of 1000
15 of 1000
16 of 1000
17 of 1000
18 of 1000
19 of 1000
20 of 1000
21 of 1000
22 of 1000
23 of 1000
24 of 1000
25 of 1000
26 of 1000
27 of 1000
28 of 1000
29 of 1000
30 of 1000
31 of 1000
32 of 1000
33 of 1000
34 of 1000
35 of 1000
36 of 1000
37 of 1000
38 of 1000
39 of 1000
40 of 1000
41 of 1000
42 of 1000
43 of 1000
44 of 1000
45 of 1000
46 of 1000
47 of 1000
48 of 1000
49 of 1000
50 of 1000
51 of 1000
52 of 1000
53 of 1000
54 of 1000
55 of 1000
56 of 1000
57 of 1000
58 of 1000
59 of 1000
60 of 1000
61 of 1000
62 of 1000
63 of 1000
64 of 1000
65 of 1000
66 of 1000
67 of 1000
68 of 1000
69 of 1000
70 of 1000
71 of 1000
72 of 1000
73 of 1000
74 of 1000
75 of 1000
76 of 1000
77 of 1000
78 of 1000
79 of 1000
80 of 1000
81 of 1000
82 of 1000
83 of 1000
84 of 1000
85 of 1000
86 of 1000
87 of 1000
88 of 1000
89 of 1000
90 of 1000
91 of 100

692 of 1000
693 of 1000
694 of 1000
695 of 1000
696 of 1000
697 of 1000
698 of 1000
699 of 1000
700 of 1000
701 of 1000
702 of 1000
703 of 1000
704 of 1000
705 of 1000
706 of 1000
707 of 1000
708 of 1000
709 of 1000
710 of 1000
711 of 1000
712 of 1000
713 of 1000
714 of 1000
715 of 1000
716 of 1000
717 of 1000
718 of 1000
719 of 1000
720 of 1000
721 of 1000
722 of 1000
723 of 1000
724 of 1000
725 of 1000
726 of 1000
727 of 1000
728 of 1000
729 of 1000
730 of 1000
731 of 1000
732 of 1000
733 of 1000
734 of 1000
735 of 1000
736 of 1000
737 of 1000
738 of 1000
739 of 1000
740 of 1000
741 of 1000
742 of 1000
743 of 1000
744 of 1000
745 of 1000
746 of 1000
747 of 1000
748 of 1000
749 of 1000
750 of 1000
751 of 1000
752 of 1000
753 of 1000
754 of 1000
755 of 1000
756 of 1000
757 of 1000
758 of 1000
759 of 1000
760 of 1000
761 of 1000
762 of 1000
763 of 1000
764 of 1000
765 of 1000
766 of 1000
767 of 1000
768 of 1000
769 of 1000
770 of 1000
771 of 1000
772 of 1000
773 of 1000
774 of 1000
775 

# Convert Output Dictionary to DataFrame

In [8]:
# Turn the per-song result records into a table: one row per song, one
# column per record key.
results_df = pd.DataFrame(data=output_dict)
results_df

Unnamed: 0,song_id,input,reference,baseline_output,finetuned_output
0,6728186,Pick one of my random memories\nI can precisel...,I need to live and forget,pick one of my random memories i can precisely...,It's hard sometimes not to be able to forget
1,5057485,"It's October\nThe leaves are falling down, fal...",Listen to me,"it's October The leaves are falling down, fall...","Is this just a dream, is this just a dream?"
2,7128594,"Well, you don't like my smoking\nYou don't lik...","Try a tiny bit of crazy, I'll try to toe the line","i like a lot, you like a little Let's take a s...",Let's take a shot and we can meet in the middle
3,194843,Completely complacent\nSo excitedly vacant\nI ...,Baby its a fine line,completely complacent so excitedly vacant i ke...,You call this comfortably normal but i call it...
4,7764202,Feels like home doesn’t look right\nThe truth ...,"Forgive me love, forgive me love",ain’t it funny how things you tell yourself Re...,"Can you forgive me love, forgive me love"
...,...,...,...,...,...
995,5636396,Take your hands off me\nI'm beyond your touch\...,"Love is so good, so good",take your hands off me i'm beyond your touch a...,I'm dancing with the demon
996,7333221,Every child put down your toys\nAnd come insid...,Climate denying,every child put down your toys and come inside...,Temperature rising
997,1024796,Light up the room with a little more light\nI ...,"Can’t decide, Gaga or Bieber?",pot commited is a new album from the british s...,Yellow or pink?
998,6785137,I don’t what the world is coming to\nEverythin...,There is never gonna be a bright side,i don’t what the world is coming to everything...,I don’t like the way things turn around here


In [9]:
# Persist the generation results (the row index is written as the first column).
results_df.to_csv(path_or_buf="t5_results.csv")

# Calculate ROUGE and BLEU Scores

In [14]:
# Load both metrics once; each is then computed per model against the same
# reference lines.
rouge = evaluate.load("rouge")
bleu = evaluate.load("bleu")

reference_lines = results_df["reference"]
baseline_lines = results_df["baseline_output"]
finetuned_lines = results_df["finetuned_output"]

# Baseline model
baseline_rouge_scores = rouge.compute(predictions=baseline_lines, references=reference_lines)
baseline_bleu_scores = bleu.compute(predictions=baseline_lines, references=reference_lines)

# Finetuned model
finetuned_rouge_scores = rouge.compute(predictions=finetuned_lines, references=reference_lines)
finetuned_bleu_scores = bleu.compute(predictions=finetuned_lines, references=reference_lines)

# One summary row per model: the BLEU score followed by the three ROUGE
# variants reported in the table.
rouge_variants = ("rouge1", "rouge2", "rougeL")
score_dict = [
    {
        "model" : label,
        "bleu" : bleu_result["bleu"],
        **{variant: rouge_result[variant] for variant in rouge_variants}
    }
    for label, bleu_result, rouge_result in (
        ("baseline", baseline_bleu_scores, baseline_rouge_scores),
        ("finetuned", finetuned_bleu_scores, finetuned_rouge_scores),
    )
]

score_df = pd.DataFrame(score_dict)
score_df

Unnamed: 0,model,bleu,rouge1,rouge2,rougeL
0,baseline,0.020612,0.117941,0.036777,0.107551
1,finetuned,0.066825,0.173543,0.08324,0.166543


In [11]:
# Persist the score summary (the row index is written as the first column).
score_df.to_csv(path_or_buf="t5_scores.csv")