In [None]:
!pip install transformers


In [None]:
import os

import numpy as np
import pandas as pd
import torch.nn.functional as F
from torch.utils.data import DataLoader
from tqdm import tqdm,trange
from transformers import GPT2Tokenizer, GPT2LMHeadModel, AdamW, get_linear_schedule_with_warmup

In [None]:
# Read the raw tables; read_csv reads the lyrics CSV straight from its .zip archive.
lyrics_csv_path = '/content/lyrics-data.csv.zip'
artists_csv_path = '/content/artists-data.csv'
lyrics = pd.read_csv(lyrics_csv_path)
artists = pd.read_csv(artists_csv_path)


In [None]:
# Show the column names of both tables (artists first, then lyrics).
print(artists.columns.tolist())
print(lyrics.columns.tolist())

['Artist', 'Songs', 'Popularity', 'Link', 'Genre', 'Genres']
['ALink', 'SName', 'SLink', 'Lyric', 'Idiom']


In [None]:
# Choose the artist to model and look up the link(s) that identify them in the artists table.
artist_to_predict = "Ariana Grande"
link_to_predict = artists.loc[artists['Artist'] == artist_to_predict, 'Link']

In [None]:
# Keep the songs whose artist link equals the first link found for the chosen artist, then display them.
artist_lyrics = lyrics[lyrics['ALink'].eq(link_to_predict.iloc[0])]
artist_lyrics

Unnamed: 0,ALink,SName,SLink,Lyric,Idiom
102373,/ariana-grande/,7 Rings,/ariana-grande/7-rings.html,"Yeah, breakfast at Tiffany's. And bottles of b...",ENGLISH
102374,/ariana-grande/,"Break Up With Your Girlfriend, I'm Bored",/ariana-grande/break-up-with-your-girlfriend-i...,You got me some type of way (Hmm). Ain't used ...,ENGLISH
102375,/ariana-grande/,"Thank U, Next",/ariana-grande/thank-u-next.html,Thought I'd end up with Sean. But he wasn't a ...,ENGLISH
102376,/ariana-grande/,Needy,/ariana-grande/needy.html,If you take too long to hit me back. I can't p...,ENGLISH
102377,/ariana-grande/,7 Rings (Feat. 2 Chainz) (Remix),/ariana-grande/7-rings-feat-2-chainz-remix.html,"Yeah, breakfast at Tiffany's. And bottles of b...",ENGLISH
...,...,...,...,...,...
102548,/ariana-grande/,Wizard and I (Wicked Tribute),/ariana-grande/wizard-and-i-wicked-tribute.html,"Oh, Miss Elphaba. Many years I have waited. Fo...",ENGLISH
102549,/ariana-grande/,You Don't Know Me,/ariana-grande/you-dont-know-me.html,Center of attention once again. They don't und...,ENGLISH
102550,/ariana-grande/,You'll Never Know,/ariana-grande/youll-never-know.html,Now you're falling tell me how you feel. This ...,ENGLISH
102551,/ariana-grande/,You're My Only Shawty,/ariana-grande/youre-my-only-shawty.html,"Ariana! iyaz!. Last night was crazy, and today...",ENGLISH


In [None]:
# Keep only the songs with fewer than 370 space-separated words.
# NOTE(review): presumably this cap bounds the sequence length fed to the model
# (the dataset class below also truncates each lyric to `max_length` characters) -- confirm and document the threshold.
artist_lyrics_sized = artist_lyrics[artist_lyrics['Lyric'].apply(lambda x: len(x.split(' ')) < 370)] ### TODO: confirm why the 370-word cap is needed

In [None]:
artist_lyrics_sized.shape

(97, 5)

In [None]:
#Create a very small test set to compare generated text with the reality
# Hold out 10% of the size-filtered songs. A fixed random_state keeps the split
# identical after a kernel restart, so reported results can be reproduced.
# NOTE: this cell overwrites `artist_lyrics_sized`; re-running it without re-running
# the filtering cell above shrinks the training set again.
test_set = artist_lyrics_sized.sample(
    n=round(artist_lyrics_sized.shape[0] * 0.1),
    random_state=42,
)
# Everything that was not sampled into the test set stays in the training set.
artist_lyrics_sized = artist_lyrics_sized.loc[~artist_lyrics_sized.index.isin(test_set.index)]


In [None]:
#Reset the indexes
# reset_index() is called without drop=True, so the previous row labels are kept in a
# new 'index' column and both frames get a fresh 0..n-1 index. Later cells look rows up
# by that position-like label (e.g. test_set['Lyric'][i]).
test_set = test_set.reset_index()
artist_lyrics_sized = artist_lyrics_sized.reset_index()


In [None]:
# Test set only: move the final 20 whitespace-separated words of every lyric into
# 'True_end_lyrics' and keep the remaining words as the prompt in 'Lyric'.
lyric_words = test_set['Lyric'].str.split()
test_set['True_end_lyrics'] = lyric_words.str[-20:].apply(' '.join)
test_set['Lyric'] = lyric_words.str[:-20].apply(' '.join)

In [None]:
test_set

Unnamed: 0,index,ALink,SName,SLink,Lyric,Idiom,True_end_lyrics
0,102456,/ariana-grande/,Higher,/ariana-grande/higher.html,Been round n' round in circles. Tryn'a find th...,ENGLISH,"I got that fire, you got me open. You dig it d..."
1,102459,/ariana-grande/,I Don't Care,/ariana-grande/i-dont-care.html,Used to cry 'bout some crazy shit before. I us...,ENGLISH,I don't care about it anymore. Yeah. I-I-I. I-...
2,102413,/ariana-grande/,Be Alright,/ariana-grande/be-alright.html,Midnight shadows. Where find love is a battle....,ENGLISH,decide it. We're gonna be alright. We're gonna...
3,102402,/ariana-grande/,Abc,/ariana-grande/abc.html,A buh-buh buh-buh buh-buh. You went to school ...,ENGLISH,"simple as do re mi. Abc, 123, baby you and me,..."
4,102404,/ariana-grande/,All My Love,/ariana-grande/all-my-love.html,"Sometimes, I think we're the brightest stars. ...",ENGLISH,"love up on the mountain top. All my love, all ..."
5,102401,/ariana-grande/,​Goodnight n Go,/ariana-grande/goodnight-n-go.html,Tell me why you gotta look at me that way. You...,ENGLISH,"goodnight and go. Want to say goodnight. Baby,..."
6,102540,/ariana-grande/,True Love,/ariana-grande/true-love.html,On the first day of christmas. When you gave m...,ENGLISH,love!. My true love!. You are my true love. Ah...
7,102465,/ariana-grande/,In Your Hands,/ariana-grande/in-your-hands.html,In your hands. I'll turn into something better...,ENGLISH,"you. Body talk, 'cause you understand. With yo..."
8,102415,/ariana-grande/,Beauty and The Beast (Feat. John Legend),/ariana-grande/beauty-and-the-beast-feat-john-...,Tale as old as time. True as it can be. Barely...,ENGLISH,old as time). Song as old as rhyme. Beauty and...
9,102389,/ariana-grande/,Dangerous Woman,/ariana-grande/dangerous-woman.html,Don't need permission. Made my decision to tes...,ENGLISH,"boy. Yeah, there's somethin' 'bout you boy. Ye..."


In [None]:
import torch

In [None]:
class SongLyrics():
    """Map-style dataset of GPT-2 token-id tensors, one tensor per song lyric.

    Each lyric is cut to its first ``max_length`` characters, optionally prefixed
    with a ``<|control_code|>`` marker, suffixed with ``<|endoftext|>`` and encoded
    with the GPT-2 tokenizer named by ``gpt2_type``.

    Args:
        control_code: string placed in the ``<|...|>`` prefix of every sample.
            For backward compatibility a non-string value (e.g. a pandas Series or
            list of lyrics, which is how existing callers invoke this class) is
            treated as the lyric collection itself and no prefix is added.
            Previously such a value was string-formatted into the prefix of every
            sample, which polluted the training text.
        truncate: when true, keep at most the first 20000 encoded lyrics.
        gpt2_type: name passed to ``GPT2Tokenizer.from_pretrained``.
        max_length: maximum number of *characters* kept from each lyric.
        lyrics: iterable of lyric strings to encode. When omitted (and
            ``control_code`` is a string or None) the notebook-level
            ``artist_lyrics_sized['Lyric']`` column is used, as before.
    """

    def __init__(self, control_code, truncate=False, gpt2_type="gpt2", max_length=1024, lyrics=None):
        if isinstance(control_code, str):
            prefix = f"<|{control_code}|>"
        else:
            # Legacy call style: the lyric collection was passed as `control_code`.
            prefix = ""
            if lyrics is None:
                lyrics = control_code
        if lyrics is None:
            # Original behaviour: fall back to the notebook's global training frame.
            lyrics = artist_lyrics_sized['Lyric']

        self.tokenizer = GPT2Tokenizer.from_pretrained(gpt2_type)
        self.lyrics = [
            torch.tensor(self.tokenizer.encode(f"{prefix}{row[:max_length]}<|endoftext|>"))
            for row in lyrics
        ]
        if truncate:
            self.lyrics = self.lyrics[:20000]
        self.lyrics_count = len(self.lyrics)

    def __len__(self):
        """Number of encoded lyrics."""
        return self.lyrics_count

    def __getitem__(self, item):
        """Return the token-id tensor stored at position ``item``."""
        return self.lyrics[item]
    
# Build the training dataset of tokenised lyrics; truncate=True keeps at most the first 20000 entries.
dataset = SongLyrics(artist_lyrics_sized['Lyric'], truncate=True, gpt2_type="gpt2")      

Downloading:   0%|          | 0.00/0.99M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/446k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.29M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/665 [00:00<?, ?B/s]

In [None]:
len(dataset.lyrics) # tokenised lyrics for the 87 training rows.

87

In [None]:
# Collect the shape of every encoded lyric tensor to inspect the sequence lengths, then display them.
a = [l.shape for l in dataset.lyrics]
a

In [None]:
#https://towardsdatascience.com/what-is-gradient-accumulation-in-deep-learning-ec034122cfa#:~:text=Gradient%20accumulation%20means%20running%20a,to%20compute%20the%20variable%20updates.

In [None]:
# Load the pretrained 'gpt2' tokenizer and language-model-head model that are fine-tuned and sampled from below.
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')

Downloading:   0%|          | 0.00/523M [00:00<?, ?B/s]

In [None]:
def pack_tensor(new_tensor, packed_tensor, max_seq_len):
    """Try to add ``new_tensor`` to an existing pack along dimension 1.

    Returns a ``(tensor, packed, remainder)`` triple:
      * no pack yet            -> ``(new_tensor, True, None)``
      * combined length fits   -> ``(new_tensor + pack without its first column, True, None)``
      * combined length too big -> ``(packed_tensor, False, new_tensor)``
    """
    # Nothing has been packed so far: the new tensor starts the pack.
    if packed_tensor is None:
        return new_tensor, True, None

    combined_len = new_tensor.size(1) + packed_tensor.size(1)
    if combined_len <= max_seq_len:
        # Prepend the new tensor and drop the first column of the existing pack.
        merged = torch.cat((new_tensor, packed_tensor[:, 1:]), dim=1)
        return merged, True, None

    # Would overflow: hand back the untouched pack and the tensor that did not fit.
    return packed_tensor, False, new_tensor

In [None]:
# train_dataloader = DataLoader(dataset, batch_size=1, shuffle=True)
# input_tensor = None
# for idx, entry in enumerate(train_dataloader):
#   print(entry.shape)
#   (input_tensor, carry_on, remainder) = pack_tensor(entry, input_tensor, 768)
#   if carry_on and idx != len(train_dataloader) - 1:
#     print("first:", idx)
#   else:
#     print("second:", idx)  

In [None]:
def train(
    dataset, model, tokenizer,
    batch_size=10, epochs=5, lr=2e-5,
    #max_seq_len=400,
    warmup_steps=200,
    gpt2_type="gpt2", output_dir="/content/", output_prefix="bhsingha",
    #test_mode=False,
    save_model_on_epoch=False,
):
    """Fine-tune ``model`` on ``dataset`` using gradient accumulation.

    Samples are fed one at a time (DataLoader batch size 1, shuffled) because the
    encoded lyrics have different lengths; gradients are accumulated over
    ``batch_size`` samples before each optimizer/scheduler step.

    Args:
        dataset: object with ``__len__``/``__getitem__`` yielding 1-D token-id tensors.
        model: language model that returns the loss as its first output when
            called as ``model(input_ids, labels=input_ids)``.
        tokenizer: unused; kept so existing callers keep working.
        batch_size: number of samples whose gradients are accumulated per update.
        epochs: number of passes over ``dataset``.
        lr: peak learning rate given to AdamW.
        warmup_steps: number of optimizer updates of linear warm-up. If this is
            larger than the total number of updates, the peak rate is never reached.
        gpt2_type: unused; kept so existing callers keep working.
        output_dir: directory for per-epoch checkpoints.
        output_prefix: file-name prefix for per-epoch checkpoints.
        save_model_on_epoch: when true, save ``model.state_dict()`` after each epoch.

    Returns:
        The fine-tuned model, left on the device used for training.
    """
    # Use the GPU when one is present, otherwise fall back to the CPU instead of crashing.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    model.train()  # put the module in training mode

    train_dataloader = DataLoader(dataset, batch_size=1, shuffle=True)

    accumulation_steps = max(1, int(batch_size))
    total_samples = len(train_dataloader) * epochs
    # Ceil division: a final partial accumulation window still produces one update.
    total_updates = max(1, -(-total_samples // accumulation_steps))

    optimizer = AdamW(model.parameters(), lr=lr)
    # The schedule needs the real number of updates; the previous value of -1 made
    # the learning rate collapse to 0 as soon as the warm-up phase was over.
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=warmup_steps, num_training_steps=total_updates
    )

    def _apply_update():
        # One optimizer update followed by a schedule tick and a gradient reset.
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()

    optimizer.zero_grad()
    loss = 0
    pending = 0  # samples whose gradients are accumulated but not yet applied

    for epoch in range(epochs):
        print(f"Training epoch {epoch}")
        print("loss", loss)
        for entry in tqdm(train_dataloader):
            input_tensor = entry.to(device)  # inputs must live on the same device as the model
            outputs = model(input_tensor, labels=input_tensor)
            loss = outputs[0]
            loss.backward()  # adds this sample's gradients to the accumulated ones

            pending += 1
            # Update only after a full window; the old check fired on the very first sample.
            if pending == accumulation_steps:
                _apply_update()
                pending = 0

        if save_model_on_epoch:
            torch.save(
                model.state_dict(),
                os.path.join(output_dir, f"{output_prefix}-{epoch}.pt"),
            )

    # Apply gradients left over from an incomplete final window.
    if pending:
        _apply_update()

    return model

In [None]:
# Fine-tune with the default hyper-parameters; `model` is rebound to the value returned by train().
model = train(dataset, model, tokenizer)

Training epoch 0
loss 0


87it [00:29,  2.91it/s]


Training epoch 1
loss tensor(3.1217, device='cuda:0', grad_fn=<NllLossBackward0>)


87it [00:29,  2.93it/s]


Training epoch 2
loss tensor(2.8444, device='cuda:0', grad_fn=<NllLossBackward0>)


87it [00:29,  2.92it/s]


Training epoch 3
loss tensor(2.8639, device='cuda:0', grad_fn=<NllLossBackward0>)


87it [00:29,  2.96it/s]


Training epoch 4
loss tensor(2.5540, device='cuda:0', grad_fn=<NllLossBackward0>)


87it [00:29,  2.95it/s]


In [None]:
##todo
## zero_grad vs no_grad
## cuda and assigning to it
##temperature

In [None]:
# Scratch cell: walk through one forward pass and the first steps of top-p (nucleus) filtering by hand.
# Encode a sample prompt to token ids and add a leading batch dimension with unsqueeze(0).
generated = torch.tensor(tokenizer.encode("Bhumika is singher")).unsqueeze(0)
# print(tokenizer.encode("Bhumika is singher")) >> [33, 17047, 9232, 318, 1702, 372]
# print(tokenizer.encode("asdf asd"))  >> [292, 7568, 355, 67]
# TODO: how is the tokenizer working?

# Put the input ids and the model on the same (GPU) device before the forward pass.
device=torch.device("cuda")
generated = generated.to(device)
model = model.cuda()
outputs = model(generated, labels=generated) #TODO: what is labels? (with labels passed, the first two outputs are unpacked below as loss and logits)
print(generated.shape)
print(len(outputs))

loss, logits = outputs[:2]
print(logits.shape)
print(logits) # TODO: what vocabulary being used? (presumably the loaded tokenizer's vocabulary, one score per entry -- confirm)

# Scores for the token that follows the last input position.
c = logits[:, -1, :]
print(c.shape)
print(c)

# Rank candidate tokens from most to least likely.
sorted_logits, sorted_indices = torch.sort(c, descending=True)
print(sorted_logits)
print("&&&&", sorted_indices)
print(F.softmax(sorted_logits, dim=-1))

# Running total of probability mass over the ranked tokens.
cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
print(cumulative_probs)
# True for every ranked position whose cumulative probability already exceeds 0.9.
sorted_indices_to_remove = cumulative_probs > 0.9

print(sorted_indices_to_remove)
print(len(sorted_indices_to_remove[0]))

torch.Size([1, 6])
3
torch.Size([1, 6, 50257])
tensor([[[ -51.6173,  -50.5283,  -51.1241,  ...,  -59.1834,  -56.0182,
           -51.0668],
         [ -56.0894,  -56.0840,  -63.1137,  ...,  -65.5587,  -63.2758,
           -58.3757],
         [ -64.1122,  -63.8112,  -70.2693,  ...,  -75.5044,  -72.4760,
           -67.8741],
         [ -94.8158,  -96.0311,  -99.4971,  ..., -104.0391, -101.4579,
           -97.7280],
         [ -86.3068,  -87.3774,  -90.7714,  ...,  -98.4792,  -95.1202,
           -89.2485],
         [ -68.5085,  -71.4218,  -76.8920,  ...,  -81.3192,  -80.0261,
           -72.3964]]], device='cuda:0', grad_fn=<UnsafeViewBackward0>)
torch.Size([1, 50257])
tensor([[-68.5085, -71.4218, -76.8920,  ..., -81.3192, -80.0261, -72.3964]],
       device='cuda:0', grad_fn=<SliceBackward0>)
tensor([[-64.9159, -66.3125, -66.7860,  ..., -89.8602, -90.3806, -92.2849]],
       device='cuda:0', grad_fn=<SortBackward0>)
&&&& tensor([[  621,    11,    13,  ..., 47490, 13945, 39374]], devic

In [None]:
print(sorted_indices_to_remove.shape)

torch.Size([1, 50257])


In [None]:
# Shift the removal mask one position to the right so the first token that crosses the
# cumulative-probability threshold is kept, and never remove the top-ranked token.
sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
sorted_indices_to_remove[..., 0] = 0
print(len(sorted_indices_to_remove[0]))

50257


In [None]:
weights = torch.tensor([0, 10, 21, 10], dtype=torch.float)
print(torch.multinomial(weights, 1)) ### draws 1 index (0..3) with probability proportional to its weight; index 0 has weight 0, so it is never drawn
## change the weight values and re-run to see how the sampled index changes

tensor([3])


In [None]:
def generate(
    model,
    tokenizer,
    prompt,
    entry_count=10,
    entry_length=30, #maximum number of generated tokens per entry
    top_p=0.8,
    temperature=1.,
):
    """Sample ``entry_count`` continuations of ``prompt`` with top-p (nucleus) sampling.

    Args:
        model: language model whose first output, when called with a
            ``(1, seq_len)`` tensor of token ids, is the logits tensor of shape
            ``(1, seq_len, vocab_size)``.
        tokenizer: object providing ``encode(str) -> list[int]`` and
            ``decode(list[int]) -> str``.
        prompt: text every continuation starts from.
        entry_count: number of independent continuations to sample.
        entry_length: maximum number of tokens generated per continuation.
        top_p: cumulative-probability cut-off; only the most likely tokens whose
            total probability reaches this mass are eligible for sampling.
        temperature: divisor applied to the logits; non-positive values are treated as 1.0.

    Returns:
        List of ``entry_count`` decoded strings (prompt + continuation). Every string
        ends with ``<|endoftext|>``: either sampled by the model, or appended when
        ``entry_length`` tokens were generated without it.
    """
    model.eval()  # inference mode (e.g. dropout disabled)
    generated_list = []

    filter_value = -float("Inf")

    # Build the inputs on whatever device the model lives on (previously always CPU).
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    # Loop-invariant encodings, computed once instead of once per step.
    eos_token_ids = tokenizer.encode("<|endoftext|>")
    prompt_ids = tokenizer.encode(prompt)

    with torch.no_grad():
        for entry_idx in trange(entry_count):
            entry_finished = False
            generated = torch.tensor(prompt_ids, dtype=torch.long, device=device).unsqueeze(0)

            for i in range(entry_length):
                # No labels are passed: only the logits are needed, not the loss.
                logits = model(generated)[0]

                # (batch_size, sequence_length, vocab_size) -> scores for the next token only:
                # the last position holds the prediction for the token that follows the sequence.
                logits = logits[:, -1, :] / (temperature if temperature > 0 else 1.0)

                sorted_logits, sorted_indices = torch.sort(logits, descending=True)
                # Cumulative mass lets the cut-off adapt to how peaked the distribution is.
                cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)

                ### REF : https://gist.github.com/thomwolf/1a5a29f6962089e871b94cbd09daf317
                sorted_indices_to_remove = cumulative_probs > top_p
                # Shift the indices to the right to keep also the first token above the threshold
                sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
                sorted_indices_to_remove[..., 0] = 0

                indices_to_remove = sorted_indices[sorted_indices_to_remove]
                logits[:, indices_to_remove] = filter_value

                next_token = torch.multinomial(F.softmax(logits, dim=-1), num_samples=1)
                generated = torch.cat((generated, next_token), dim=1)

                if next_token.item() in eos_token_ids:
                    entry_finished = True
                    break

            # generated has shape (1, seq_len); tolist() also works for one token and for GPU tensors.
            output_text = tokenizer.decode(generated[0].tolist())
            if not entry_finished:
                output_text = f"{output_text}<|endoftext|>"
            generated_list.append(output_text)

    return generated_list


#Function to generate multiple sentences. Test data should be a dataframe
def text_generation(test_data):
  """Generate one continuation per row of ``test_data['Lyric']``.

  Uses the notebook-level ``model`` (moved to the CPU) and ``tokenizer``.
  Returns a list with one single-element list of generated text per row.
  """
  return [
    generate(model.to('cpu'), tokenizer, test_data['Lyric'][row], entry_count=1)
    for row in range(len(test_data))
  ]

#Run the functions to generate the lyrics
generated_lyrics = text_generation(test_set)

100%|██████████| 1/1 [01:20<00:00, 80.94s/it]
100%|██████████| 1/1 [00:59<00:00, 59.26s/it]
100%|██████████| 1/1 [00:52<00:00, 52.46s/it]
100%|██████████| 1/1 [00:38<00:00, 38.48s/it]
100%|██████████| 1/1 [00:30<00:00, 30.17s/it]
100%|██████████| 1/1 [00:15<00:00, 15.89s/it]
100%|██████████| 1/1 [00:58<00:00, 58.00s/it]
100%|██████████| 1/1 [01:04<00:00, 64.83s/it]
100%|██████████| 1/1 [00:53<00:00, 53.74s/it]
100%|██████████| 1/1 [00:27<00:00, 27.34s/it]


In [None]:
test_set

Unnamed: 0,index,ALink,SName,SLink,Lyric,Idiom,True_end_lyrics
0,102442,/ariana-grande/,Fake Smile,/ariana-grande/fake-smile.html,After laughter comes tears. After laughter com...,ENGLISH,"through, I can't lie. Ooh, ah (Ooh, ah). Fuck ..."
1,102552,/ariana-grande/,Zero to Hero,/ariana-grande/zero-to-hero.html,Hercules. Bless my soul. Herc was on a roll. P...,ENGLISH,hit the heights at breakneck speed. From zero ...
2,102401,/ariana-grande/,​Goodnight n Go,/ariana-grande/goodnight-n-go.html,Tell me why you gotta look at me that way. You...,ENGLISH,"goodnight and go. Want to say goodnight. Baby,..."
3,102458,/ariana-grande/,I Believe In You & Me,/ariana-grande/i-believe-in-you-me.html,I believe in you and me. I believe that we wil...,ENGLISH,"me. See I was lost, now I'm free. 'Cause I bel..."
4,102415,/ariana-grande/,Beauty and The Beast (Feat. John Legend),/ariana-grande/beauty-and-the-beast-feat-john-...,Tale as old as time. True as it can be. Barely...,ENGLISH,old as time). Song as old as rhyme. Beauty and...
5,102543,/ariana-grande/,Where The Boys Are,/ariana-grande/where-the-boys-are.html,"Where the boys are, someone waits for me. A sm...",ENGLISH,"me I'll wait impatiently. Where the boys are, ..."
6,102540,/ariana-grande/,True Love,/ariana-grande/true-love.html,On the first day of christmas. When you gave m...,ENGLISH,love!. My true love!. You are my true love. Ah...
7,102445,/ariana-grande/,Ghostin,/ariana-grande/ghostin.html,I know you hear me when I cry. I try to hold i...,ENGLISH,"of baggage. But I love you, we'll get past thi..."
8,102531,/ariana-grande/,The Heart Of The Matter,/ariana-grande/the-heart-of-the-matter.html,"I got the call today, I didn't wanna hear. But...",ENGLISH,"it's about forgiveness. Forgiveness. Even if, ..."
9,102395,/ariana-grande/,Beauty and The Beast (Feat. John Legend),/ariana-grande/beauty-and-the-beast-feat-john-...,Tale as old as time. True as it can be. Barely...,ENGLISH,old as time). Song as old as rhyme. Beauty and...


In [None]:
generated_lyrics

[["After laughter comes tears. After laughter comes tears. Another night, another party, sayin' hi to everybody. I'm sorry, I start to leave, I gotta leave now. Got somewhere I gotta be now, I'm starving. Can somebody walk me to my car?. If I go alone, I'm not gon' make it very far. I'm happy for the love and all of the above. If I'm being honest, I done been through way too much. I can't fake another smile. I can't fake like I'm alright. Ooh, ah (Ooh, ah). And I won't say I'm feeling fine. After what I been through, I can't lie. Ooh, ah (Ooh, ah). Fuck a fake smile, smile. Fuck a fake smile, fake smile. I read the things they write about me. Hear what they're sayin' on the Tv, it's crazy. It's gettin' hard for them to shock me. But every now and then, it's shocking, don't blame me. I know it's the life that I chose. But baby, I'm grateful, I want you to know. I'm happy for the love and all of the above. If I'm being honest, I done been through way too much. I can't fake another smile.

In [None]:
def _continuation_after_prompt(prompt, generated_text, tail_words=30):
  """Return the part of ``generated_text`` that follows the prompt's last ``tail_words`` words."""
  prompt_tail = ' '.join(prompt.split()[-tail_words:])
  if not prompt_tail:
    # Empty prompt: there is nothing to strip (str.split('') would raise).
    return generated_text
  return generated_text.split(prompt_tail)[-1]


def _trim_to_last_sentence(text):
  """Cut ``text`` right after its final '.'; returns '' when it contains no '.'."""
  # Slice at the last period. The previous str.replace() approach removed *every*
  # occurrence of the trailing fragment, which could also delete words mid-text.
  return text[:text.rfind('.') + 1]


# Keep only the newly generated text: everything after the last 30 words of the prompt.
my_generations = [
  _continuation_after_prompt(test_set['Lyric'][i], ' '.join(generated_lyrics[i]))
  for i in range(len(generated_lyrics))
]

#Finish the sentences when there is a point, remove after that
final = [_trim_to_last_sentence(text) for text in my_generations]

test_set['Generated_lyrics'] = final

In [None]:
test_set

Unnamed: 0,index,ALink,SName,SLink,Lyric,Idiom,True_end_lyrics,Generated_lyrics
0,102442,/ariana-grande/,Fake Smile,/ariana-grande/fake-smile.html,After laughter comes tears. After laughter com...,ENGLISH,"through, I can't lie. Ooh, ah (Ooh, ah). Fuck ...","through, I can't lie. Ooh, ah (Ooh, ah). Fuck..."
1,102552,/ariana-grande/,Zero to Hero,/ariana-grande/zero-to-hero.html,Hercules. Bless my soul. Herc was on a roll. P...,ENGLISH,hit the heights at breakneck speed. From zero ...,'s a star. This guy showed up at the same time...
2,102401,/ariana-grande/,​Goodnight n Go,/ariana-grande/goodnight-n-go.html,Tell me why you gotta look at me that way. You...,ENGLISH,"goodnight and go. Want to say goodnight. Baby,...",goodnight and go? We'll have drinks and talk ...
3,102458,/ariana-grande/,I Believe In You & Me,/ariana-grande/i-believe-in-you-me.html,I believe in you and me. I believe that we wil...,ENGLISH,"me. See I was lost, now I'm free. 'Cause I bel...",me. I will never leave your side. I will neve...
4,102415,/ariana-grande/,Beauty and The Beast (Feat. John Legend),/ariana-grande/beauty-and-the-beast-feat-john-...,Tale as old as time. True as it can be. Barely...,ENGLISH,old as time). Song as old as rhyme. Beauty and...,old as time). Natural color. Little parts of ...
5,102543,/ariana-grande/,Where The Boys Are,/ariana-grande/where-the-boys-are.html,"Where the boys are, someone waits for me. A sm...",ENGLISH,"me I'll wait impatiently. Where the boys are, ...",me tenderly.\n\nTHE COLLEGE ACTIVITY: The Cri...
6,102540,/ariana-grande/,True Love,/ariana-grande/true-love.html,On the first day of christmas. When you gave m...,ENGLISH,love!. My true love!. You are my true love. Ah...,love. Ah ah ah ah ah..... (You are) My true l...
7,102445,/ariana-grande/,Ghostin,/ariana-grande/ghostin.html,I know you hear me when I cry. I try to hold i...,ENGLISH,"of baggage. But I love you, we'll get past thi...","of baggage. But I love you, we'll get past th..."
8,102531,/ariana-grande/,The Heart Of The Matter,/ariana-grande/the-heart-of-the-matter.html,"I got the call today, I didn't wanna hear. But...",ENGLISH,"it's about forgiveness. Forgiveness. Even if, ...","it's about forgiveness. Forgiveness. Even if,..."
9,102395,/ariana-grande/,Beauty and The Beast (Feat. John Legend),/ariana-grande/beauty-and-the-beast-feat-john-...,Tale as old as time. True as it can be. Barely...,ENGLISH,old as time). Song as old as rhyme. Beauty and...,old as time). Tale as old as love. Music as o...


In [None]:
import statistics
from nltk.translate.bleu_score import SmoothingFunction, sentence_bleu

# sentence_bleu expects a list of tokenised references and a tokenised hypothesis.
# Passing raw strings made it score characters against single-character "references".
# Smoothing keeps short continuations from collapsing when a higher-order n-gram
# has no overlap (the situation the NLTK warning points at).
smoothing = SmoothingFunction().method1

scores=[]

for reference, candidate in zip(test_set['True_end_lyrics'], test_set['Generated_lyrics']):
  reference_tokens = reference.split()
  candidate_tokens = candidate.split()
  scores.append(sentence_bleu([reference_tokens], candidate_tokens, smoothing_function=smoothing))

# Mean word-level BLEU over the test songs.
statistics.mean(scores)

Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


0.6801564227063518