To run the final model, just go to the "Final Model" section and run the cells.

In [None]:
!pip install transformers



In [None]:
import pandas as pd
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import numpy as np
import random
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import GPT2Tokenizer, GPT2LMHeadModel, AdamW, get_linear_schedule_with_warmup
from tqdm import tqdm, trange
import torch.nn.functional as F
import csv

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# GPT2 with Fine Tuning

### Prepare data

In [None]:
lyrics_pre = pd.read_csv('/content/drive/MyDrive/cis530_data/final_project/lyrics-data.csv')
lyrics = lyrics_pre[lyrics_pre['language']=='en']

In [None]:
lyrics.head()

Unnamed: 0,ALink,SName,SLink,Lyric,language
69,/ivete-sangalo/,Careless Whisper,/ivete-sangalo/careless-whisper.html,I feel so unsure\nAs I take your hand and lead...,en
86,/ivete-sangalo/,Could You Be Loved / Citação Musical do Rap: S...,/ivete-sangalo/could-you-be-loved-citacao-musi...,"Don't let them fool, ya\nOr even try to school...",en
88,/ivete-sangalo/,Cruisin' (Part. Saulo),/ivete-sangalo/cruisin-part-saulo.html,"Baby, let's cruise, away from here\nDon't be c...",en
111,/ivete-sangalo/,Easy,/ivete-sangalo/easy.html,"Know it sounds funny\nBut, I just can't stand ...",en
140,/ivete-sangalo/,For Your Babies (The Voice cover),/ivete-sangalo/for-your-babies-the-voice-cover...,You've got that look again\nThe one I hoped I ...,en


In [None]:
artists_pre = pd.read_csv('/content/drive/MyDrive/cis530_data/final_project/artists-data.csv')

In [None]:
artists_pre.head()

Unnamed: 0,Artist,Genres,Songs,Popularity,Link
0,Ivete Sangalo,Pop; Axé; Romântico,313.0,4.4,/ivete-sangalo/
1,Chiclete com Banana,Axé,268.0,3.8,/chiclete-com-banana/
2,Banda Eva,Axé; Romântico; Reggae,215.0,2.3,/banda-eva/
3,É O Tchan,Axé,129.0,1.6,/e-o-tchan/
4,Claudia Leitte,Pop; Axé; Romântico,167.0,1.5,/claudia-leitte/


In [None]:
len(lyrics)

191814

In [None]:
len(artists_pre)

4168

Extract Rock, Pop, Rap, and Gospel genre songs

In [None]:
# Reload the artist table and bucket artists into four mutually exclusive genres.
# Priority is Gospel > Rap > Pop > Rock: an artist carrying a higher-priority tag
# is excluded from every lower-priority bucket. The popularity floor (> 5) is
# applied to Rock and Pop only.
artists_pre = pd.read_csv('/content/drive/MyDrive/cis530_data/final_project/artists-data.csv')

genre_col = artists_pre['Genres']
is_popular = artists_pre['Popularity'] > 5

has_rock = genre_col.str.contains('Rock', na=False, regex=False)
has_pop = genre_col.str.contains('Pop', na=False, regex=False)
has_rap = genre_col.str.contains('Rap', na=False, regex=False)
has_gospel = genre_col.str.contains('Gospel', na=False, regex=False)

artists_rock = artists_pre[has_rock & is_popular & ~genre_col.str.contains('Pop|Rap|Gospel', na=False)]
artists_pop = artists_pre[has_pop & is_popular & ~genre_col.str.contains('Rap|Gospel', na=False)]
artists_rap = artists_pre[has_rap & ~genre_col.str.contains('Gospel', na=False)]
artists_gospel = artists_pre[has_gospel]

# Bucket sizes, in the order rock, pop, rap, gospel.
for genre_subset in (artists_rock, artists_pop, artists_rap, artists_gospel):
    print(len(genre_subset))

56
93
288
557


In [None]:
# Attach artist metadata to the English lyrics, one frame per genre bucket
# (df1 = pop, df2 = rap, df3 = rock, df4 = gospel).
artist_columns = ['Artist', 'Genres', 'Link', 'Popularity']
df1, df2, df3, df4 = (
    lyrics.merge(genre_artists[artist_columns], left_on='ALink', right_on='Link', how='inner')
    for genre_artists in (artists_pop, artists_rap, artists_rock, artists_gospel)
)

In [None]:
# Remove the link columns and the language column from every genre frame.
unused_columns = ['ALink', 'SLink', 'language', 'Link']
df1, df2, df3, df4 = (
    frame.drop(columns=unused_columns) for frame in (df1, df2, df3, df4)
)

In [None]:
def _is_short_lyric(lyric):
    """Return True when the lyric splits into fewer than 350 space-separated pieces."""
    return len(lyric.split(' ')) < 350

# Keep only the short songs in every genre frame.
df1, df2, df3, df4 = (
    frame[frame['Lyric'].apply(_is_short_lyric)] for frame in (df1, df2, df3, df4)
)

In [None]:
print(len(df1),len(df2),len(df3),len(df4))

13487 4549 10307 7020


In [None]:
# Cap every genre at the 4500 rows with the highest 'Popularity' value.
df1, df2, df3, df4 = (
    frame.sort_values(by='Popularity', ascending=False).head(4500)
    for frame in (df1, df2, df3, df4)
)

In [None]:
# Prefix every lyric with the control tag of its genre frame.
for frame, genre_tag in (
    (df1, '<POP>: '),
    (df2, '<RAP>: '),
    (df3, '<ROCK>: '),
    (df4, '<GOSPEL>: '),
):
    frame['Lyric'] = frame['Lyric'].apply(lambda lyric, genre_tag=genre_tag: genre_tag + lyric)

Combine the genre splits together

In [None]:
dfs = [df1, df2, df3, df4]
df = pd.concat(dfs,ignore_index=True)

In [None]:
#test set
# Hold out 500 songs. The fixed random_state makes the split identical on every
# run, so a saved model and a saved test set always refer to the same partition.
test_set = df.sample(n=500, random_state=42)
df = df.drop(index=test_set.index)

# reset_index() keeps the pre-split row label in a new 'index' column.
test_set = test_set.reset_index()
df = df.reset_index()

In [None]:
# Split every held-out lyric on whitespace once: the last 10 words become the
# ground-truth ending and everything before them becomes the prompt.
lyric_words = test_set['Lyric'].str.split()
test_set['True_end_lyrics'] = lyric_words.str[-10:].apply(' '.join)
test_set['Lyric'] = lyric_words.str[:-10].apply(' '.join)

In [None]:
test_set.head()

Unnamed: 0,index,SName,Lyric,Artist,Genres,Popularity,True_end_lyrics
0,5854,I See a Victory (Feat. Kim Burrell),<RAP>: They'll call it a mystery But we're gon...,Pharrell Williams,Pop; Hip Hop; Rap,9.9,history It's saying victory is with me | | |
1,2539,Butch,<POP>: this song goes out to jack's mother you...,Phil Collins,Soft Rock; Romântico; Pop/Rock,51.9,tempt me [bang] oops i shot you too sorry jack
2,340,Domino,<POP>: I'm feeling sexy and free like glitter'...,Anitta,Pop; Dance; Funk Carioca,119.6,in the moonlight take me down like I'm a domino
3,14106,Draw Me Close To You,<GOSPEL>: Draw me close to You Never let me go...,Hillsong United,Gospel/Religioso; Pop/Rock; Rock,25.8,You're all I want Help me know You are near
4,5183,Selfish (Feat. Josh X),<RAP>: I gave you more than I give myself So l...,Cardi B,Hip Hop; Rap,13.9,"first, I'm picking me I finally learned to be ..."


Error: Runtime no longer has a reference to this dataframe, please re-run this cell and try again.


### Prepare the dataset

In [None]:
class SongLyrics(Dataset):
    """Tokenized song lyrics for GPT-2 fine-tuning.

    Every lyric is cut to ``max_length`` characters, wrapped in ``<|endoftext|>``
    markers, encoded with the tokenizer and stored as a 1-D tensor of token ids.
    """

    def __init__(self, control_code, truncate=False, gpt2_type="gpt2", max_length=1024,
                 tokenizer=None):
        """Encode the given lyrics.

        Args:
            control_code: iterable of lyric strings (e.g. ``df['Lyric']``). A single
                string is treated as one lyric. The lyrics are read from this
                argument rather than from a notebook-level ``df``, so the dataset
                always matches what the caller passed in.
            truncate: when True, keep at most the first 20000 encoded lyrics.
            gpt2_type: model name given to ``GPT2Tokenizer.from_pretrained`` when no
                ``tokenizer`` is supplied.
            max_length: maximum number of characters (not tokens) kept per lyric.
            tokenizer: optional object exposing ``encode(str) -> list[int]``. Pass an
                already-loaded tokenizer to avoid loading it a second time.
        """
        if tokenizer is None:
            tokenizer = GPT2Tokenizer.from_pretrained(gpt2_type)
        self.tokenizer = tokenizer

        # Iterating over a bare string would yield single characters.
        lyric_texts = [control_code] if isinstance(control_code, str) else control_code

        self.lyrics = [
            torch.tensor(
                self.tokenizer.encode(f"<|endoftext|>{row[:max_length]}<|endoftext|>")
            )
            for row in lyric_texts
        ]

        if truncate:
            self.lyrics = self.lyrics[:20000]
        self.lyrics_count = len(self.lyrics)

    def __len__(self):
        """Number of encoded lyrics."""
        return self.lyrics_count

    def __getitem__(self, item):
        """Token-id tensor of the lyric at position ``item``."""
        return self.lyrics[item]

In [None]:
dataset = SongLyrics(df['Lyric'], truncate=True, gpt2_type="gpt2")

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

### Prepare training

In [None]:
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
# Start from the pretrained GPT-2 weights. `model` must exist before the training
# cell calls train(dataset, model, tokenizer); otherwise a fresh kernel fails
# with a NameError.
model = GPT2LMHeadModel.from_pretrained('gpt2')
# To resume from an earlier checkpoint instead, replace the line above with:
#model = torch.load('/content/drive/MyDrive/Colab Notebooks/model_20epochs_edward1.pt')

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

In [None]:
# Sequence packing: several short songs are concatenated into one training input.
def pack_tensor(new_tensor, packed_tensor, max_seq_len):
    """Try to add ``new_tensor`` to the running ``packed_tensor``.

    Both tensors are sliced and measured along dimension 1.

    Returns:
        A 3-tuple ``(tensor, fits, leftover)``:
        * nothing packed yet  -> ``(new_tensor, True, None)``
        * combined length fits -> ``(merged, True, None)``
        * combined length exceeds ``max_seq_len`` ->
          ``(packed_tensor, False, new_tensor)``
    """
    # Nothing packed so far: the new tensor starts the pack.
    if packed_tensor is None:
        return new_tensor, True, None

    combined_length = new_tensor.size()[1] + packed_tensor.size()[1]
    if combined_length <= max_seq_len:
        # The new tensor goes in front; the first token of the existing pack is dropped.
        merged = torch.cat([new_tensor, packed_tensor[:, 1:]], dim=1)
        return merged, True, None

    # Too long: hand back the pack unchanged together with the tensor that did not fit.
    return packed_tensor, False, new_tensor

In [None]:
def train(
    dataset, model, tokenizer,
    batch_size=16, epochs=20, lr=2e-5,
    max_seq_len=400, warmup_steps=200,
    gpt2_type="gpt2", output_dir=".", output_prefix="wreckgar",
    test_mode=False,save_model_on_epoch=False,
):
    """Fine-tune ``model`` on ``dataset`` with sequence packing and gradient accumulation.

    Songs are drawn one at a time and packed (via ``pack_tensor``) into inputs of at
    most 768 tokens. Gradients are accumulated over ``batch_size`` packed inputs
    before each optimizer step.

    Args:
        dataset: dataset yielding 1-D token-id tensors.
        model: causal LM called as ``model(input_ids, labels=input_ids)``; the first
            element of its output is used as the loss.
        tokenizer: accepted for interface compatibility; not used here.
        batch_size: number of packed inputs accumulated per optimizer step.
        epochs: number of passes over ``dataset``.
        lr: peak learning rate.
        max_seq_len: accepted for interface compatibility; the packing limit is
            currently fixed at 768.
        warmup_steps: linear warmup length, in optimizer steps.
        gpt2_type, test_mode: accepted for interface compatibility; not used here.
        output_dir, output_prefix: where per-epoch checkpoints are written.
        save_model_on_epoch: when True, save ``model.state_dict()`` after every epoch.

    Returns:
        The trained model.
    """
    import os  # only needed to build checkpoint paths

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    model.train()

    train_dataloader = DataLoader(dataset, batch_size=1, shuffle=True)
    num_entries = len(train_dataloader)

    optimizer = AdamW(model.parameters(), lr=lr)
    # The scheduler needs a positive step budget: with num_training_steps=-1 the
    # linear decay evaluates to 0 as soon as warmup ends, freezing the weights.
    # Packing makes the exact count unknown up front, so use the upper bound of
    # one packed input per song.
    total_steps = max(warmup_steps + 1, (num_entries * epochs) // batch_size)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=warmup_steps, num_training_steps=total_steps
    )

    loss = 0
    accumulating_batch_count = 0

    for epoch in range(epochs):

        print(f"Training epoch {epoch}")
        print(loss)
        input_tensor = None
        for idx, entry in tqdm(enumerate(train_dataloader), total=num_entries):
            (input_tensor, carry_on, remainder) = pack_tensor(entry, input_tensor, 768)
            is_last = idx == num_entries - 1

            # Keep packing until the pack is full or the epoch runs out of songs.
            if carry_on and not is_last:
                continue

            # On the last song of the epoch, also train on the song that did not fit.
            ready = [input_tensor]
            if is_last and remainder is not None:
                ready.append(remainder)

            for packed in ready:
                packed = packed.to(device)
                outputs = model(packed, labels=packed)
                loss = outputs[0]
                loss.backward()

                # Count first, then test, so a step happens after every
                # `batch_size` backward passes (not after the very first one).
                accumulating_batch_count += 1
                if (accumulating_batch_count % batch_size) == 0:
                    optimizer.step()
                    scheduler.step()
                    optimizer.zero_grad()

            # The song that overflowed the pack starts the next pack instead of
            # being thrown away.
            input_tensor = None if is_last else remainder
        if save_model_on_epoch:
            torch.save(
                model.state_dict(),
                os.path.join(output_dir, f"{output_prefix}-{epoch}.pt"),
            )
    return model

### Actual Training

In [None]:
#Train the model on the specific data we have
model = train(dataset, model, tokenizer)



Training epoch 0
0


17500it [05:54, 49.39it/s]


Training epoch 1
tensor(4.0740, device='cuda:0', grad_fn=<NllLossBackward0>)


17500it [05:42, 51.05it/s]


Training epoch 2
tensor(3.7099, device='cuda:0', grad_fn=<NllLossBackward0>)


17500it [05:40, 51.33it/s]


Training epoch 3
tensor(3.0655, device='cuda:0', grad_fn=<NllLossBackward0>)


17500it [05:41, 51.20it/s]


Training epoch 4
tensor(3.4668, device='cuda:0', grad_fn=<NllLossBackward0>)


17500it [05:40, 51.45it/s]


Training epoch 5
tensor(2.8955, device='cuda:0', grad_fn=<NllLossBackward0>)


17500it [05:40, 51.35it/s]


Training epoch 6
tensor(2.2746, device='cuda:0', grad_fn=<NllLossBackward0>)


17500it [05:40, 51.43it/s]


Training epoch 7
tensor(3.0028, device='cuda:0', grad_fn=<NllLossBackward0>)


17500it [05:40, 51.36it/s]


Training epoch 8
tensor(2.8158, device='cuda:0', grad_fn=<NllLossBackward0>)


17500it [05:40, 51.34it/s]


Training epoch 9
tensor(2.5854, device='cuda:0', grad_fn=<NllLossBackward0>)


17500it [05:40, 51.40it/s]


Training epoch 10
tensor(2.7025, device='cuda:0', grad_fn=<NllLossBackward0>)


17500it [05:41, 51.29it/s]


Training epoch 11
tensor(1.9531, device='cuda:0', grad_fn=<NllLossBackward0>)


17500it [05:42, 51.14it/s]


Training epoch 12
tensor(2.5603, device='cuda:0', grad_fn=<NllLossBackward0>)


17500it [05:40, 51.39it/s]


Training epoch 13
tensor(3.3667, device='cuda:0', grad_fn=<NllLossBackward0>)


17500it [05:41, 51.30it/s]


Training epoch 14
tensor(2.5859, device='cuda:0', grad_fn=<NllLossBackward0>)


17500it [05:41, 51.28it/s]


Training epoch 15
tensor(2.8844, device='cuda:0', grad_fn=<NllLossBackward0>)


17500it [05:42, 51.05it/s]


Training epoch 16
tensor(5.6363, device='cuda:0', grad_fn=<NllLossBackward0>)


17500it [05:42, 51.13it/s]


Training epoch 17
tensor(2.7897, device='cuda:0', grad_fn=<NllLossBackward0>)


17500it [05:43, 50.92it/s]


Training epoch 18
tensor(2.5216, device='cuda:0', grad_fn=<NllLossBackward0>)


17500it [05:40, 51.40it/s]


Training epoch 19
tensor(2.7222, device='cuda:0', grad_fn=<NllLossBackward0>)


17500it [05:41, 51.20it/s]


In [None]:
torch.save(model, '/content/drive/MyDrive/Colab Notebooks/model_4genres.pt')

In [None]:
test_set.to_csv('/content/drive/MyDrive/Colab Notebooks/test_set_4genres.csv')

### Text generation

In [None]:
#Load the model to use it
# The checkpoint was written with torch.save(model, ...), i.e. it is a fully pickled
# model object rather than a state dict. Recent PyTorch releases default to
# weights_only=True, which refuses to unpickle such objects, so opt out explicitly.
# Unpickling can execute arbitrary code: only load checkpoints you created yourself.
model = torch.load('/content/drive/MyDrive/Colab Notebooks/model_4genres.pt', weights_only=False)

In [None]:
def generate(
    model,
    tokenizer,
    prompt,
    entry_count=10,
    entry_length=30, #maximum number of tokens generated per entry
    top_p=0.8,
    temperature=1.,
):
    """Sample continuations of ``prompt`` with nucleus (top-p) sampling.

    Args:
        model: causal LM; ``model(input_ids)[0]`` must be the logits.
        tokenizer: tokenizer providing ``encode`` and ``decode``.
        prompt: text to continue. An empty prompt is replaced by ``<|endoftext|>``,
            the marker that precedes every lyric in the training data.
        entry_count: number of independent samples to draw.
        entry_length: maximum number of new tokens per sample.
        top_p: cumulative-probability cut-off of the nucleus.
        temperature: logit divisor; values <= 0 are treated as 1.0.

    Returns:
        A list of ``entry_count`` strings, each the decoded prompt plus its
        continuation. Samples that did not produce an end-of-text token get
        ``<|endoftext|>`` appended.
    """
    model.eval()

    # Run on whatever device the model already lives on.
    device = next(model.parameters()).device
    max_context = 1024  # prompts are cut to this many tokens, as before
    eos_ids = set(tokenizer.encode("<|endoftext|>"))  # hoisted out of the sampling loop
    filter_value = -float("Inf")

    generated_list = []

    with torch.no_grad():

        for entry_idx in trange(entry_count):

            entry_finished = False

            prompt_ids = tokenizer.encode(prompt)
            if len(prompt_ids) == 0:
                prompt_ids = tokenizer.encode("<|endoftext|>")
            # Keep the end of an over-long prompt.
            generated = torch.tensor(prompt_ids[-max_context:]).unsqueeze(0).to(device)

            for _ in range(entry_length):
                # Feed only the most recent max_context tokens so the input never
                # exceeds the context window. No labels: a loss is not needed here.
                logits = model(generated[:, -max_context:])[0]
                logits = logits[:, -1, :] / (temperature if temperature > 0 else 1.0)

                sorted_logits, sorted_indices = torch.sort(logits, descending=True)
                cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)

                # Mark everything past the nucleus; shift by one so the first token
                # crossing top_p is kept, and never remove the most likely token.
                sorted_indices_to_remove = cumulative_probs > top_p
                sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[
                    ..., :-1
                ].clone()
                sorted_indices_to_remove[..., 0] = 0

                indices_to_remove = sorted_indices[sorted_indices_to_remove]
                logits[:, indices_to_remove] = filter_value

                next_token = torch.multinomial(F.softmax(logits, dim=-1), num_samples=1)
                generated = torch.cat((generated, next_token), dim=1)

                if next_token.item() in eos_ids:
                    entry_finished = True
                    break

            output_text = tokenizer.decode(generated.squeeze(0).tolist())
            if not entry_finished:
                output_text = f"{output_text}<|endoftext|>"
            generated_list.append(output_text)

    return generated_list

In [None]:
test_set = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/test_set_4genres.csv')

In [None]:
#Function to generate multiple sentences. Test data should be a dataframe
def text_generation(test_data):
  """Generate one continuation per prompt in ``test_data['Lyric']``.

  Uses the notebook-level ``model`` and ``tokenizer``. Prompts are read by
  iteration, so the frame's index does not have to be 0..n-1.

  Returns:
    A list with one entry per row; each entry is the single-element list
    returned by ``generate(..., entry_count=1)``.
  """
  generated_lyrics = []
  for lyric_prompt in test_data['Lyric']:
    generated_lyrics.append(generate(model, tokenizer, lyric_prompt, entry_count=1))
  return generated_lyrics

In [None]:
generated_lyrics = text_generation(test_set)

100%|██████████| 1/1 [00:00<00:00,  1.57it/s]
100%|██████████| 1/1 [00:00<00:00,  1.56it/s]
100%|██████████| 1/1 [00:00<00:00,  1.88it/s]
100%|██████████| 1/1 [00:00<00:00,  1.88it/s]
100%|██████████| 1/1 [00:00<00:00,  1.68it/s]
100%|██████████| 1/1 [00:00<00:00,  3.38it/s]
100%|██████████| 1/1 [00:00<00:00,  1.48it/s]
100%|██████████| 1/1 [00:00<00:00,  1.53it/s]
100%|██████████| 1/1 [00:00<00:00,  1.62it/s]
100%|██████████| 1/1 [00:00<00:00,  1.67it/s]
100%|██████████| 1/1 [00:00<00:00,  2.06it/s]
100%|██████████| 1/1 [00:00<00:00,  1.94it/s]
100%|██████████| 1/1 [00:00<00:00, 28.28it/s]
100%|██████████| 1/1 [00:00<00:00,  1.97it/s]
100%|██████████| 1/1 [00:00<00:00,  2.08it/s]
100%|██████████| 1/1 [00:00<00:00,  1.85it/s]
100%|██████████| 1/1 [00:00<00:00,  2.76it/s]
100%|██████████| 1/1 [00:00<00:00,  1.83it/s]
100%|██████████| 1/1 [00:00<00:00,  2.02it/s]
100%|██████████| 1/1 [00:00<00:00,  2.00it/s]
100%|██████████| 1/1 [00:00<00:00,  1.82it/s]
100%|██████████| 1/1 [00:00<00:00,

In [None]:
#Loop to keep only generated text and add it as a new column in the dataframe
my_generations=[]
raw_output = []# todelete later

for i in range(len(generated_lyrics)):
  raw_output.append(generated_lyrics[i])
  prompt_text = test_set['Lyric'][i]
  # An empty prompt may come back as NaN after the CSV round trip; treat it as ''.
  prompt_words = prompt_text.split() if isinstance(prompt_text, str) else []
  anchor = ' '.join(prompt_words[-30:]) #Get the matching string we want (30 words)
  full_text = ' '.join(generated_lyrics[i])
  if anchor == '':
    # No prompt to strip. Keep the model's own output: copying the true ending
    # here would count as a perfect prediction in the BLEU evaluation.
    print(i)
    continuation = full_text
    for marker in ('<|startoftext|>', '<|endoftext|>'):
      if continuation.startswith(marker):
        continuation = continuation[len(marker):]
  else:
    continuation = full_text.split(anchor)[-1] #Get all that comes after the matching string
  my_generations.append(continuation.split('<|endoftext|>')[0])

test_set['Generated_lyrics'] = my_generations
test_set['raw_output'] = raw_output # todelete later

In [None]:
test_set.to_csv('/content/drive/MyDrive/Colab Notebooks/test_set_4genre_withraw.csv')

Recreating the generated_lyrics array from raw_output column:

In [None]:
test1 = []
for i in range(len(test_set)):
  test1.append(test_set['raw_output'][i])

print(test1)
print(generated_lyrics)



In [None]:
test_set.head()

Unnamed: 0,index,SName,Lyric,Artist,Genres,True_end_lyrics,Generated_lyrics,raw_output
0,4172,Dance Like This,Bamboooooooo Tembaleuoluwaaluweee Anubaleee le...,Shakira,Pop; Pop/Rock; Dance,"chants, sounds like spirits from africa Bamboo...",cheers It is so sudden Let's go La mirada de ...,[Bamboooooooo Tembaleuoluwaaluweee Anubaleee l...
1,13980,Heartache All Over The World,Music by Elton John Lyrics by Bernie Taupin Av...,Elton John,Soft Rock; Romântico; Pop/Rock,do on a weekend honey When your heart's on fire,do on a weekend honey When your heart's on fi...,[Music by Elton John Lyrics by Bernie Taupin A...
2,17997,Secret Friend,Feel like you've never fit before Here we are ...,Paul McCartney,Rock; Pop/Rock,friend I need ya I need ya I need ya,"friend Feel like you've never fell before, on...",[Feel like you've never fit before Here we are...
3,2738,Southampton Dock,They disembarked in '45 And no-one spoke and n...,Pink Floyd,Progressivo; Rock; Psicodelia,the bottom of our hearts We felt the final cut.,their place stood what looks like a grim ston...,[They disembarked in '45 And no-one spoke and ...
4,14499,I Don't Give a Damn,"Everytime you go away, It actually kinda makes...",Avril Lavigne,Rock Alternativo; Rock; Pop/Punk,"a damn What you say about that, (what you say)","a damn, What you say about that, You know I d...","[Everytime you go away, It actually kinda make..."


# Testing different genres
Looks like it's working!

In [None]:
# The control tag must match the training prefix exactly ('<RAP>: ', upper case),
# because the tokenizer is case-sensitive. The variable is not called `input`
# so that the builtin stays usable.
lyric_text = "<RAP>: A man looking in the mirror."
# Keep extending the text, 30 tokens at a time, until it is at least 100 words long.
while(len(lyric_text.split())<100):
  lyric_text = lyric_text.split("<|endoftext|>")[0]
  lyric_text = generate(model, tokenizer, lyric_text, entry_count=1)[0]
print(lyric_text)

100%|██████████| 1/1 [00:00<00:00,  1.33it/s]
100%|██████████| 1/1 [00:01<00:00,  1.05s/it]
100%|██████████| 1/1 [00:00<00:00,  1.06it/s]
100%|██████████| 1/1 [00:00<00:00,  1.00it/s]

<Rap>: A man looking in the mirror.
When I say that the man looks in the mirror
I'm speaking of the man who says it's my turn.
Yeah, and I was wondering what I was doing
When the door opened and I saw my shoe...
Btw, I had no idea how I was feeling
Then I thought
What a shame, you had to admit
So why am I letting it slip in?
Why am I not going through with my plan?
And this time the guy who is sitting in the mirror

Was that a mistake?
Yes, I remember what happened
<|endoftext|>





In [None]:
# The control tag must match the training prefix exactly ('<GOSPEL>: ', upper case),
# because the tokenizer is case-sensitive. The variable is not called `input`
# so that the builtin stays usable.
lyric_text = "<GOSPEL>: A man looking in the mirror."
# Keep extending the text, 30 tokens at a time, until it is at least 100 words long.
while(len(lyric_text.split())<100):
  lyric_text = lyric_text.split("<|endoftext|>")[0]
  lyric_text = generate(model, tokenizer, lyric_text, entry_count=1)[0]
print(lyric_text)


100%|██████████| 1/1 [00:01<00:00,  1.26s/it]
100%|██████████| 1/1 [00:01<00:00,  1.23s/it]
100%|██████████| 1/1 [00:01<00:00,  1.28s/it]
100%|██████████| 1/1 [00:00<00:00,  1.02it/s]
100%|██████████| 1/1 [00:00<00:00,  1.34it/s]

<Gospel>: A man looking in the mirror. You're my savior
I won't hide from you, man. I'll tell you everything. I'll keep your life.

I'll just see the world
through your eyes
For your part, I'll tell you everything

Life in this world, man.

This world is the path of my life
in your arms, the blind man, the left hand of the holy prophet.
My life in this world, man.

I'll see the world
through your eyes
For your part, I'll tell you everything

Life in this world, man.

I'll see the world
through your eyes
For your part, I'll tell you everything

Life in this world<|endoftext|>





### Analyze performance

In [None]:
test_set

Unnamed: 0,index,SName,Lyric,Artist,Genres,True_end_lyrics,Generated_lyrics
0,4172,Dance Like This,Bamboooooooo Tembaleuoluwaaluweee Anubaleee le...,Shakira,Pop; Pop/Rock; Dance,"chants, sounds like spirits from africa Bamboo...",cheers It is so sudden Let's go La mirada de ...
1,13980,Heartache All Over The World,Music by Elton John Lyrics by Bernie Taupin Av...,Elton John,Soft Rock; Romântico; Pop/Rock,do on a weekend honey When your heart's on fire,do on a weekend honey When your heart's on fi...
2,17997,Secret Friend,Feel like you've never fit before Here we are ...,Paul McCartney,Rock; Pop/Rock,friend I need ya I need ya I need ya,"friend Feel like you've never fell before, on..."
3,2738,Southampton Dock,They disembarked in '45 And no-one spoke and n...,Pink Floyd,Progressivo; Rock; Psicodelia,the bottom of our hearts We felt the final cut.,their place stood what looks like a grim ston...
4,14499,I Don't Give a Damn,"Everytime you go away, It actually kinda makes...",Avril Lavigne,Rock Alternativo; Rock; Pop/Punk,"a damn What you say about that, (what you say)","a damn, What you say about that, You know I d..."
...,...,...,...,...,...,...,...
495,4033,Just Another Day,"Yeah man, play that New York drum, Homer Show ...",Lady Gaga,Dance; Pop; Pop/Rock,"oo, ooh oo And after all It's just another day",oo [Chorus] We both know I could learn a thin...
496,17516,Find The Right Man,You found the right man in the right place Onc...,Alanis Morissette,Pop/Rock; Rock,"For his love, for his love (repeat 2X until end)","For his love, for his love He feels the same ..."
497,17656,Let Me Roll It,"You gave me something, I understand, You gave ...",Paul McCartney,Rock; Pop/Rock,Let me roll it to you Let me roll it,Let me roll it to you Let me roll it to you l...
498,20332,Atomica,"(trecho) I'm just a rockstar, stabbin' away I ...",David Bowie,Rock,get atomica Let's rock 'till we explode Let's ...,get atomica Let's rock 'till we explode Let's...


BLEU with the reference and the candidate truncated to the same length:

In [None]:
#Using BLEU score to compare the real sentences with the generated ones
import statistics
from nltk.translate.bleu_score import sentence_bleu

scores=[]
for i in range(len(test_set)):
  reference = [test_set['True_end_lyrics'][i].split()]
  # Compare at most as many generated words as the reference has ...
  candidate = test_set['Generated_lyrics'][i].split()[:len(reference[0])]
  # ... and shorten the reference when the generation is shorter.
  if len(reference[0])>len(candidate):
    reference[0] = reference[0][:len(candidate)]

  # Score each pair once (it used to be computed twice per row).
  scores.append(sentence_bleu(reference, candidate))

statistics.mean(scores)

The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


0.25164240205218275

BLEU without truncating the reference and the candidate to the same length:

In [None]:
scores=[]
for i in range(len(test_set)):
  reference = [test_set['True_end_lyrics'][i].split()]
  candidate = test_set['Generated_lyrics'][i].split()

  # Score each pair once (it used to be computed twice per row).
  scores.append(sentence_bleu(reference, candidate))

statistics.mean(scores)

0.12092438438877175

# GPT-2 without any fine-tuning

In [None]:
import transformers
import torch

In [None]:
tokenizer = transformers.GPT2Tokenizer.from_pretrained('gpt2')
model = transformers.GPT2LMHeadModel.from_pretrained('gpt2')
model = model.to('cuda')

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

In [None]:
## Making a function that will generate text for us ##
def gen_text(prompt_text, tokenizer, model, n_seqs=1, max_length=374):
  """Continue ``prompt_text`` with ``model.generate`` using nucleus sampling.

  Args:
    prompt_text: text to continue. An empty prompt is replaced by ``<|endoftext|>``.
    tokenizer: tokenizer providing ``encode``, ``decode`` and ``eos_token_id``.
    model: model providing ``generate`` and ``device``; ``config.n_positions`` is
      read when present (1024 is assumed otherwise).
    n_seqs: number of sequences to generate.
    max_length: maximum number of new tokens per sequence.

  Returns:
    A list of ``n_seqs`` strings, each ``prompt_text`` followed by the generated
    continuation.
  """
  context_size = getattr(getattr(model, "config", None), "n_positions", 1024)

  # We are encoding the text using the gpt tokenizer. The return tensors are of type "pt"
  # since we are using PyTorch, not tensorflow
  encoded_prompt = tokenizer.encode(prompt_text, add_special_tokens=False, return_tensors="pt")
  if encoded_prompt.size(1) == 0:
    encoded_prompt = torch.tensor(tokenizer.encode("<|endoftext|>")).unsqueeze(0)

  # Keep the end of an over-long prompt, leaving room in the context window for
  # the tokens that are about to be generated.
  prompt_budget = max(1, context_size - max_length)
  if encoded_prompt.size(1) > prompt_budget:
    encoded_prompt = encoded_prompt[:, -prompt_budget:]

  encoded_prompt = encoded_prompt.to(model.device)
  # size(1) is the prompt length in tokens; len() would give the batch size (1).
  prompt_len = encoded_prompt.size(1)

  output_sequences = model.generate(
      input_ids=encoded_prompt,
      # Single unpadded prompt: every position is a real token.
      attention_mask=torch.ones_like(encoded_prompt),
      # generate() counts the prompt in max_length, so add the prompt length.
      max_length=min(prompt_len + max_length, context_size),
      temperature=1.0,
      top_k=0,
      top_p=0.9,
      repetition_penalty=1.2, # To ensure that we dont get repeated phrases
      do_sample=True,
      num_return_sequences=n_seqs,
      pad_token_id=tokenizer.eos_token_id,
  ) # We feed the encoded input into the model.

  ## Getting the output ##
  # Characters occupied by the (possibly shortened) prompt in the decoded text.
  decoded_prompt_len = len(tokenizer.decode(encoded_prompt[0], clean_up_tokenization_spaces=True))
  generated_sequences = []
  for generated_sequence in output_sequences:
    text = tokenizer.decode(generated_sequence.tolist())
    generated_sequences.append(prompt_text + text[decoded_prompt_len:])
  return generated_sequences

In [None]:
#Function to generate multiple sentences. Test data should be a dataframe
def text_generation(test_data):
  """Generate one continuation per prompt in ``test_data['Lyric']`` with ``gen_text``.

  Uses the notebook-level ``tokenizer`` and ``model``. Prompts are read by
  iteration, so the frame's index does not have to be 0..n-1.

  Note: this redefines the ``text_generation`` declared earlier in the notebook.

  Returns:
    A list with one entry per row; each entry is the list returned by ``gen_text``.
  """
  generated_lyrics = []
  for lyric_prompt in test_data['Lyric']:
    generated_lyrics.append(gen_text(lyric_prompt, tokenizer, model))
  return generated_lyrics

generated_lyrics2 = text_generation(test_set)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

In [None]:
#Loop to keep only generated text and add it as a new column in the dataframe
my_generations=[]

for i in range(len(generated_lyrics2)):
  prompt_text = test_set['Lyric'][i]
  # An empty prompt may come back as NaN after a CSV round trip; treat it as ''.
  prompt_words = prompt_text.split() if isinstance(prompt_text, str) else []
  anchor = ' '.join(prompt_words[-30:]) #Get the matching string we want (30 words)
  full_text = ' '.join(generated_lyrics2[i])
  if anchor == '':
    # No prompt to strip. Keep the model's own output: copying the true ending
    # here would count as a perfect prediction in the BLEU evaluation.
    print(i)
    continuation = full_text
  else:
    continuation = full_text.split(anchor)[-1] #Get all that comes after the matching string
  my_generations.append(continuation.split('<|endoftext|>')[0])

test_set['Generated_lyrics'] = my_generations

430


In [None]:
#Using BLEU score to compare the real sentences with the generated ones
import statistics
from nltk.translate.bleu_score import sentence_bleu

scores=[]

for i in range(len(test_set)):
  # sentence_bleu expects lists of tokens. Passing raw strings makes it score
  # character n-grams, which is not comparable with the word-level BLEU computed
  # for the fine-tuned model, so split into words here as well.
  reference = [test_set['True_end_lyrics'][i].split()]
  candidate = test_set['Generated_lyrics'][i].split()[:len(reference[0])]

  if len(reference[0])>len(candidate):
    reference[0] = reference[0][:len(candidate)]

  scores.append(sentence_bleu(reference, candidate))

statistics.mean(scores)

0.06600835197933186

# Adding a pretrained image-to-text model

In [None]:
from PIL import Image
import requests
from transformers import AutoProcessor, TFBlipForConditionalGeneration

processor = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = TFBlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

tf_model.h5:   0%|          | 0.00/990M [00:00<?, ?B/s]

All model checkpoint layers were used when initializing TFBlipForConditionalGeneration.

All the layers of TFBlipForConditionalGeneration were initialized from the model checkpoint at Salesforce/blip-image-captioning-base.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBlipForConditionalGeneration for predictions without further training.


In [None]:
# Download a sample image and generate a caption for it.
url = "https://a.cdn-hotels.com/gdcs/production186/d800/95688a50-1214-4e16-812c-7d5115e28cf3.jpg"
image = Image.open(requests.get(url, stream=True).raw)
text = "what?"
# NOTE(review): `text` is handed to the processor, but only `pixel_values` is
# forwarded to generate() below, so the text prompt is not passed to the model.
inputs = processor(images=image, text = text, return_tensors="tf")
pixel_values = inputs.pixel_values
generated_ids = model.generate(pixel_values=pixel_values, max_length=500)
# batch_decode returns one string per generated sequence; keep the first.
generated_caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

In [None]:
generated_caption

'a brick wall with a sign on it'

# Final Model

In [None]:
!pip install transformers



In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import torch
from PIL import Image
import requests
from transformers import AutoProcessor, TFBlipForConditionalGeneration, GPT2LMHeadModel, GPT2Tokenizer
from tqdm import tqdm, trange
import torch.nn.functional as F

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
# The checkpoint is a fully pickled model object rather than a state dict. Recent
# PyTorch releases default to weights_only=True, which refuses to unpickle such
# objects, so opt out explicitly. Unpickling can execute arbitrary code: only
# load checkpoints you created yourself.
text_model = torch.load('/content/drive/MyDrive/Colab Notebooks/model_4genres.pt', weights_only=False)

processor = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
image_model = TFBlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/287 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/506 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/4.56k [00:00<?, ?B/s]

tf_model.h5:   0%|          | 0.00/990M [00:00<?, ?B/s]

All model checkpoint layers were used when initializing TFBlipForConditionalGeneration.

All the layers of TFBlipForConditionalGeneration were initialized from the model checkpoint at Salesforce/blip-image-captioning-base.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBlipForConditionalGeneration for predictions without further training.


In [None]:
def image_to_text(image_url):
  """Download the image at ``image_url`` and return a caption for it.

  Uses the notebook-level ``processor`` and ``image_model``.

  Args:
    image_url: URL of the image to caption.

  Returns:
    The first decoded caption, as a string.

  Raises:
    requests.HTTPError: when the server answers with an error status.
    requests.Timeout: when the download takes longer than 30 seconds.
  """
  from io import BytesIO

  # Fail early on HTTP errors and hung connections instead of handing an error
  # page (or nothing) to PIL. `response.content` is the decoded body, whereas
  # the raw stream may still be compressed.
  response = requests.get(image_url, timeout=30)
  response.raise_for_status()
  image = Image.open(BytesIO(response.content))

  inputs = processor(images=image, return_tensors="tf")
  generated_ids = image_model.generate(pixel_values=inputs.pixel_values, max_length=500)
  return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

In [None]:
def generate(
    model,
    tokenizer,
    prompt,
    entry_count=10,
    entry_length=30, # maximum number of new tokens sampled per entry
    top_p=0.8,
    temperature=1.,
):
    """Sample continuations of `prompt` from `model` using nucleus (top-p) sampling.

    Args:
        model: causal language model; called with a (1, seq_len) tensor of token ids and
            expected to return the logits as the first element of its output.
        tokenizer: object providing `encode(str) -> list[int]` and `decode(list[int]) -> str`.
        prompt: text to continue. If it encodes to no tokens, "<|startoftext|>" is used instead.
        entry_count: number of independent continuations to sample.
        entry_length: maximum number of new tokens sampled for each continuation.
        top_p: cumulative-probability threshold; tokens beyond the smallest set whose
            probability mass exceeds it are excluded (the most likely token is always kept).
        temperature: divisor applied to the logits; values <= 0 are treated as 1.0.

    Returns:
        A list of `entry_count` strings, each the decoded prompt plus continuation. Entries
        that hit `entry_length` without sampling an end-of-text token get "<|endoftext|>"
        appended so every entry ends with that marker.
    """
    model.eval()

    # Follow the model's own device instead of assuming a GPU is present.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    # Longest sequence the model accepts; 1024 is the previously hard-coded GPT-2 limit.
    max_context = getattr(getattr(model, "config", None), "n_positions", None) or 1024

    # Loop-invariant encodings: compute once instead of on every entry / step.
    prompt_ids = tokenizer.encode(prompt)
    if len(prompt_ids) == 0:
        prompt_ids = tokenizer.encode("<|startoftext|>")
    prompt_ids = prompt_ids[-max_context:]
    eos_ids = tokenizer.encode("<|endoftext|>")

    filter_value = -float("Inf")
    generated_list = []

    with torch.no_grad():

        for _ in trange(entry_count):

            entry_finished = False
            generated = torch.tensor(prompt_ids, dtype=torch.long, device=device).unsqueeze(0)

            for _step in range(entry_length):
                # Feed only the most recent tokens so the growing sequence never exceeds the
                # model's context window; no labels are passed because the loss is not needed.
                outputs = model(generated[:, -max_context:])
                logits = outputs[0]
                logits = logits[:, -1, :] / (temperature if temperature > 0 else 1.0)

                sorted_logits, sorted_indices = torch.sort(logits, descending=True)
                cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)

                # Mark tokens past the top_p mass; shift right by one so the token that
                # crosses the threshold is kept, and never drop the most likely token.
                sorted_indices_to_remove = cumulative_probs > top_p
                sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[
                    ..., :-1
                ].clone()
                sorted_indices_to_remove[..., 0] = 0

                indices_to_remove = sorted_indices[sorted_indices_to_remove]
                logits[:, indices_to_remove] = filter_value

                next_token = torch.multinomial(F.softmax(logits, dim=-1), num_samples=1)
                generated = torch.cat((generated, next_token), dim=1)

                if next_token.item() in eos_ids:
                    entry_finished = True
                    break

            # Index the single batch row rather than squeeze(), which collapses a
            # one-token sequence to a scalar.
            output_text = tokenizer.decode(generated[0].tolist())
            if not entry_finished:
                output_text = f"{output_text}<|endoftext|>"
            generated_list.append(output_text)

    return generated_list

In [None]:
def lyric_generation(url, genre, length):
  """Generate lyrics in `genre` seeded by a caption of the image at `url`.

  The prompt is "<genre>: caption". generate() is called repeatedly, each time
  continuing the text produced so far, until the text has at least `length`
  whitespace-separated words.

  Args:
    url: URL of the image to caption.
    genre: genre tag placed at the start of the prompt (e.g. "Pop").
    length: minimum number of whitespace-separated words requested.

  Returns:
    The generated text (str). If the caption alone already satisfies `length`, the
    prompt is returned unchanged. If generation stops adding words, the text produced
    so far is returned even though it is shorter than `length`.
  """
  # Give up after this many consecutive rounds without a new longest text; otherwise a
  # model that keeps emitting end-of-text immediately would loop forever.
  max_stalled_rounds = 10

  lyrics = "<"+genre+">: "+image_to_text(url)
  word_count = len(lyrics.split())
  best_word_count = word_count
  stalled_rounds = 0

  while word_count < length and stalled_rounds < max_stalled_rounds:
    # Drop the end-of-text marker (and anything after it) before continuing the text.
    prompt = lyrics.split("<|endoftext|>")[0]
    lyrics = generate(text_model, tokenizer, prompt, entry_count=1)[0]
    word_count = len(lyrics.split())
    if word_count > best_word_count:
      best_word_count = word_count
      stalled_rounds = 0
    else:
      stalled_rounds += 1
  return lyrics

Examples: the same image with each of the four genres (Pop, Gospel, Rock, Rap).

In [None]:
# Example: Pop lyrics seeded by the caption of this image; `length` is the minimum word count requested.
url = "https://a.cdn-hotels.com/gdcs/production186/d800/95688a50-1214-4e16-812c-7d5115e28cf3.jpg"
genre = "Pop"
length = 100
lyric_generation(url, genre, length)

100%|██████████| 1/1 [00:00<00:00,  1.43it/s]
100%|██████████| 1/1 [00:00<00:00,  1.53it/s]
100%|██████████| 1/1 [00:00<00:00,  1.41it/s]
100%|██████████| 1/1 [00:01<00:00,  1.24s/it]
100%|██████████| 1/1 [00:01<00:00,  1.19s/it]


"<Pop>: a brick wall with a sign on it\n\nPop:\n\nJump on a street that will only stand for my song\nCome out in droves, try and fight and get down\n\nPop:\nBut wait for the train to run off by now\nI don't care what the hell you say, it'll be the same 'cause we'll all try to get out alive\n\nPop:\nIf you can't hit a rock\nIf you can't break a guitar\nIf you can't smash a record\nThen take this love, make it easy to be strong\n\nPop:\nYour name is Sonics\nI'll dance, sing and sing and sing\nI won't change it, they're yours\n\nPop:\nYou know, I<|endoftext|>"

In [None]:
# Example: same image URL as the Pop example, with the genre tag switched to Gospel.
url = "https://a.cdn-hotels.com/gdcs/production186/d800/95688a50-1214-4e16-812c-7d5115e28cf3.jpg"
genre = "Gospel"
length = 100
lyric_generation(url, genre, length)

100%|██████████| 1/1 [00:00<00:00,  2.15it/s]
100%|██████████| 1/1 [00:00<00:00,  2.22it/s]
100%|██████████| 1/1 [00:00<00:00,  2.06it/s]
100%|██████████| 1/1 [00:00<00:00,  2.02it/s]


'<Gospel>: a brick wall with a sign on it, a wall with pictures of saints\nFrom which the light of this chapel would fall and I would go\nThe youth, when the door opened in my name would do away with it\nTo Christ Jesus for salvation in all our minds\nI know that Christ stands before me now\nI know that He will come and deliver me\nAnd I know that He will save me\nI know that He will save me\nI know that He will save me\nI know that He will save me\nI know that He will save me\nI know that He will save me\nI know that He<|endoftext|>'

In [None]:
# Example: same image URL as the Pop example, with the genre tag switched to Rock.
url = "https://a.cdn-hotels.com/gdcs/production186/d800/95688a50-1214-4e16-812c-7d5115e28cf3.jpg"
genre = "Rock"
length = 100
lyric_generation(url, genre, length)

100%|██████████| 1/1 [00:00<00:00,  1.81it/s]
100%|██████████| 1/1 [00:00<00:00,  1.32it/s]
100%|██████████| 1/1 [00:00<00:00,  1.50it/s]
100%|██████████| 1/1 [00:00<00:00,  1.38it/s]
100%|██████████| 1/1 [00:00<00:00,  1.38it/s]


'<Rock>: a brick wall with a sign on it\nTelling you that this is you\nAnd all the time I\'ve been there\nI\'ve seen you dream and you never give a damn\nIt\'s because of what you did\n\n(beat)\nWake up tomorrow, all the right\nAnd I can hear you saying, "Dear God, God\nThis is a word from you\nThat you would really do for us\nA word from God that we could never hear\nIf you would give us this chance\nWe\'ll do something for you\n\n(beat)\nAnd I don\'t think the people in your town\nwould understand, or know,\nwhat you\'ve done\n\n(beat)\nSo yeah\n(beat)\nThat\'s all I<|endoftext|>'

In [None]:
# Example: same image URL as the Pop example, with the genre tag switched to Rap.
url = "https://a.cdn-hotels.com/gdcs/production186/d800/95688a50-1214-4e16-812c-7d5115e28cf3.jpg"
genre = "Rap"
length = 100
lyric_generation(url, genre, length)

100%|██████████| 1/1 [00:00<00:00,  1.65it/s]
100%|██████████| 1/1 [00:00<00:00,  1.56it/s]
100%|██████████| 1/1 [00:00<00:00,  1.52it/s]
100%|██████████| 1/1 [00:00<00:00,  2.12it/s]


"<Rap>: a brick wall with a sign on it\n\nThere's nothing out there, nobody knows\nSo my mom used to tell me\nYou go to the store and pick up somethin'\n\nYou do everything in your power to save the world\nI mean, what else can I do?\nYou say I'll go in the next cab, and\nthe cab comes by, you go in the next cab\n\nYeah, you're gonna be the winner\nAll this other shit just make you cry\nI don't wanna let you go\nAnd I want you gone\nYou better stay home\nCause your mind just got<|endoftext|>"

Examples with different images, all using the Pop genre:

In [None]:
# Example: Pop lyrics for a different image (a video thumbnail URL); at least `length` words are requested.
url = "https://i.ytimg.com/vi/NgsWGfUlwJI/hqdefault.jpg?v=62a2c1cd"
genre = "Pop"
length = 100
lyric_generation(url, genre, length)

100%|██████████| 1/1 [00:00<00:00,  2.59it/s]
100%|██████████| 1/1 [00:00<00:00,  2.46it/s]
100%|██████████| 1/1 [00:00<00:00,  2.54it/s]
100%|██████████| 1/1 [00:00<00:00,  2.28it/s]
100%|██████████| 1/1 [00:00<00:00,  2.04it/s]


"<Pop>: a man standing in front of a car on fire\nI am\nthe lion\nI'm a man standing in front of a car\nIn the skies\nYes, we're very fast\nThe god of my day\n\nAnd I see this to-night\n\nChorus\n\nHail the Devil\nOh my Lord God\nWe are the devil\nIn our place\nWe are the road to salvation\nWe are the pain of our face\n\nThe Devil\n\nYes, we are very fast\nThe God of my day\n\nAnd I see this to-night\n\nChorus\n\n\nSo let us keep the lights of day\nYou live\nBut this time\nIn the darkness\nIf we should never see you again\n\nI know you live\n<|endoftext|>"

In [None]:
# Example: Pop lyrics for a different image (a stock-photo URL); at least `length` words are requested.
url = "https://www.shutterstock.com/image-photo/dawn-over-snow-capped-mountains-600nw-2233176223.jpg"
genre = "Pop"
length = 100
lyric_generation(url, genre, length)

100%|██████████| 1/1 [00:00<00:00,  2.65it/s]
100%|██████████| 1/1 [00:00<00:00,  2.57it/s]
100%|██████████| 1/1 [00:00<00:00,  2.42it/s]
100%|██████████| 1/1 [00:00<00:00,  2.43it/s]


"<Pop>: a mountain range with a sunset in the background\nWe're down on the mountains of the night\nThey're as steep as ice\nAnd their mouths are the same as gold\nWhen we've been sitting here all day\nWe're old, but the wind blows cold\nWe're lost in the land of shadows\n\nThis way we can change our minds\n\nThis way we can change our minds\n\n\nWe will tell you the truth\nDo you think there's a place that we're not at\n\nDo you think there's a place that we're not at\n\nDo you think there's a place that we're<|endoftext|>"

In [None]:
# Example: Pop lyrics for a different image; note the URL contains parentheses and colons, passed through as-is.
url = "https://www.treehugger.com/thmb/nSp8ESJ1N6zq_bsTVL_MoSrKAqA=/1500x0/filters:no_upscale():max_bytes(150000):strip_icc()/GettyImages-1273584292-cbcd5f85f4c646d58f7a7fa158dcaaeb.jpg"
genre = "Pop"
length = 100
lyric_generation(url, genre, length)

100%|██████████| 1/1 [00:00<00:00,  1.79it/s]
100%|██████████| 1/1 [00:00<00:00,  1.74it/s]
100%|██████████| 1/1 [00:00<00:00,  2.47it/s]
100%|██████████| 1/1 [00:00<00:00,  2.38it/s]
100%|██████████| 1/1 [00:00<00:00,  2.09it/s]


"<Pop>: a forest with sun shining through the trees\nOnly one way out of life, another step away from this city\nWon't you get along?\nThe change you'll make\n\n\n[chorus]\nWell, we're good, I'm sure you'll be proud of that\nWe're not weak, I'm sure you'll be proud of that\nWe're not alone, it's a real train of thought\nWe're all we know\n\n\n[solo]\nYou're not a single voice\nWe're all we know\nSo we're all we know\nWe're all we know\n\n(Solo)\nYou're not a single voice\nWe're all we know\nSo we're all we know\nWe're all we know\n<|endoftext|>"

In [None]:
# Example: Pop lyrics for a different image; at least `length` whitespace-separated words are requested.
url = "https://www.safehavenforcats.org/wp-content/uploads/2023/05/SH-Ultimate-Cat-Supply-List-1080x640-1-1.jpg"
genre = "Pop"
length = 100
lyric_generation(url, genre, length)

100%|██████████| 1/1 [00:00<00:00,  1.72it/s]
100%|██████████| 1/1 [00:00<00:00,  1.65it/s]
100%|██████████| 1/1 [00:00<00:00,  1.80it/s]
100%|██████████| 1/1 [00:00<00:00,  2.38it/s]
100%|██████████| 1/1 [00:00<00:00,  2.03it/s]


"<Pop>: a cat is looking up at toys on a green background\nLooks like a human\nThen the dream fades away\nAnd everything's bright\nThe bird is waving its wings\n\nEverybody who touches it\nIs happy\nAnd makes the dings go away\n\nYes, you can sing\nYes, you can sing\n\nPeople in the street love you\nYou're more than just a fox\nYou're bigger than a hill\nAnd you're in a box\nAnd the birds are singing\nYes, you can sing\nYes, you can sing\n\nBirds sing and some people sing\nThere are birds singing\nYes, you can sing\nYes, you can sing\n\n(Pause)\n\nLet's get to the point\nThat people love you\nYou're more than<|endoftext|>"