In [None]:
# Download all song lyrics of a given artist from Genius, clean them, and train a character-level LSTM to generate new lyrics

import lyricsgenius as lg
import os
from dotenv import load_dotenv
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence
import matplotlib.pyplot as plt


## Downloading the songs

In [None]:
# Set to True to (re)download the lyrics from Genius; otherwise the notebook
# works from the lyrics file that is read a few cells below.
download_lyrics = False
artist = "Radiohead"
if download_lyrics:

    load_dotenv()
    access_token = os.getenv("GENIUS_ACCESS_TOKEN")
    if not access_token:
        # Fail with an explicit message instead of an opaque client error.
        raise RuntimeError(
            "GENIUS_ACCESS_TOKEN is not set; export it or add it to a .env file"
        )

    try:
        genius = lg.Genius(access_token, skip_non_songs=True,
                           excluded_terms=["(Remix)", "(Live)", "Demo", "Version"],
                           remove_section_headers=True, timeout=15, sleep_time=0.5, verbose=True)

        artist_songs = genius.search_artist(artist, sort="popularity", get_full_info=False, max_songs=200)

    except Exception as e:
        print(e)
        print("Error in getting artist")
        # Re-raise: the next cell dereferences `artist_songs`, so carrying on
        # would only replace the real error with a NameError later.
        raise

    if artist_songs is None:
        # search_artist returns None when the artist cannot be found.
        raise ValueError(f"No artist named {artist!r} was found on Genius")
        



In [None]:
if download_lyrics:
    # Write to the same location the loading cell reads from
    # (lyrics/lyrics_<artist>.txt), creating the folder if needed.
    os.makedirs("lyrics", exist_ok=True)
    with open(f'lyrics/lyrics_{artist}.txt', "w", encoding="utf-8") as f:
        for song in artist_songs.songs:
            f.write(song.lyrics)
            # Blank line between consecutive songs.
            f.write("\n\n")

In [None]:
# Load the whole lyrics dump for the selected artist into one string.
with open(f'lyrics/lyrics_{artist}.txt', mode="r", encoding="utf-8") as lyrics_file:
    lyrics = lyrics_file.read()



In [None]:
# Cut the dump at every occurrence of the word "Lyrics".
# NOTE(review): presumably each song starts with a "<title> Lyrics" header,
# which is what the following cells rely on - confirm against the raw file.
songs = lyrics.split(sep="Lyrics")
songs
    

## Cleaning

We strip everything that is not part of the lyrics themselves: the header line of each song (contributors, translations, title) and the trailing "Embed" marker.

In [None]:
# For every chunk, detach whatever follows its last newline into an entry of
# its own, placed right after the chunk. Chunks without a newline are kept
# unchanged.
separated = []
for chunk in songs:
    head, newline, tail = chunk.rpartition("\n")
    if newline:
        separated.extend((head, tail))
    else:
        separated.append(chunk)
# Update the existing list object in place, as the original loop did.
songs[:] = separated
        
        

In [None]:
print(len(songs))
# Keep only the entries that contain something other than whitespace.
songs = [entry for entry in songs if entry.strip()]
print(len(songs))

In [None]:

# Glue every entry at an even position to the entry that follows it.
merged = []
for start in range(0, len(songs), 2):
    merged.append(songs[start] + songs[start + 1])
songs = merged
print(len(songs))

In [None]:
# Inspect the merged entries produced by the previous cell.
songs

In [None]:
# Drop everything up to and including the first newline of each song;
# entries without a newline are left as they are.
songs = [entry.split("\n", 1)[-1] for entry in songs]
songs

In [None]:
# Discard songs that are now completely empty.
songs = list(filter(None, songs))

In [None]:
# Inspect the songs after removing the first line and dropping empty entries.
songs

In [None]:
for i in range(len(songs)):

    # Cut the song at the last "Embed" marker, then remove any digits that
    # directly precede it.
    embed_at = songs[i].rfind("Embed")
    if embed_at == -1:
        # No marker: leave the song untouched. Slicing with -1 would silently
        # drop its last character.
        continue
    songs[i] = songs[i][:embed_at]
    # Guard against the song becoming empty while the digits are stripped.
    while songs[i] and songs[i][-1].isnumeric():
        songs[i] = songs[i][:-1]


## Encoding the chars

In [None]:
# index the characters
# The vocabulary is sorted so that the character <-> index mapping is the same
# on every run: iteration order of a set of strings changes between
# interpreter sessions, which would make the mapping (and any trained weights
# tied to it) non-reproducible.
chars = sorted(set("".join(songs)))
char_to_index = {c: i for i, c in enumerate(chars)}
index_to_char = {i: c for i, c in enumerate(chars)}

In [None]:
# we add two special characters for start and end of song
# Each marker gets a new index only if it is not in the vocabulary yet. If it
# already occurs in the lyrics, its existing index is reused, so both lookup
# tables stay consistent with `chars`.
for special in ("<", ">"):
    if special not in char_to_index:
        print(f"Adding {special}")
        chars.append(special)
        char_to_index[special] = len(chars) - 1
        index_to_char[len(chars) - 1] = special

# Wrap every song in the start/end markers.
# NOTE: running this cell twice wraps the songs twice.
songs = [f"<{song}>" for song in songs]


In [None]:
# Translate every song into the list of its character indices.
encoded_songs = [list(map(char_to_index.__getitem__, text)) for text in songs]

In [None]:
# Next-character prediction: the target is the input shifted by one position.
input_sequences, output_sequences = [], []
for encoded in encoded_songs:
    input_sequences.append(encoded[:-1])
    output_sequences.append(encoded[1:])

In [None]:
# Vocabulary size, and conversion of the index sequences to int64 tensors.
# No one-hot encoding is performed here; the tensors hold character indices.
dim_one_hot = len(chars)
input_sequences = [torch.tensor(indices, dtype=torch.long) for indices in input_sequences]
output_sequences = [torch.tensor(indices, dtype=torch.long) for indices in output_sequences]

In [None]:
# create the dataset
class LyricsDataset(Dataset):
    """Dataset of aligned (input, target) sequences, one pair per song.

    Indexing returns ``(input_sequence, output_sequence, input_length)``.
    """

    def __init__(self, input_sequences, output_sequences):
        self.input_sequences, self.output_sequences = input_sequences, output_sequences

    def __len__(self):
        """Number of songs, taken from the input sequences."""
        return len(self.input_sequences)

    def __getitem__(self, idx):
        """Return the pair at ``idx`` together with the length of its input."""
        source = self.input_sequences[idx]
        target = self.output_sequences[idx]
        return source, target, len(source)

In [None]:
def collate_fn(batch):
    """Pad a list of dataset items into batch tensors.

    Args:
        batch: list of ``(input_sequence, output_sequence, length)`` items.

    Returns:
        ``(inputs, targets, lengths)`` where ``inputs`` and ``targets`` are
        padded to the longest sequence of the batch (batch dimension first)
        and ``lengths`` is the list of original lengths.

    Inputs are padded with 0. Targets are padded with -100, the default
    ``ignore_index`` of ``nn.functional.cross_entropy``, so padded positions
    do not contribute to the loss. Padding targets with 0 would score them as
    a real character class.
    """
    target_padding = -100
    inputs = [item[0] for item in batch]
    targets = [item[1] for item in batch]
    lengths = [item[2] for item in batch]
    input_sequences = nn.utils.rnn.pad_sequence(inputs, batch_first=True)
    output_sequences = nn.utils.rnn.pad_sequence(
        targets, batch_first=True, padding_value=target_padding
    )
    return input_sequences, output_sequences, lengths

In [None]:
# NOTE(review): this batch_size=2 loader is not referenced anywhere else in the
# visible notebook; training uses the `dataloader` created in a later cell.
# Confirm whether it can be removed.
loader = DataLoader(LyricsDataset(input_sequences, output_sequences), batch_size=2, shuffle=True, collate_fn=collate_fn)

# create the model

class LyricsModel(nn.Module):
    """Character-level language model: embedding -> LSTM -> linear projection.

    Args:
        vocab_size: number of character indices (embedding rows and output logits).
        embedding_size: dimension of each character embedding.
        hidden_dim: hidden size of the LSTM.
        n_layers_lstm: number of stacked LSTM layers.
        dropout: dropout probability handed to ``nn.LSTM``.
    """

    def __init__(self, vocab_size, embedding_size, hidden_dim, n_layers_lstm=1, dropout=0.):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.n_layers_lstm = n_layers_lstm
        self.embed = nn.Embedding(vocab_size, embedding_size)
        self.lstm = nn.LSTM(embedding_size, hidden_dim, batch_first=True, num_layers=n_layers_lstm, dropout=dropout)
        self.fc = nn.Linear(hidden_dim, vocab_size)
        # Kept as an attribute because sampling code calls `model.softmax(...)`.
        self.softmax = nn.functional.softmax

    def forward(self, x, hidden_states=None):
        """Compute next-character logits for a batch of index sequences.

        Args:
            x: integer tensor of character indices, batch dimension first.
            hidden_states: optional ``(h, c)`` pair (list or tuple) to resume
                from; ``None`` lets the LSTM start from its default state.

        Returns:
            ``(logits, [h, c])``: unnormalised scores over the vocabulary for
            every position, and the final LSTM state.
        """
        if hidden_states is not None:
            # nn.LSTM documents a tuple; accept the lists callers pass as well.
            hidden_states = tuple(hidden_states)
        embedded = self.embed(x)
        out, (h, c) = self.lstm(embedded, hidden_states)
        logits = self.fc(out)
        return logits, [h, c]

    def generate_text(self, max_length, temperature):
        """Sample a song character by character, starting from "<".

        Sampling stops after ``max_length`` characters or as soon as ">" is
        drawn. The result is printed and returned. Relies on the notebook-level
        ``char_to_index`` / ``index_to_char`` tables.

        Args:
            max_length: maximum number of characters to sample.
            temperature: divisor applied to the logits before the softmax;
                lower values make sampling more conservative.

        Returns:
            The generated string, including the leading "<".
        """
        generated = "<"
        last_char = generated
        # Build the initial state on the same device/dtype as the weights.
        reference = next(self.parameters())
        h = torch.zeros(self.n_layers_lstm, 1, self.hidden_dim, dtype=reference.dtype, device=reference.device)
        c = torch.zeros(self.n_layers_lstm, 1, self.hidden_dim, dtype=reference.dtype, device=reference.device)
        # Sample in eval mode so dropout is disabled, then restore the previous
        # mode because this method is called in the middle of training.
        was_training = self.training
        self.eval()
        try:
            with torch.no_grad():
                for _ in range(max_length):
                    x = torch.tensor([[char_to_index[last_char]]], device=reference.device)
                    logits, [h, c] = self(x, [h, c])
                    probabilities = self.softmax(torch.divide(logits, temperature), dim=2)
                    # draw a sample from the predicted distribution
                    sampled = torch.distributions.Categorical(probabilities).sample()
                    last_char = index_to_char[sampled.item()]
                    generated += last_char
                    if last_char == ">":
                        break
        finally:
            self.train(was_training)
        print(generated)
        return generated


In [None]:
# Model, data pipeline and optimiser used by the training cells below.
batch_size = 1
model = LyricsModel(
    vocab_size=dim_one_hot,
    embedding_size=256,
    hidden_dim=256,
    n_layers_lstm=2,
    dropout=0.5,
)
dataset = LyricsDataset(input_sequences, output_sequences)
optimizer = optim.RMSprop(model.parameters(), lr=0.001)
dataloader = DataLoader(
    dataset,
    batch_size=batch_size,
    shuffle=True,
    collate_fn=collate_fn,
)

In [56]:
# training loop
from tqdm import tqdm

# Number of epochs for the first training run at the bottom of this cell.
n_epochs = 5
def train_model(model, dataloader, optimizer, n_epochs):
    """Train ``model`` for ``n_epochs`` and print a generated sample per epoch.

    Args:
        model: module whose call returns ``(logits, state)`` with logits laid
            out as (batch, sequence, vocabulary), and which provides
            ``generate_text(max_length, temperature)``.
        dataloader: iterable with a length, yielding ``(inputs, targets, lengths)``.
        optimizer: optimiser over ``model``'s parameters.
        n_epochs: number of passes over ``dataloader``.

    The reported loss is the running mean of the per-batch cross-entropy,
    weighted by the real size of each batch. Target positions equal to -100
    (the default ``ignore_index`` of ``cross_entropy``) are skipped.
    """
    train_bar = tqdm(range(n_epochs), leave=False)
    for epoch in train_bar:
        # Make sure dropout is active even if the model was switched to eval
        # mode earlier, e.g. by a sampling helper.
        model.train()
        running_loss = 0.0
        n_seen = 0
        for i, (x, y, lens) in enumerate(dataloader):
            optimizer.zero_grad()
            y_pred, _ = model(x)
            # cross_entropy expects the class dimension second.
            loss = nn.functional.cross_entropy(y_pred.permute(0, 2, 1), y)
            loss.backward()
            optimizer.step()
            # Use the actual batch size (the last batch may be smaller) and
            # accumulate a plain float so no tensors are kept alive.
            n_in_batch = x.size(0)
            running_loss += loss.item() * n_in_batch
            n_seen += n_in_batch
            train_bar.set_postfix({'running_loss': f'{running_loss/n_seen}', 'batch': f'{i+1}/{len(dataloader)}'})
        # An empty dataloader yields NaN instead of a ZeroDivisionError.
        epoch_loss = running_loss / n_seen if n_seen else float("nan")
        print(f"Epoch {epoch}, loss = {epoch_loss}")
        model.generate_text(max_length=1000, temperature=0.5)

# First training run: n_epochs passes over the lyrics.
train_model(model, dataloader, optimizer, n_epochs)

  0%|          | 0/5 [00:44<?, ?it/s, running_loss=2.4540529251098633, batch=183/183]

Epoch 0, loss = 2.4540529251098633


 20%|██        | 1/5 [00:47<03:10, 47.68s/it, running_loss=2.4540529251098633, batch=183/183]

<In a me sand and me the wat bild of all warl some hough your sut wand the fall the we me come

I dore
I the be cheac you to are sore stor in the waped be in the we hing and of the to beand me
And in the son the the been of thee the can the carl your you fat me come warl the me bam tor in this noing sonone be on the hard
I wam me youn saald I kard an it me dorn the wall thad ther not the marry shere heall wins the beer at the car the sel the dams fee stare
I beand me the the cout the ther med bery you in the card
I dowrow wan the bea got sind
I wall ald son't a down beace are cand it seare

You wan the the to core
ne

I dam hap the the list the ill hear your beand
I sto back me the the to mere to >


 20%|██        | 1/5 [01:28<03:10, 47.68s/it, running_loss=2.0662784576416016, batch=183/183]

Epoch 1, loss = 2.0662784576416016


 40%|████      | 2/5 [01:30<02:14, 44.89s/it, running_loss=1.9074400663375854, batch=1/183]  

<I to sorn

Thet and mot in all the hears it me a me fround
We not the leal
Thes a me the the supter to to warns
I'm lown

Dour be to best a like your to me and me the sundend
If to want a want a wall
Now don't to got me out
I got be the in in a don't turt to gond I wart frong

I want your sell

Gowe me cand

But the bet on to stares
You gow lay me are re your so sonntres my your son't can't hake to sme cand on a are the not it we are your and leate the let be thing mones
You >


 40%|████      | 2/5 [02:09<02:14, 44.89s/it, running_loss=1.9281063079833984, batch=183/183]

Epoch 2, loss = 1.9281063079833984


 60%|██████    | 3/5 [02:13<01:27, 43.80s/it, running_loss=1.9281063079833984, batch=183/183]

<
In clain it for wants I start the live

One warks don't got to you 
And what the let a do fore
If wall to me be the do come
The can the ever on the should I was all is feel the bease of you're to got the cald verong to be to all me the pean
The hastle the real the the reall

It's no blow in the sare
I don't to come be from the fish you what to will me
I wast the dean the shase
So the star alallt of plow
A me to come
I dorn cans in of the come

Where was you don't no be and the to stre all stishers all likes
And I'm me back

And the fase to hom for trey like firns and finet be bet a frack

In the wan thing up me
I was in the be leats the sing the whon't to strins

The so care a not strees and sill it the kear
I want the stor go a no no the cand
A linet the can't for bave the came the sinet no stat no a go she go come

So no the down as a wark but your beall
And to me the this chull the shashing the do no no will the stand the be of you, crack rake are the home

Kell no no surns
I'm th

 60%|██████    | 3/5 [02:50<01:27, 43.80s/it, running_loss=1.8303911685943604, batch=183/183]

Epoch 3, loss = 1.8303911685943604


 80%|████████  | 4/5 [02:51<00:41, 41.68s/it, running_loss=1.6678546667099, batch=1/183]     

<I'm hore an into hears
And chan't reart
It want you stis in anlate
I am you song think on a will my arm come
If word the somest wanting the spece me the don't leal the gontrome
I cont won't what come the come the crike
You >


 80%|████████  | 4/5 [03:30<00:41, 41.68s/it, running_loss=1.7521202564239502, batch=183/183]

Epoch 4, loss = 1.7521202564239502


                                                                                             

<
I can a look the go are to here

I contright

I find one are are wack
You >




In [61]:
# Continue training the same model and optimizer for 3 more epochs.
train_model(model, dataloader, optimizer, 3)

  0%|          | 0/3 [00:42<?, ?it/s, running_loss=1.6185073852539062, batch=183/183]

Epoch 0, loss = 1.6185073852539062


 33%|███▎      | 1/3 [00:46<01:31, 45.88s/it, running_loss=1.5644395351409912, batch=1/183]  

<I can't hurt
We're the need in the free and nothing the runners
Up the me to the happer that your see in the see and the really the massed
The the read and the fut the seeds and the beat up the spin
Into hearms hurt flace

I will never home in the stindrops
You 
I was the let my sentic, no, I can't leave the the flain

In you car me
It am the back in the need in a changers
When you're with me heads

When in call, the the the pird on a morn
I hand of the child of the read the wind, the put the treaking and the live

The they see a day in the pack of the chome
I get me on the the friend the stop the man
I hurt the start to ears the the pot the flace
And heards and white in the know the walk

I want you can't shee down and like here
But the know when I get the really me on

I wink the start be the fritter a way
I chollate

I really son't get the think it white I want and the care
I'm not crazy cars
When you such a spening of the want
When the hand and the want the the klood
I messing and

 33%|███▎      | 1/3 [01:31<01:31, 45.88s/it, running_loss=1.5416948795318604, batch=183/183]

Epoch 1, loss = 1.5416948795318604


 67%|██████▋   | 2/3 [01:35<00:48, 48.11s/it, running_loss=1.5416948795318604, batch=183/183]

<
I want the see drise of coming

I start to me the skilled to betters
(Why we love the see strake of the ready on

I will me when you've give your light
I want the sould the moon
Little don't leave be walking on the hore

One when you sick the mound

I can't to the end, we all where
You 
I want the walk the strick of the better me

Lut this is and into the moon
You want you the really say the starts in the should from the shack
I just to the cromesses
The hast you surpries
We are your should the starts on the string on the boot
How want the love me want
We talking of the next to the world

I think you start the see the stry shad
I get you the blood
When you've been better mes
Are you spay be the craming and the tart

I am not that you say that the some
You 
They want the freest the back out
The waits for your spared and never going to shear

Don't leave you they hears

I want you don't leave the cars
I want the world, shines on the see is the still

I will dears off all the shildrors


 67%|██████▋   | 2/3 [02:23<00:48, 48.11s/it, running_loss=1.4725643396377563, batch=183/183]

Epoch 2, loss = 1.4725643396377563


                                                                                             

<I want the really come
And me out you alive
I was the end you can a line
Where are you can a lough a lost me
And I will be tree in a line
And you can a burns in the glad in a polies
The best we hand of the crould
The can a well of the lone
I was seep you to see

I don't hurt you can the best
When you go the bood baby
I want you can see you wall
I'm a want the botter
I want me from the back
I want me me down
There's a give anywhere I can something up things
I'm not a spectre be not good the look around
There'll be gonna do in the burns
The end you can the said out the be around

I will here you can

Letter me alarms
From the lift the earth
The raindropses and the light
I supplies, this us you don't really like now coming a burns
There'll do stupprops
And I start a call the can a life
I lies and the lone in your hide
I was the earth a could you can the strison
You 
I was should be the world
The light is an a triend
I sust the born by in a line
Now the paris the fall of the sing out
The 



In [62]:
def generate_text_from_seed(model, seed, temperature, max_length=1000):
    """Continue ``seed`` by sampling characters from ``model``.

    The seed is fed through the model once to build the recurrent state, then
    characters are sampled one at a time until ">" is drawn or ``max_length``
    characters have been generated. The text is printed and returned. Relies
    on the notebook-level ``char_to_index`` / ``index_to_char`` tables.

    Args:
        model: module whose call returns ``(logits, [h, c])`` and which exposes
            a ``softmax`` callable.
        seed: non-empty starting text; every character must be in the vocabulary.
        temperature: divisor applied to the logits before the softmax.
        max_length: maximum number of characters sampled after the seed.

    Returns:
        The seed followed by the generated characters.

    Raises:
        ValueError: if ``seed`` is empty.
        KeyError: if ``seed`` contains a character missing from ``char_to_index``.
    """
    if not seed:
        raise ValueError("seed must contain at least one character")

    generated = seed
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else None
    seed_indices = torch.tensor(
        [char_to_index[ch] for ch in seed], dtype=torch.long, device=device
    ).unsqueeze(0)

    # Sample in eval mode without tracking gradients, then restore the mode
    # the model was in so a later training run is not silently affected.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            logits, [h, c] = model(seed_indices)
            next_logits = logits[:, -1, :]
            for _ in range(max_length):
                probabilities = model.softmax(torch.divide(next_logits, temperature), dim=1)
                # draw a sample from the predicted distribution
                sampled = torch.distributions.Categorical(probabilities).sample()
                last_char = index_to_char[sampled.item()]
                generated += last_char
                if last_char == ">":
                    break
                step_logits, [h, c] = model(sampled.view(1, 1), [h, c])
                next_logits = step_logits[:, -1, :]
    finally:
        model.train(was_training)
    print(generated)
    return generated

In [63]:
# Sample from the same seed at increasing temperatures to compare how
# conservative or adventurous the generated text becomes.
temperatures = [0.1, 0.5, 0.9]
for temp in temperatures:
    print(f"Temperature = {temp}")
    generate_text_from_seed(model, "Karma Police, arrest this man", temp)
    # Blank lines followed by a separator rule between two samples.
    print("\n\n", "-------------------------------------------------------------------------", sep="\n")

Temperature = 0.1
Karma Police, arrest this man
I want the be around
I want the be the paranough
The raindrops and the best you can
I want the cars and the best you can
I want the car are a line
I will be the best you can a line

I want the cars and the light
The really care the best you can a light
The best you can the be around
I will be the best you can the be the best
I want the be the say the see

I will be the best you can the best
I want the be around

I will see you can the be the say the see

I will be the best you can a light
The raindrops and the really see it coming and the see

I will be the stre all the stres
I will be the strient the say the say the see

I will be the best you can a bullet me
I want the cars and the cars
The raindrops and the raindrops
The best you can a bullet the see

I want the be the wards
The was something out the say the best
I want the cars and spectre
I will be a line

I want the be around
I will be the stre along
The care a bullet the say the sa

"Karma Police, arrest this man out

No little do you can

And thell will me out in the coming

There's a burn a really leave lest

I was such a lone

I something in a little out of me

Before your hautiout you the the born

I'm a carbolasses"

Pure Radiohead poetry!
