In [79]:
from sentence_transformers import SentenceTransformer
import torch
import numpy as np
import pandas as pd

In [80]:
# Sentence-embedding model shared by the tokenizer, encode and similarity cells.
MODEL_NAME = "all-MiniLM-L6-v2"
model = SentenceTransformer(MODEL_NAME)

In [3]:
# Lyrics corpus used throughout the notebook; the path is relative to the
# directory the notebook is run from.
LYRICS_CSV = "../../data/processed/english_lyrics_cleaned.csv"
data = pd.read_csv(LYRICS_CSV)

In [4]:
# Count rows per artist (most frequent first) and display the ten largest.
rows_per_artist = data["artist"].value_counts()
rows_per_artist.head(10)


artist
Genius English Translations    13582
The Grateful Dead               2103
Emily Dickinson                 1617
Abraham Lincoln                 1488
Lil B                           1280
Frank Zappa                     1255
William Shakespeare             1228
KIDZ BOP Kids                   1194
Guided by Voices                1187
Holy Bible (KJV)                1186
Name: count, dtype: int64

In [5]:
# Number of distinct song titles credited to the artist "Prince".
# A single .loc lookup (row mask + column) replaces the chained indexing.
is_prince = data["artist"] == "Prince"
data.loc[is_prince, "title"].nunique()

804

In [4]:
# Prolific artists (> 200 songs each) that write their own songs, selected by hand.
anchor_artists = [
    "The Beatles",
    "Queen",
    "Bob Dylan",
    "Taylor Swift",
    "Ed Sheeran",
    "Demi Lovato",
    "Sia",
    "Eminem",
    "Johnny Cash",
    "Lady Gaga",
    "Kanye West",
    "Prince",
]
# Rows of `data` that belong to an anchor artist. The explicit copy makes this
# an independent frame, so later code can modify it without pandas warning
# about writing to a slice of `data`.
anchor_artist_data = data[data["artist"].isin(anchor_artists)].copy()

In [5]:
# Maximum input length, in tokens, that the loaded model is configured for.
model.get_max_seq_length()

256

In [8]:
# Mean token count of the anchor artists' lyrics, for comparison with the
# model's maximum sequence length. Reuses `anchor_artist_data` instead of
# filtering `data` a second time.
# NOTE(review): `encode` presumably also counts the special tokens it adds —
# confirm against the tokenizer documentation.
tokenizer = model.tokenizer
anchor_artist_data["lyrics"].map(
    lambda text: len(tokenizer.encode(text))
).mean()

Token indices sequence length is longer than the specified maximum sequence length for this model (886 > 256). Running this sequence through the model will result in indexing errors


np.float64(334.4970204541794)

In [6]:
import sys

# Make the project package importable from the notebook. The guard keeps a
# re-run of this cell from appending the same entry to sys.path again.
PROJECT_SRC = "../"
if PROJECT_SRC not in sys.path:
    sys.path.append(PROJECT_SRC)
from features.build_features import create_artist_embeddings

# Compute embeddings for the anchor artists together with the matching names.
# NOTE(review): the second argument is presumably an output location — confirm
# in features/build_features.py.
artist_embeddings, artists_names = create_artist_embeddings(
    anchor_artist_data, "./artist"
)

Processing songs: 100%|██████████| 359/359 [00:06<00:00, 52.48it/s]
Processing songs: 100%|██████████| 862/862 [00:13<00:00, 63.13it/s]
Processing songs: 100%|██████████| 436/436 [00:07<00:00, 60.23it/s]
Processing songs: 100%|██████████| 446/446 [00:06<00:00, 64.55it/s]
Processing songs: 100%|██████████| 316/316 [00:05<00:00, 54.52it/s]
Processing songs: 100%|██████████| 762/762 [00:11<00:00, 68.66it/s]
Processing songs: 100%|██████████| 974/974 [00:13<00:00, 69.79it/s]
Processing songs: 100%|██████████| 492/492 [00:08<00:00, 58.92it/s]
Processing songs: 100%|██████████| 804/804 [00:13<00:00, 61.34it/s]
Processing songs: 100%|██████████| 258/258 [00:04<00:00, 58.89it/s]
Processing songs: 100%|██████████| 234/234 [00:03<00:00, 60.72it/s]
Processing songs: 100%|██████████| 266/266 [00:05<00:00, 52.10it/s]
Processing artists: 100%|██████████| 12/12 [01:40<00:00,  8.36s/it]


In [81]:
import pickle

# Load previously saved artist embeddings and names from disk. This rebinds
# `artist_embeddings` / `artists_names`, replacing the values returned by
# create_artist_embeddings earlier in the notebook.
artist_embeddings = np.load("../embeddings/artist_embeddings.npy")
# The context manager closes the file handle once the names are read.
# NOTE: unpickling can execute arbitrary code — only load files you trust.
with open("../embeddings/artist_names.pkl", "rb") as names_file:
    artists_names = pickle.load(names_file)

In [82]:
# Inspect the loaded artist names.
# NOTE(review): presumably entry i labels row i of artist_embeddings — confirm.
artists_names

['Eminem',
 'Bob Dylan',
 'Kanye West',
 'Queen',
 'Lady Gaga',
 'The Beatles',
 'Johnny Cash',
 'Taylor Swift',
 'Prince',
 'Ed Sheeran',
 'Sia',
 'Demi Lovato']

In [41]:
# Sample lyrics kept as a candidate query text; no cell shown here reads this
# variable. NOTE(review): presumably written to imitate The Beatles, judging by
# the variable name — confirm the provenance.
beatles_fake_lyrics = """
In the neon glow of midnight streets,
I hear your whispers in the fading heat,
Every echo brings your name to life,
Dancing memories cut like a knife.
Chasing shadows, where we used to run,
Under moonlit skies, believing we were young,
Every step forward feels like a step back,
In the silence, I still trace the tracks.
Worn-out pictures on my bedroom floor,
Each smile a promise, but I want more,
You left a mark, but the scars have faded,
Still I wander, dreaming, feeling jaded.
Chasing shadows, where we used to run,
Under moonlit skies, believing we were young,
Every step forward feels like a step back,
In the silence, I still trace the tracks.
But I hear the whispers, through the fallin’ rain,
Every drop a heartbeat, singing out your name,
Maybe one day, I’ll find the light,
Break these chains, step into the night.
So here I go, just a heart in the dark,
Chasing shadows, chasing sparks,
I’ll keep running 'til the dawn breaks free,
In the chase, I’ll find the best of me.
"""

In [9]:
# Sample lyrics kept as an alternative query text; the only reference in the
# cells shown here is a commented-out model.encode call.
# NOTE(review): presumably written to imitate Taylor Swift, judging by the
# variable name — confirm the provenance.
taylor_swift_fake_lyrics = """
We were young with a dream and a midnight sky,
Dancing on rooftops, trying to touch the fireflies,
You whispered promises, like secrets in the dark,
Every heartbeat echoed like a wild, shooting star.

But seasons change, and so did we,
Like autumn leaves falling, oh can't you see?
But I'll hold the moments, even when they fade,
You were my anthem, a sweet serenade.

So I'm chasing starlight, running through the night,
With every step I take, I can feel your light.
Fingers intertwined, we were writing our story,
In a world of chaos, you were my glory.
Oh, I'm chasing starlight, through the twists and turns,
In the ashes of our love, it's you that forever burns.

We painted skies with laughter, wrote our names in the sand,
With every laugh and every tear, I was your biggest fan,
But time slipped like water, and we drifted apart,
Yet echoes of your laughter still linger in my heart.

But I'll keep the sparks, even if they fade,
In a sepia memory, where none are betrayed.
You were my symphony, a song so divine,
Even in the silence, our hearts still align.

So I'm chasing starlight, running through the night,
With every step I take, I can feel your light.
Fingers intertwined, we were writing our story,
In a world of chaos, you were my glory.
Oh, I'm chasing starlight, through the twists and turns,
In the ashes of our love, it's you that forever burns.

So here's to the nights when we danced in the rain,
Through every heartbreak, through every sweet pain,
You may be a whisper, but I'll never forget,
The way we wrote our destiny, a love without regret.

So I'm chasing starlight, running through the night,
With every step I take, I can feel your light.
Fingers intertwined, we were writing our story,
In a world of chaos, you were my glory.
Oh, I'm chasing starlight, through the twists and turns,
In the ashes of our love, it's you that forever burns.

So if you see me, looking up at the stars,
Just know I'm still dreaming, chasing you from afar.
In the galaxy of hearts, where love never dies,
I'll keep chasing starlight, till the end of time.
"""

In [83]:
# Show the query lyrics.
# NOTE(review): hey_lyrics is only assigned in a later cell (from the validation
# CSV), so this cell raises NameError on a fresh top-to-bottom run.
hey_lyrics

"Yo, I’m boxed in a cage, rage like a lion,  \nFighting shadows in the dark, while the world’s on fire,  \nEvery word's a bullet, each verse a choir,  \nSpitting venom on these tracks, fueled by desire.  \n\nI’m the kid in the corner, scribbling dreams,  \nStitching seams of reality, ripping at the seams,  \nLife’s a twisted puzzle, melting at the beams,  \nRunning laps ‘round the block, chasing lost themes.  \n\nGot a mind like a weapon, loaded with regret,  \nPushing limits, bending bars, you ain't seen nothin' yet,  \nGraffiti on my heart, a masterpiece of threat,  \nIn the game of chess, but I’m betting on the check.  \n\nClimbing out the gutter, grabbing at the sky,  \nEvery tear’s a badge, can’t tell me how to fly,  \nThey told me I was done, but I answered with a sigh,  \n‘Cause the phoenix in my chest ain’t ever said goodbye.  \n\nSo I’m kicking down the doors, breaking through the haze,  \nEvery beat a heartbeat, in this lyrical maze,  \nSpitting truth like a prophet, in a wor

In [84]:
# Embed the query lyrics and request the result as a NumPy array. The
# commented-out line is the alternative query using the Taylor Swift sample.
# query_embedding = model.encode(taylor_swift_fake_lyrics, convert_to_numpy=True)
query_embedding = model.encode(hey_lyrics, convert_to_numpy=True)


In [85]:
# Score the query embedding against the artist embeddings; [0] keeps the first
# row of the result. NOTE(review): presumably one score per artist, in
# artists_names order — confirm.
similarity_scores = model.similarity(query_embedding, artist_embeddings)[0]

In [86]:
# Five largest similarity scores and the positions they were found at.
top_matches = torch.topk(similarity_scores, k=5)
scores, indices = top_matches.values, top_matches.indices

In [87]:
# The five top similarity scores returned by torch.topk.
scores

tensor([0.6904, 0.6829, 0.6686, 0.6649, 0.6636])

In [88]:
# Positions of the top scores within similarity_scores.
# NOTE(review): presumably these index into artists_names — confirm.
indices


tensor([ 3,  1,  7, 10,  9])

In [89]:
# Manual lookup of one returned position: 7 is the third entry of `indices`.
# NOTE(review): the highest-scoring position is indices[0]; consider mapping
# every returned index to its name instead of hard-coding one.
artists_names[7]

'Taylor Swift'

In [53]:
# Load the generated validation lyrics. pandas is already imported as `pd` in
# the notebook's first cell, so it is not imported again here.
df = pd.read_csv("../../data/generated/validation_lyrics.csv")

In [63]:
# Select the Eminem validation lyric by artist name rather than by row
# position, so the choice does not depend on the CSV's row order.
hey_lyrics = df.loc[df["artist"] == "Eminem", "lyrics"].iloc[0]

In [75]:
# Display the validation frame.
# NOTE(review): an "Unnamed: 0" column usually means the CSV was written with
# its index; reading with index_col=0 would presumably remove it — confirm.
df

Unnamed: 0,artist,lyrics
0,The Beatles,"In a town where the shadows play, \nWhispers ..."
1,Queen,"In the heart of the city, where the shadows pl..."
2,Bob Dylan,"In the shadow of the rusty train, \nWhere whi..."
3,Taylor Swift,"In the corner of a crowded room, \nYou brushe..."
4,Ed Sheeran,"In the glow of city lights, where shadows soft..."
5,Demi Lovato,"I fell down like a comet, \nBurned bright but..."
6,Sia,"In the shadows where the lost hearts dance, \..."
7,Eminem,"Yo, I’m boxed in a cage, rage like a lion, \n..."
8,Johnny Cash,"In the shadows of the evening, where the lonel..."
9,Lady Gaga,"In the neon glow, where the shadows dance, \n..."
