# MusicRec
## This is a mini project to recommend songs/artists based on lyrics

### Load required packages

In [1]:
import sys
import sklearn
import tensorflow as tf
from tensorflow import keras
import numpy as np
import pandas as pd
import os
np.random.seed(0)
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import pickle

### Load data
Using data from ['55,000+ Song Lyrics'](https://www.kaggle.com/mousehead/songlyrics) dataset on Kaggle

In [2]:
songdata= pd.read_csv('../data/songdata.csv')

In [3]:
songdata.head()

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \nAnd..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \nTouch me gentl..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \nWhy I had t...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


In [4]:
len(songdata)

57650

### First try using only ABBA songs

In [5]:
minisongdata=songdata[songdata['artist']=='ABBA']

In [6]:
len(minisongdata)

113

In [7]:
# Change this cache directory to something generic later
TFHUB_CACHE_DIR = "../data/my_tfhub_cache"
os.environ["TFHUB_CACHE_DIR"] = TFHUB_CACHE_DIR

In [8]:
# Using the NNLM (Neural Net Language Model) English 50 dimensional embedding trained on English Google News 7B Corpus
# Can play around with different embeddings here
import tensorflow_hub as hub
embed = hub.load("https://tfhub.dev/google/tf2-preview/nnlm-en-dim50/1")

In [9]:
# Test embedding
embeddings = embed(["cat is on the mat", "dog is in the fog"])

In [10]:
len(embeddings[1])

50

In [11]:
lyricslist = list(minisongdata['text'])

In [12]:
lyrics_embeddings = embed(lyricslist)

In [13]:
# Length of an embedding
len(lyrics_embeddings[0])

50

### Semantic search for songs

In [14]:
# Not that great. Possibly due to large mismatch between length of input query and actual lyrics
import scipy
query = 'Shes just my kind of girl, without her Im blue'

queries = [query]
query_embeddings = embed(queries)

number_top_matches = 10

print("Semantic Search Results")

for query, query_embedding in zip(queries, query_embeddings):
    distances = scipy.spatial.distance.cdist([query_embedding], lyrics_embeddings, "cosine")[0]

    results = zip(range(len(distances)), distances)
    results = sorted(results, key=lambda x: x[1])

    print("\n\n======================\n\n")
    print("Query:", query)
    print(f"\nTop {number_top_matches} most similar songs:")

    for idx, distance in results[0:number_top_matches]:
        print(minisongdata.iloc[idx]['song'].strip(), "(Cosine Score: %.4f)" % (1-distance))

Semantic Search Results




Query: Shes just my kind of girl, without her Im blue

Top 10 most similar songs:
Love Isn't Easy (Cosine Score: 0.6501)
Lay All Your Love On Me (Cosine Score: 0.6498)
My Love, My Life (Cosine Score: 0.6480)
Crazy World (Cosine Score: 0.6360)
Gonna Sing You My Lovesong (Cosine Score: 0.6319)
Me And Bobby And Bobby's Brother (Cosine Score: 0.6262)
Rubber Ball Man (Cosine Score: 0.6235)
Tiger (Cosine Score: 0.6209)
So Long (Cosine Score: 0.6190)
Ahe's My Kind Of Girl (Cosine Score: 0.6132)


In [15]:
minisongdata.iloc[0]['song']

"Ahe's My Kind Of Girl"

In [16]:
minisongdata.iloc[0]['text']

"Look at her face, it's a wonderful face  \nAnd it means something special to me  \nLook at the way that she smiles when she sees me  \nHow lucky can one fellow be?  \n  \nShe's just my kind of girl, she makes me feel fine  \nWho could ever believe that she could be mine?  \nShe's just my kind of girl, without her I'm blue  \nAnd if she ever leaves me what could I do, what could I do?  \n  \nAnd when we go for a walk in the park  \nAnd she holds me and squeezes my hand  \nWe'll go on walking for hours and talking  \nAbout all the things that we plan  \n  \nShe's just my kind of girl, she makes me feel fine  \nWho could ever believe that she could be mine?  \nShe's just my kind of girl, without her I'm blue  \nAnd if she ever leaves me what could I do, what could I do?\n\n"

### Now let's try using all songs

In [17]:
lyricslist = list(songdata['text'])

In [18]:
lyrics_embeddings = embed(lyricslist)

In [21]:
# Save the embeddings
pickle.dump(lyrics_embeddings, open('../models/lyrics_embeddings.pkl', 'wb'))

In [22]:
query = 'Shes just my kind of girl, without her Im blue'

queries = [query]
query_embeddings = embed(queries)

number_top_matches = 10

print("Semantic Search Results")

for query, query_embedding in zip(queries, query_embeddings):
    distances = scipy.spatial.distance.cdist([query_embedding], lyrics_embeddings, "cosine")[0]

    results = zip(range(len(distances)), distances)
    results = sorted(results, key=lambda x: x[1])

    print("\n\n======================\n\n")
    print("Query:", query)
    print(f"\nTop {number_top_matches} most similar songs:")

    for idx, distance in results[0:number_top_matches]:
        print(songdata.iloc[idx]['song'].strip(),'by', songdata.iloc[idx]['artist'].strip(), "(Cosine Score: %.4f)" % (1-distance))

Semantic Search Results




Query: Shes just my kind of girl, without her Im blue

Top 10 most similar songs:
Since I Lost My Baby by The Temptations (Cosine Score: 0.7654)
My Baby's Good To Me by Fleetwood Mac (Cosine Score: 0.7501)
Pride And Joy by Stevie Ray Vaughan (Cosine Score: 0.7432)
Prince Of Peace by Amy Grant (Cosine Score: 0.7427)
Sugar Sweet by Freddie King (Cosine Score: 0.7213)
Out Of Time by Ramones (Cosine Score: 0.7211)
My Baby by Neil Sedaka (Cosine Score: 0.7189)
Make Up by Lou Reed (Cosine Score: 0.7185)
My Little Shitzu by Ramones (Cosine Score: 0.7174)
Two Of A Kind, Workin' On A Full House by Garth Brooks (Cosine Score: 0.7174)


## Song recommendations

### But first, some helper blocks to search for songs/artists for testing

In [23]:
# Search for all songs by an artist
songdata[songdata['artist'].str.contains('metallica', case=False)]

Unnamed: 0,artist,song,link,text
12787,Metallica,2X4,/m/metallica/2x4_20092062.html,"I'm gonna make you, shake you, take you \nI'm..."
12788,Metallica,All Nightmare Long,/m/metallica/all+nightmare+long_20756984.html,"(One, two...) \n \nLuck runs out \n \nCraw..."
12789,Metallica,All Within My Hands,/m/metallica/all+within+my+hands_10178918.html,"All within my hands \nSqueeze it in, crush it..."
12790,Metallica,Battery,/m/metallica/battery_20092039.html,"Lashing out the action, returning the reaction..."
12791,Metallica,Brandenburg Gate,/m/metallica/brandenburg+gate_20982888.html,I would cut my legs and tits off \nWhen I thi...
...,...,...,...,...
43561,Metallica,Spit Out The Bone,/m/metallica/spit+out+the+bone_21111998.html,Come unto me and you will feel perfection \nC...
43562,Metallica,Stone Dead Forever,/m/metallica/stone+dead+forever_20092019.html,And didn't you see me in the glass \nAnd didn...
43563,Metallica,Wherever I May Roam,/m/metallica/wherever+i+may+roam_20092060.html,And the road becomes my bride \nI have stripp...
43564,Metallica,Whiplash,/m/metallica/whiplash_20092037.html,"Jason, you're too fucking metal man \nToo met..."


In [24]:
# Search for all songs by song name
songdata[songdata['song'].str.contains('Beautiful', case=False)]

Unnamed: 0,artist,song,link,text
707,Amy Grant,Beautiful Music,/a/amy+grant/beautiful+music_20007617.html,"Once my life had no direction, I was lost as c..."
1036,Barbra Streisand,"America, The Beautiful",/b/barbra+streisand/america+the+beautiful_1002...,"O beautiful for spacious skies, \nFor amber w..."
1082,Barbra Streisand,His Love Makes Me Beautiful,/b/barbra+streisand/his+love+makes+me+beautifu...,Fanny is now part of the Ziegfeld Follies. She...
1234,Beautiful South,Big Beautiful South,/b/beautiful+south/big+beautiful+south_2059228...,My pod is bigger than your pod \nMy life is b...
1468,Bing Crosby,Oh What A Beautiful Mornin,/b/bing+crosby/oh+what+a+beautiful+mornin_2052...,"There's a bright golden haze on the meadow, \..."
...,...,...,...,...
55630,Waterboys,She Is So Beautiful,/w/waterboys/she+is+so+beautiful_20145462.html,She is so beautiful \nI've got no words to de...
55963,Westlife,Beautiful In White,/w/westlife/beautiful+in+white_21106550.html,Not sure if you know this \nBut when we first...
56522,Willie Nelson,America The Beautiful,/w/willie+nelson/america+the+beautiful_2080765...,O beautiful for spacious skies \nFor amber wa...
56562,Willie Nelson,Everything's Beautiful (In It's Own Way),/w/willie+nelson/everythings+beautiful+in+its+...,When I look out over a green field of clover ...


In [25]:
# Search for song and artist combination
songdata[songdata['song'].str.contains('beautiful', case=False) & songdata['artist'].str.contains('streisand', case=False)]

Unnamed: 0,artist,song,link,text
1036,Barbra Streisand,"America, The Beautiful",/b/barbra+streisand/america+the+beautiful_1002...,"O beautiful for spacious skies, \nFor amber w..."
1082,Barbra Streisand,His Love Makes Me Beautiful,/b/barbra+streisand/his+love+makes+me+beautifu...,Fanny is now part of the Ziegfeld Follies. She...
24570,Barbra Streisand,Beautiful,/b/barbra+streisand/beautiful_20012564.html,You've gotta get up every morning \nPut a smi...


### Song recommendations based on lyrics

In [26]:
# Choosing a song based on the index number
pos=24213

query = songdata.iloc[pos]['text']

queries = [query]
query_embeddings = embed(queries)

number_top_matches = 10

print("Song recommendations")

for query, query_embedding in zip(queries, query_embeddings):
    distances = scipy.spatial.distance.cdist([query_embedding], lyrics_embeddings, "cosine")[0]

    results = zip(range(len(distances)), distances)
    results = sorted(results, key=lambda x: x[1])

    print("\n\n======================\n\n")
    print("Query:", query)
    print(f"\nTop {number_top_matches} most similar songs:")

    for idx, distance in results[1:number_top_matches+1]:
        print(songdata.iloc[idx]['song'].strip(),'by', songdata.iloc[idx]['artist'].strip(), "(Similarity Score: %.4f)" % (1-distance))

Song recommendations




Query: [Verse 1]  
Don't need permission  
Made my decision to test my limits  
Cause it's my business, God as my witness  
Start what I finished  
Don't need no hold up  
Taking control of this kind of moment  
I'm locked and loaded  
Completely focused, my mind is open  
  
[Pre-Chorus]  
All that you got, skin to skin, oh my God  
Don't ya stop, boy  
  
[Chorus]  
Somethin' 'bout you makes me feel like a dangerous woman  
Somethin' 'bout, somethin' 'bout, somethin' 'bout you  
Makes me wanna do things that I shouldn't  
Somethin' 'bout, somethin' 'bout, somethin' 'bout  
  
[Verse 2]  
Nothing to prove and I'm bulletproof and  
Know what I'm doing  
The way we're movin' like introducing  
Us to a new thing  
I wanna savor, save it for later  
To taste the flavor, cause I'm a taker  
Cause I'm a giver, it's only nature  
I live for danger  
  
[Pre-Chorus]  
All that you got, skin to skin, oh my God  
Don't ya stop, boy  
  
[Chorus]  
Somethin' 'bout you ma

In [27]:
songdata[songdata['song'].str.contains('girl', case=False) & songdata['artist'].str.contains('prince', case=False)].iloc[0]['text']

"Girl  \nYou excite me so  \nOoh wee baby, your body's like no other  \n  \nGirl  \nIt's you I gotta know (Gotta know)  \nOoh wee baby, I bet you're quite a lover  \n  \nGirl (Ooh girl)  \nI want to take you home  \nOoh wee baby you dream I scream inside you  \n  \nI want you in the worst way  \nYou make me  \n  \nGirl  \nGirl  \n  \nGirl  \nWon't you kiss me?  \nOoh wee baby, my lips they want you so  \n  \nGirl  \nHow can you resist me? (How can you resist me?)  \nThe smell of animal lust is a-all over me  \n(The smell of animal lust is a-all over me)  \n  \nOh, girl  \nIf I could hold your hand  \nI'd make you touch my body until you understand  \n  \nI'm your man (I'm your man)  \nAll night, all day (All night, all day)  \nI want you in the worst way  \nYou make me (You make me)  \n  \nCaress the flower  \nWarm, warm  \nBring it to the garden  \nThe garden  \nBe poetic  \nTell me what it feels like,  \nA sea of electricity?  \nNow that's wonderful  \nTalk to you? Talk to you?  \nWh

## Artist Recommendations

### Recommend similar artists based on lyrics

In [28]:
# Join all the lyrics for artists together
songdata.groupby('artist').agg({'text': ' '.join})

Unnamed: 0_level_0,text
artist,Unnamed: 1_level_1
'n Sync,Girl don't say that it's over \n'Cause you ar...
ABBA,"Look at her face, it's a wonderful face \nAnd..."
Ace Of Base,"Always have, always will \nI was mesmerized w..."
Adam Sandler,"Hello mudda, hello fadda \nHere I am at camp ..."
Adele,[Verse 1] \nI will leave my heart at the door...
...,...
Zoegirl,I've been thinking lately about you \nWhen it...
Zornik,"Is a dream a lie, when you know it won't come ..."
Zox,If I could write you a song \nWith all the ri...
Zucchero,"I tell ya brother, man shit happens \nBy the ..."


In [29]:
artistdata = songdata.groupby('artist').agg({'text': ' '.join})

In [30]:
artistdata.loc['ABBA']['text']



In [16]:
#import tensorflow_hub as hub
#embed = hub.load("https://tfhub.dev/google/tf2-preview/nnlm-en-dim50/1")

In [31]:
lyricslist_artist = list(artistdata['text'])

In [32]:
artist_embeddings = embed(lyricslist_artist)

In [33]:
len(artist_embeddings)

643

In [34]:
pickle.dump(artist_embeddings, open('../models/artist_embeddings.pkl', 'wb'))

In [36]:
import scipy
artist='Metallica'

query = artistdata.loc[artist]['text']

queries = [query]
query_embeddings = embed(queries)

number_top_matches = 10

print("Artist Recommendations")

for query, query_embedding in zip(queries, query_embeddings):
    distances = scipy.spatial.distance.cdist([query_embedding], artist_embeddings, "cosine")[0]

    results = zip(range(len(distances)), distances)
    results = sorted(results, key=lambda x: x[1])

    print("\n\n======================\n\n")
    print("Query:", artist)
    print(f"\nTop {number_top_matches} most similar artists:")

    for idx, distance in results[1:number_top_matches+1]:
        print(artistdata.iloc[idx].name, "(Similarity Score: %.4f)" % (1-distance))

Artist Recommendations




Query: Metallica

Top 10 most similar artists:
Alice In Chains (Similarity Score: 0.9947)
Radiohead (Similarity Score: 0.9916)
INXS (Similarity Score: 0.9910)
U2 (Similarity Score: 0.9910)
Judas Priest (Similarity Score: 0.9909)
Rainbow (Similarity Score: 0.9905)
Marilyn Manson (Similarity Score: 0.9904)
Alice Cooper (Similarity Score: 0.9902)
Extreme (Similarity Score: 0.9901)
Misfits (Similarity Score: 0.9899)


In [37]:
# Artist lookup
artistdata[artistdata.index.str.contains('swift', case=False)]

Unnamed: 0_level_0,text
artist,Unnamed: 1_level_1
Taylor Swift,You have a way of coming easily to me \nAnd w...


## Next steps
### We have a simple model to recommend songs or artists based on similar lyrics, but how can we improve it?
1. Can try other kinds of word embeddings.
2. Engineer other features like genre, sentiment
3. Can use a larger dataset of songs and artists, this is quite limited