# Song Recommendations

추준호(20224224)

Source: https://github.com/jalammar/jalammar.github.io/blob/master/notebooks/nlp/02_Song_Embeddings.ipynb

## Import data

In [2]:
import requests

In [3]:
# Location of the playlist training file (train.txt) that is downloaded in the next cell.
url = 'https://storage.googleapis.com/maps-premium/dataset/yes_complete/train.txt'

In [5]:
# Download the file at `url`. The timeout stops a stalled connection from hanging the
# notebook, and raise_for_status() surfaces HTTP errors here instead of letting an
# error page be parsed as data further down.
resp = requests.get(url, timeout=30)
resp.raise_for_status()

In [8]:
# Split the downloaded text on newlines and drop the first two lines.
# NOTE(review): the two skipped lines are presumably header/metadata rather than
# playlists - confirm against the dataset description.
lines = resp.text.split('\n')[2:]

In [9]:
# Sanity check: how many lines remain after dropping the first two.
len(lines)

11138

In [10]:
# Peek at the first kept line: space-separated integer ids with a trailing space.
lines[0]

'0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 2 42 43 44 45 46 47 48 20 49 8 50 51 52 53 54 55 56 57 25 58 59 60 61 62 3 63 64 65 66 46 47 67 2 48 68 69 70 57 50 71 72 53 73 25 74 59 20 46 75 76 77 59 20 43 '

In [15]:
# Tokenise each line into a list of id strings. split() with no separator already
# ignores leading/trailing whitespace and collapses runs of spaces.
playlists = [line.split() for line in lines]
len(playlists)

11138

In [16]:
# Keep only the playlists that contain at least two entries.
playlists = [entries for entries in playlists if len(entries) >= 2]
len(playlists)

11088

## Training the Word2Vec Model

In [17]:
from gensim.models import Word2Vec

In [18]:
# Train Word2Vec on the playlists: each playlist is passed as one sequence and each
# song id as one token. min_count=1 keeps ids that occur only once.
# NOTE(review): per the gensim docs, training with workers > 1 is not bit-for-bit
# reproducible, so the similarity scores shown later can differ slightly between runs.
model = Word2Vec(
    playlists, vector_size=32, window=20, negative=50, min_count=1, workers=4
)

## Song Title and Artist File

In [19]:
# Location of song_hash.txt, parsed below into (id, title, artist) rows.
# NOTE: this rebinds `url`, which previously pointed at train.txt.
url = 'https://storage.googleapis.com/maps-premium/dataset/yes_complete/song_hash.txt'

In [20]:
# Download the file at `url` (this rebinds `resp` from the earlier download).
# The timeout stops a stalled connection from hanging the notebook, and
# raise_for_status() surfaces HTTP errors here instead of at parse time.
resp = requests.get(url, timeout=30)
resp.raise_for_status()

In [21]:
# Each non-empty line is "<id>\t<title>\t<artist>". Blank lines (for example the empty
# string left after a final newline) are skipped so they do not turn into an
# all-empty song row once the list is loaded into a DataFrame.
songs = resp.text.split('\n')
songs = [x.strip().split('\t') for x in songs if x.strip()]
songs[:3]

[['0 ', 'Gucci Time (w\\/ Swizz Beatz)', 'Gucci Mane'],
 ['1 ', 'Aston Martin Music (w\\/ Drake & Chrisette Michelle)', 'Rick Ross'],
 ['2 ', 'Get Back Up (w\\/ Chris Brown)', 'T.I.']]

In [22]:
import pandas as pd

In [23]:
# Load the parsed rows into a DataFrame with one named column per field.
df_songs = pd.DataFrame(
    songs,
    columns=['id', 'title', 'artist'],
)

In [24]:
# Show the frame as built: default integer index, with 'id' still a regular column.
df_songs

Unnamed: 0,id,title,artist
0,0,Gucci Time (w\/ Swizz Beatz),Gucci Mane
1,1,Aston Martin Music (w\/ Drake & Chrisette Mich...,Rick Ross
2,2,Get Back Up (w\/ Chris Brown),T.I.
3,3,Hot Toddy (w\/ Jay-Z & Ester Dean),Usher
4,4,Whip My Hair,Willow
...,...,...,...
75258,75258,USA Today,Alan Jackson
75259,75259,Superstar,Raul Malo
75260,75260,Romancin' The Blues,Giacomo Gates
75261,75261,Inner Change,The Jazzmasters


In [25]:
# The tab split leaves trailing whitespace on the ids (e.g. '0 '). Strip it with the
# vectorised string accessor so the ids match the whitespace-free ids in `playlists`;
# unlike a per-element lambda, .str.strip() also passes missing values through.
df_songs['id'] = df_songs['id'].str.strip()

In [26]:
# Index the songs by their (string) id so they can be looked up with .loc.
# NOTE: not re-runnable - after the first run 'id' is no longer a column, so running
# this cell (or the strip cell before it) again raises KeyError.
df_songs = df_songs.set_index('id')

In [27]:
# Show the frame again, now indexed by 'id'.
df_songs

Unnamed: 0_level_0,title,artist
id,Unnamed: 1_level_1,Unnamed: 2_level_1
0,Gucci Time (w\/ Swizz Beatz),Gucci Mane
1,Aston Martin Music (w\/ Drake & Chrisette Mich...,Rick Ross
2,Get Back Up (w\/ Chris Brown),T.I.
3,Hot Toddy (w\/ Jay-Z & Ester Dean),Usher
4,Whip My Hair,Willow
...,...,...
75258,USA Today,Alan Jackson
75259,Superstar,Raul Malo
75260,Romancin' The Blues,Giacomo Gates
75261,Inner Change,The Jazzmasters


In [28]:
# Rows whose artist is exactly 'Norah Jones' (exact, case-sensitive string match).
df_songs[df_songs['artist'] == 'Norah Jones']

Unnamed: 0_level_0,title,artist
id,Unnamed: 1_level_1,Unnamed: 2_level_1
4846,Don't Know Why,Norah Jones
4896,Chasing Pirates,Norah Jones
4905,Sunrise,Norah Jones
4978,Come Away With Me,Norah Jones
5126,Thinking About You,Norah Jones
7122,Tell Yer Mama,Norah Jones
11133,Turn Me On,Norah Jones
11155,Feelin' The Same Way,Norah Jones
12981,Be My Somebody,Norah Jones
13039,It's Gonna Be,Norah Jones


In [30]:
# Positional lookup of the rows at positions 1, 10 and 100 (iloc ignores index labels).
df_songs.iloc[[1,10,100]]

Unnamed: 0_level_0,title,artist
id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,Aston Martin Music (w\/ Drake & Chrisette Mich...,Rick Ross
10,Shake It,Elephant Man
100,I'm Yours,Jason Mraz


## Recommend similar songs

In [31]:
# Query song, given as a string because both the df_songs index and the playlist
# tokens the model was trained on are strings.
song_id = '4846'

In [32]:
# Label-based lookup of the query song's title and artist.
df_songs.loc[song_id]

title     Don't Know Why
artist       Norah Jones
Name: 4846, dtype: object

In [34]:
# Ask the trained vectors for the songs closest to the query song.
# The result is a list of (song id, similarity score) tuples.
similars = model.wv.most_similar(positive=song_id)
similars

[('5219', 0.992164134979248),
 ('1199', 0.9915337562561035),
 ('3938', 0.988633930683136),
 ('5260', 0.9882075190544128),
 ('8405', 0.9879229068756104),
 ('8379', 0.9864093065261841),
 ('4791', 0.9860796928405762),
 ('8535', 0.9859747290611267),
 ('3856', 0.9835509657859802),
 ('5176', 0.9828998446464539)]

In [35]:
import numpy as np

In [40]:
# Keep only the song ids from the (id, score) pairs. Unpacking directly avoids
# np.array coercing the float scores to strings just to slice out the first column.
similars_ids = [neighbour_id for neighbour_id, _score in similars]

In [41]:
# Map the neighbour ids back to title/artist; .loc with a list keeps the given order.
df_songs.loc[similars_ids]

Unnamed: 0_level_0,title,artist
id,Unnamed: 1_level_1,Unnamed: 2_level_1
5219,Fly Like An Eagle,Seal
1199,Put Your Records On,Corinne Bailey Rae
3938,Missing,Everything But The Girl
5260,White Flag,Dido
8405,Angel,Sarah McLachlan
8379,True,Spandau Ballet
4791,Sunday Morning,Maroon 5
8535,I Can't Tell You Why,The Eagles
3856,It Ain't Over 'til It's Over,Lenny Kravitz
5176,Say,John Mayer


In [46]:
def print_recommendations(song_id, topn=10):
    """Print a song's metadata and return the songs most similar to it.

    Parameters
    ----------
    song_id : str
        Id of the query song; used as a label in ``df_songs`` and as a key in
        ``model.wv``.
    topn : int, default 10
        Number of neighbours requested from ``most_similar``.

    Returns
    -------
    pandas.DataFrame
        Rows of ``df_songs`` for the neighbouring songs, in the order returned by
        ``most_similar``.

    Raises
    ------
    KeyError
        If ``song_id`` is not a label in ``df_songs``.
    """
    print(df_songs.loc[song_id])

    # most_similar yields (song id, score) pairs. Unpack the ids directly instead of
    # going through np.array, which coerces the scores to strings and raises
    # IndexError when the result is empty.
    similars = model.wv.most_similar(positive=[song_id], topn=topn)
    similar_ids = [neighbour_id for neighbour_id, _score in similars]

    return df_songs.loc[similar_ids]

## More Examples

### Master of Puppets - Metallica

In [47]:
# Recommendations for song id '2068' ("Master Of Puppets" by Metallica).
print_recommendations('2068')

title     Master Of Puppets
artist            Metallica
Name: 2068, dtype: object


Unnamed: 0_level_0,title,artist
id,Unnamed: 1_level_1,Unnamed: 2_level_1
2104,Life Won't Wait,Ozzy Osbourne
1849,Bad Company,Five Finger Death Punch
6685,The Trooper,Iron Maiden
2164,Voodoo,Godsmack
2062,Outshined,Soundgarden
5479,Wild Side,Motley Crue
2014,Youth Gone Wild,Skid Row
1792,Love Hate Sex Pain,Godsmack
2066,Whatever,Godsmack
1922,One,Metallica


### Billie Jean - Michael Jackson

In [49]:
# Recommendations for song id '3822' ("Billie Jean" by Michael Jackson).
print_recommendations('3822')

title         Billie Jean
artist    Michael Jackson
Name: 3822, dtype: object


Unnamed: 0_level_0,title,artist
id,Unnamed: 1_level_1,Unnamed: 2_level_1
4187,I Wanna Dance With Somebody (Who Loves Me),Whitney Houston
15660,Let The Music Play,Shannon
4181,Kiss,Prince & The Revolution
12749,Wanna Be Startin' Somethin',Michael Jackson
4157,P.Y.T. (Pretty Young Thing),Michael Jackson
3942,I Would Die 4 U,Prince & The Revolution
3384,Hungry Eyes,Eric Carmen
4013,Down Under,Men At Work
1506,The Way You Make Me Feel,Michael Jackson
3357,Manic Monday,The Bangles
