In [4]:
import numpy as np 
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


In [5]:
df = pd.read_csv("spotify_millsongdata.csv")

df.head()

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \r\nA..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \r\nTouch me gen..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \r\nWhy I had...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


In [6]:
df.isnull().sum()

artist    0
song      0
link      0
text      0
dtype: int64

In [7]:
df.head(5)

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \r\nA..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \r\nTouch me gen..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \r\nWhy I had...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


In [8]:
df.describe()

Unnamed: 0,artist,song,link,text
count,57650,57650,57650,57650
unique,643,44824,57650,57494
top,Donna Summer,Have Yourself A Merry Little Christmas,/a/abba/ahes+my+kind+of+girl_20598417.html,I just came back from a lovely trip along the ...
freq,191,35,1,6


In [9]:
df = df.sample(5000).drop('link', axis=1).reset_index(drop=True)

In [10]:

df.shape


(5000, 3)

In [11]:
df['text']

0       Michelle, little girl I need you baby  \r\nMor...
1       I remember when you were born  \r\nI felt like...
2       I know it's late, I know you're weary  \r\nI k...
3       Key- P-E-Chother  \r\nFrom-distress-of-the-hea...
4       Let me apologize to begin with  \r\nLet me apo...
                              ...                        
4995    Oh Danny boy, the pipes, the pipes are calling...
4996    Something has changed within me  \r\nSomething...
4997    Seems the time is near for finding a place out...
4998    Woke up this morning, my baby's gone  \r\nWoke...
4999    Through the windswept coastal trees  \r\nWhere...
Name: text, Length: 5000, dtype: object

In [12]:
df['text'] = df['text'].str.replace(r'^\w\s', ' ').replace(r'\n', ' ', regex=True)

df['text'].head()

0    Michelle, little girl I need you baby  \r More...
1    I remember when you were born  \r I felt like ...
2    I know it's late, I know you're weary  \r I kn...
3    Key- P-E-Chother  \r From-distress-of-the-hear...
4    Let me apologize to begin with  \r Let me apol...
Name: text, dtype: object

In [13]:
import nltk
from nltk.stem.porter import PorterStemmer

stemmer = PorterStemmer()

def token(txt):
    token = nltk.word_tokenize(txt)
    a = [stemmer.stem(word) for word in token] 
    return " ".join(a)

In [14]:
token("you are beautiful, beauty is in the eyes of the beholder")

'you are beauti , beauti is in the eye of the behold'

In [15]:
df['text'].apply(lambda x: token(x))

0       michel , littl girl i need you babi more than ...
1       i rememb when you were born i felt like one lu...
2       i know it 's late , i know you 're weari i kno...
3       key- p-e-choth from-distress-of-the-heart for-...
4       let me apolog to begin with let me apolog for ...
                              ...                        
4995    oh danni boy , the pipe , the pipe are call fr...
4996    someth ha chang within me someth is not the sa...
4997    seem the time is near for find a place outsid ...
4998    woke up thi morn , my babi 's gone woke up thi...
4999    through the windswept coastal tree where the d...
Name: text, Length: 5000, dtype: object

In [16]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


In [17]:
tfid = TfidfVectorizer(analyzer= 'word', stop_words='english')
matrix = tfid.fit_transform(df['text'])

In [18]:
similarity = cosine_similarity(matrix)

In [19]:
def recommender(song_name):
    index = df[df['song'] == song_name].index[0]
    distance = sorted(list(enumerate(similarity[index])) , reverse = True, key = lambda x: x[1])
    song = []

    for s_id in distance[1:21]:
        song.append(df.iloc[s_id[0]].song)

    return song   

In [20]:
df.tail(5)

Unnamed: 0,artist,song,text
4995,Roy Orbison,Danny Boy,"Oh Danny boy, the pipes, the pipes are calling..."
4996,Glee,Defying Gravity,Something has changed within me \r Something ...
4997,Nick Drake,Outside,Seems the time is near for finding a place out...
4998,Stevie Wonder,My Baby's Gone,"Woke up this morning, my baby's gone \r Woke ..."
4999,Nick Cave,Supernaturally,Through the windswept coastal trees \r Where ...


In [23]:
recommender('Danny Boy')

['Choose Your Friend',
 'Come Back To Me',
 'Boy',
 'You Took The Words Right Out Of My Mouth',
 'World Of Love',
 'Stronger Than Before',
 'Our Time Is Here',
 'Million Dollar Bill',
 'Come On Get It',
 'Last Chance',
 'Uh Oh',
 'O Holy Night',
 'Ten Minutes Ago',
 'Because We Can',
 'And No More Shall We Part',
 'Red Camaro',
 "If I Can't Have You",
 'Heartache For Everyone',
 'Future Boy (live)',
 "Summer's Almost Gone"]

In [24]:
import pickle

In [25]:
pickle.dump(similarity, open('similarity.pkl', 'wb'))

In [26]:
pickle.dump(df, open('df.pkl', 'wb'))