In [48]:
import pandas as pd

In [49]:
df = pd.read_csv("spotify_millsongdata.csv")

In [50]:
df.head()

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \r\nA..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \r\nTouch me gen..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \r\nWhy I had...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


In [51]:
df.shape

(57650, 4)

In [52]:
df.isnull().sum()

artist    0
song      0
link      0
text      0
dtype: int64

In [53]:
df = df.sample(5000).drop("link", axis = 1).reset_index(drop=True)
df

Unnamed: 0,artist,song,text
0,Donna Summer,Journey To The Center Of Your Heart,Take me on a journey \r\nOn a journey to the ...
1,Tom Jones,Strange Boat,We're sailing on a strange boat \r\nHeading f...
2,The Killers,Glamorous Indie Rock And Roll,Glamorous \r\nIndie rock'n'roll is what I wan...
3,David Pomeranz,I Learned It All From You,I was a man of convinction \r\nItchin' in pur...
4,Pearl Jam,Severed Hand,"Big man stands behind an open door \r\nSaid, ..."
...,...,...,...
4995,Ween,Even If You Don't,I'm goin' crazy trying to keep you sane \r\nT...
4996,Fiona Apple,Get Gone,How many times do I have to say \r\nTo get aw...
4997,Kirsty Maccoll,Camel Crossing,In a dream of a desolate land \r\nFinding my ...
4998,Lana Del Rey,Dangerous Girl,"He's my drugstore cowboy, \r\nItalian aloic, ..."


In [54]:
df["text"][0]

"Take me on a journey  \r\nOn a journey to the centre of you heart  \r\nLet me make the journey  \r\nwant to journey to the centre of your heart  \r\nI want to go, i want to go  \r\nOh let me go, i want to go  \r\nBaby want to travel  \r\nwant to travel 'cross the borders of your mind  \r\nBaby want to travel  \r\nwant to travel 'cross the borders of your mind  \r\nI want to go, please let me go  \r\nI want to go, please let me go  \r\nTake me on a journey  \r\nOn a journey to the bottom of your soul  \r\nTake me on a journey  \r\nOn a journey to the bottom of your soul  \r\nI want to go, oh let me go  \r\nI want to go, oh let me go  \r\nBaby want to travel  \r\nwant to travel 'cross the borders of your mind  \r\nBaby want to travel  \r\nwant to travel 'cross the borders of your mind  \r\nI want to go, oh let me go  \r\nI want to go, oh let me go  \r\nTake me on a journey  \r\nOn a journey to the centre of your heart  \r\nLet me make the journey  \r\nwant to journey to the centre of yo

**Text Cleaning and Preprocessing**

In [55]:
df['text'] = df["text"].str.lower().replace(r"^\w\s", " ").replace(r"\n", " ", regex = True)

In [56]:
import nltk
from nltk.stem.porter import PorterStemmer

In [57]:
stemmer = PorterStemmer()

In [58]:
def token(txt):
  token = nltk.word_tokenize(txt)
  a = [stemmer.stem(w) for w in token]
  return " ".join(a)

In [59]:
token("you are beautiful, beauty")

'you are beauti , beauti'

In [60]:
df["text"].apply(lambda x: token(x))

0       take me on a journey on a journey to the centr...
1       we 're sail on a strang boat head for a strang...
2       glamor indi rock'n'rol is what i want it 's in...
3       i wa a man of convinct itchin ' in pure over-d...
4       big man stand behind an open door said , leav ...
                              ...                        
4995    i 'm goin ' crazi tri to keep you sane takin '...
4996    how mani time do i have to say to get away-get...
4997    in a dream of a desol land find my feet on the...
4998    he 's my drugstor cowboy , italian aloic , cla...
4999    you 're the queen of your own littl world , yo...
Name: text, Length: 5000, dtype: object

In [61]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [62]:
tfid = TfidfVectorizer(analyzer = "word", stop_words = "english")

In [63]:
matrix = tfid.fit_transform(df["text"])

In [64]:
matrix

<5000x23308 sparse matrix of type '<class 'numpy.float64'>'
	with 266640 stored elements in Compressed Sparse Row format>

In [65]:
similarity = cosine_similarity(matrix)

In [66]:
def recommender(song_name):
  idx = df[df["song"] == song_name].index[0]
  distance = sorted(list(enumerate(similarity[idx])), reverse = True, key = lambda x:x[1])
  song = []
  for s_id in distance[1:5]:
    song.append(df.iloc[s_id[0]].song)
    return song

In [68]:
recommender("In Your Eyes")

['Keep Coming Back']

In [69]:
import pickle

In [70]:
pickle.dump(similarity, open("similarity", "wb"))

In [71]:
pickle.dump(df, open("df", "wb"))