In [4]:
import pandas as pd
import numpy as np
import nltk
import pickle
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


In [5]:
df=pd.read_csv("spotify_millsongdata.csv")

In [6]:
# checking data:
df.shape

(57650, 4)

In [7]:
df.head(50)

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \r\nA..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \r\nTouch me gen..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \r\nWhy I had...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...
5,ABBA,Burning My Bridges,/a/abba/burning+my+bridges_20003011.html,"Well, you hoot and you holler and you make me ..."
6,ABBA,Cassandra,/a/abba/cassandra_20002811.html,Down in the street they're all singing and sho...
7,ABBA,Chiquitita,/a/abba/chiquitita_20002978.html,"Chiquitita, tell me what's wrong \r\nYou're e..."
8,ABBA,Crazy World,/a/abba/crazy+world_20003013.html,I was out with the morning sun \r\nCouldn't s...
9,ABBA,Crying Over You,/a/abba/crying+over+you_20177611.html,I'm waitin' for you baby \r\nI'm sitting all ...


In [8]:
df.tail(50)

Unnamed: 0,artist,song,link,text
57600,Zayn Malik,PillowTalk,/z/zayn+malik/pillowtalk_21106267.html,[Verse 1] \r\nClimb on board \r\nWe'll go sl...
57601,Zayn Malik,Rear View,/z/zayn+malik/rear+view_21107627.html,[Verse 1] \r\nCan your heart be mine in searc...
57602,Zayn Malik,She,/z/zayn+malik/she_21107624.html,[Verse 1] \r\nShe puts her spirit in a nightc...
57603,Zazie,Duo,/z/zazie/duo_20720322.html,Oui \r\nJe sens le vent \r\nJe sens la pluie...
57604,Zazie,Snowball,/z/zazie/snowball_20287523.html,Red lights on the skyscrapers \r\nSnow white ...
57605,Zebra,As I Said Before,/z/zebra/as+i+said+before_10198473.html,And I said before \r\nI don't want no more \...
57606,Zebra,Bears,/z/zebra/bears_20456403.html,In the middle of winter \r\nThe trees are bar...
57607,Zebra,But No More,/z/zebra/but+no+more_20162515.html,The time is now or never \r\nThe crashing is ...
57608,Zebra,Hard Living Without You,/z/zebra/hard+living+without+you_10198468.html,Nothing to say no place to hide \r\nI can't f...
57609,Zebra,When You Get There,/z/zebra/when+you+get+there_10198475.html,You wake up in the morning \r\nAnd you're not...


In [9]:
# checking null values:
df.isnull().sum()
# here no null value is present

artist    0
song      0
link      0
text      0
dtype: int64

In [10]:
df.drop(columns='link',inplace=True)

In [11]:
df

Unnamed: 0,artist,song,text
0,ABBA,Ahe's My Kind Of Girl,"Look at her face, it's a wonderful face \r\nA..."
1,ABBA,"Andante, Andante","Take it easy with me, please \r\nTouch me gen..."
2,ABBA,As Good As New,I'll never know why I had to go \r\nWhy I had...
3,ABBA,Bang,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,Making somebody happy is a question of give an...
...,...,...,...
57645,Ziggy Marley,Good Old Days,Irie days come on play \r\nLet the angels fly...
57646,Ziggy Marley,Hand To Mouth,Power to the workers \r\nMore power \r\nPowe...
57647,Zwan,Come With Me,all you need \r\nis something i'll believe \...
57648,Zwan,Desire,northern star \r\nam i frightened \r\nwhere ...


In [12]:
df['text']

0        Look at her face, it's a wonderful face  \r\nA...
1        Take it easy with me, please  \r\nTouch me gen...
2        I'll never know why I had to go  \r\nWhy I had...
3        Making somebody happy is a question of give an...
4        Making somebody happy is a question of give an...
                               ...                        
57645    Irie days come on play  \r\nLet the angels fly...
57646    Power to the workers  \r\nMore power  \r\nPowe...
57647    all you need  \r\nis something i'll believe  \...
57648    northern star  \r\nam i frightened  \r\nwhere ...
57649    come in  \r\nmake yourself at home  \r\ni'm a ...
Name: text, Length: 57650, dtype: object

In [13]:
# taking sampleof tghe data set because the data set is very big :
df=df.sample(10000)
df.reset_index(inplace=True)

In [14]:
df

Unnamed: 0,index,artist,song,text
0,30983,Ella Fitzgerald,Come Rain Or Come Shine,"I'm gonna love you, like nobody's loved you \..."
1,20557,Van Morrison,Crazy Face,All the people were waiting for crazy face \r...
2,43603,Michael Bolton,She Did The Same Thing,I see you finally found a set of rules \r\nTh...
3,21080,Warren Zevon,Worrier King,Written by Warren Zevon 1993 Zevon Music BMI ...
4,46148,Noa,Path To Follow,I found a path to follow \r\nI knew it straig...
...,...,...,...,...
9995,1213,The Beatles,Carol,"Oh Carol, don't let him \r\nSteal your heart ..."
9996,11313,Leann Rimes,She's Got You,I've got your picture \r\nThat you gave to me...
9997,22764,ZZ Top,Lizard Life,I got the attitude \r\nAnd that's all I ever ...
9998,15479,Pat Benatar,Shadows Of The Night,We're running with the shadows of the night \...


In [15]:
df.shape

(10000, 4)

In [16]:
# cleaning text 
df['text']=df['text'].str.lower()

In [17]:
df['text']=df['text'].str.replace(r'^\w,\s',' ').replace(r'\n',' ',regex=True)

  df['text']=df['text'].str.replace(r'^\w,\s',' ').replace(r'\n',' ',regex=True)


In [18]:
df['text']

0       i'm gonna love you, like nobody's loved you  \...
1       all the people were waiting for crazy face  \r...
2       i see you finally found a set of rules  \r tha...
3       written by warren zevon 1993 zevon music bmi  ...
4       i found a path to follow  \r i knew it straigh...
                              ...                        
9995    oh carol, don't let him  \r steal your heart a...
9996    i've got your picture  \r that you gave to me ...
9997    i got the attitude  \r and that's all i ever s...
9998    we're running with the shadows of the night  \...
9999    well we walk, down the road, everyday  \r with...
Name: text, Length: 10000, dtype: object

In [19]:
stemmer =PorterStemmer()

In [20]:
#stemming :
# this is the process of converting the word to its root form :

def token(txt):
    wordlist=[];
    token =nltk.word_tokenize(txt)
    for word in token:
        wordlist.append(stemmer.stem(word))
    return " ".join(wordlist)

In [21]:
# now this will convert the words with similar meaning to the root words:

token("likes liked likely liking")

'like like like like'

In [22]:
# this will apply the token function on the all text column (time consumming)
df['text'].apply(lambda x:token(x))
    

0       i 'm gon na love you , like nobodi 's love you...
1       all the peopl were wait for crazi face he said...
2       i see you final found a set of rule that came ...
3       written by warren zevon 1993 zevon music bmi i...
4       i found a path to follow i knew it straight aw...
                              ...                        
9995    oh carol , do n't let him steal your heart awa...
9996    i 've got your pictur that you gave to me and ...
9997    i got the attitud and that 's all i ever say d...
9998    we 're run with the shadow of the night so bab...
9999    well we walk , down the road , everyday with a...
Name: text, Length: 10000, dtype: object

In [23]:
df

Unnamed: 0,index,artist,song,text
0,30983,Ella Fitzgerald,Come Rain Or Come Shine,"i'm gonna love you, like nobody's loved you \..."
1,20557,Van Morrison,Crazy Face,all the people were waiting for crazy face \r...
2,43603,Michael Bolton,She Did The Same Thing,i see you finally found a set of rules \r tha...
3,21080,Warren Zevon,Worrier King,written by warren zevon 1993 zevon music bmi ...
4,46148,Noa,Path To Follow,i found a path to follow \r i knew it straigh...
...,...,...,...,...
9995,1213,The Beatles,Carol,"oh carol, don't let him \r steal your heart a..."
9996,11313,Leann Rimes,She's Got You,i've got your picture \r that you gave to me ...
9997,22764,ZZ Top,Lizard Life,i got the attitude \r and that's all i ever s...
9998,15479,Pat Benatar,Shadows Of The Night,we're running with the shadows of the night \...


In [24]:
#  the tfidf will convert each sentence and take every single word and calculate the freqency of their occurence 
tfidf=TfidfVectorizer(analyzer='word',stop_words='english')

In [25]:
# this will apply the vectorization and convert the data into form of a sparse matrix
matrix=tfidf.fit_transform(df['text'])

In [26]:
#  this will allow to access the matrix
similar =cosine_similarity(matrix)

In [27]:
similar[0]

array([1.        , 0.01107807, 0.05061073, ..., 0.01394486, 0.09605146,
       0.08057054])

In [29]:
df[df['song']=='Crazy Face'].index[0]

1

In [30]:
# recommendation :
def recommend(song_name):
    idx= df[df['song']==song_name].index[0]
    distance=list(sorted(enumerate(similar[idx]), reverse=True,key=lambda x:x[1]))    
    song=[]
    for sid in distance[1:5]:
        song.append(df.iloc[sid[0]].song)
    return song;
        

In [33]:
recommend('Crazy Face')

['Jesse James',
 'Face To Face',
 'The Restroom Door Said,',
 'You Never Can Tell With Friends']

In [34]:
pickle.dump(similar,open("similarity","wb"))

In [35]:
pickle.dump(df,open("df","wb"))