In [1]:
import pandas as pd

In [2]:
df = pd.read_csv("spotify_millsongdata.csv")

In [3]:
df.head(5)

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \r\nA..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \r\nTouch me gen..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \r\nWhy I had...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


In [4]:
df.tail(5)

Unnamed: 0,artist,song,link,text
57645,Ziggy Marley,Good Old Days,/z/ziggy+marley/good+old+days_10198588.html,Irie days come on play \r\nLet the angels fly...
57646,Ziggy Marley,Hand To Mouth,/z/ziggy+marley/hand+to+mouth_20531167.html,Power to the workers \r\nMore power \r\nPowe...
57647,Zwan,Come With Me,/z/zwan/come+with+me_20148981.html,all you need \r\nis something i'll believe \...
57648,Zwan,Desire,/z/zwan/desire_20148986.html,northern star \r\nam i frightened \r\nwhere ...
57649,Zwan,Heartsong,/z/zwan/heartsong_20148991.html,come in \r\nmake yourself at home \r\ni'm a ...


In [5]:
df.shape

(57650, 4)

In [6]:
df.isnull().sum()

artist    0
song      0
link      0
text      0
dtype: int64

In [7]:
df =df.sample(5000).drop('link', axis=1).reset_index(drop=True)

In [8]:
df.head(10)

Unnamed: 0,artist,song,text
0,Bing Crosby,Count Your Blessings,When I'm worried and I can't sleep \r\nI coun...
1,Peter Gabriel,Eindringling,"Ich find ueberall einlass, ich knacke fenster ..."
2,Violent Femmes,Jesus Of Rio,Christ the Redeemer towering high over Rio \r...
3,Aerosmith,Get It Up,Take me on your rocking horse \r\nHit the lig...
4,Everlast,Change Is Gonna Come,I was born by the river in a little tent \r\n...
5,Peter Gabriel,Blood Of Eden,I caught sight of my reflection \r\nI caught ...
6,David Guetta,Lovers On The Sun,"Let's light it up, let's light it up \r\nUnti..."
7,Pearl Jam,In The Moonlight,"In the moonlight \r\nGettin', why I ought to ..."
8,Ray Charles,"Gee, Baby Ain't I Good To You",Love makes me treat you the way that I do \r\...
9,Jimi Hendrix,If 6 Was 9,"Yeah, sing the song, Bro \r\n \r\nIf the sun..."


In [9]:
df['text'][0]

"When I'm worried and I can't sleep  \r\nI count my blessings instead of sheep  \r\nAnd I fall asleep counting my blessings  \r\nWhen my bankroll is getting small  \r\nI think of when I had none at all  \r\nAnd I fall asleep counting my blessings  \r\n  \r\nI think about a nursery and I picture curly heads  \r\nAnd one by one I count them as they slumber in their beds  \r\nIf you're worried and you can't sleep  \r\nJust count your blessings instead of sheep  \r\nAnd you'll fall asleep counting your blessings  \r\n  \r\nI think about a nursery and I picture curly heads  \r\nAnd one by one I count them as they slumber in their beds  \r\nIf you're worried and you can't sleep  \r\nJust count your blessings instead of sheep  \r\nAnd you'll fall asleep counting your blessings\r\n\r\n"

In [10]:
df.shape

(5000, 3)

Text Cleaning/ Text Preprocessing

In [11]:
df['text'] = df['text'].str.lower().replace(r'^\w\s', ' ').replace(r'\n', ' ', regex = True)

In [12]:
import nltk
from nltk.stem.porter import PorterStemmer
stemmer = PorterStemmer()

def tokenization(txt):
    tokens = nltk.word_tokenize(txt)
    stemming = [stemmer.stem(w) for w in tokens]
    return " ".join(stemming)

In [13]:
df['text'] = df['text'].apply(lambda x: tokenization(x))

In [14]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [15]:
tfidvector = TfidfVectorizer(analyzer='word',stop_words='english')
matrix = tfidvector.fit_transform(df['text'])
similarity = cosine_similarity(matrix)

In [16]:
similarity[0]

array([1.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
       3.05216065e-03, 0.00000000e+00, 7.93659360e-04])

In [20]:
df[df['song'] == 'Count Your Blessings']

Unnamed: 0,artist,song,text
0,Bing Crosby,Count Your Blessings,when i 'm worri and i ca n't sleep i count my ...


In [21]:
def recommendation(song_df):
    idx = df[df['song'] == song_df].index[0]
    distances = sorted(list(enumerate(similarity[idx])),reverse=True,key=lambda x:x[1])
    
    songs = []
    for m_id in distances[1:21]:
        songs.append(df.iloc[m_id[0]].song)
        
    return songs

In [22]:
recommendation('Count Your Blessings')

['Count On Me',
 'The Blessings',
 'Little Girl Blue',
 'All The Lovely Ladies',
 'Lets Make Every Moment Count',
 'I Will',
 'Let It Go',
 "I'm Alive",
 "Don't Give Up",
 'Center Stage',
 'Counting On Me',
 'Pump Up The Valium',
 'Falling Into You',
 'The Door Into Summer',
 'Perfect World',
 'Lonely Is The Word',
 'God Bless The Models',
 'You Are Beautiful To Me',
 'I Get No Sleep',
 'Close Your Eyes']

In [23]:
import pickle
pickle.dump(similarity,open('similarity.pkl','wb'))
pickle.dump(df,open('df.pkl','wb'))