In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv('Songs Dataset.csv')

In [3]:
df.head(5)

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \r\nA..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \r\nTouch me gen..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \r\nWhy I had...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


In [4]:
df.tail(5)

Unnamed: 0,artist,song,link,text
57645,Ziggy Marley,Good Old Days,/z/ziggy+marley/good+old+days_10198588.html,Irie days come on play \r\nLet the angels fly...
57646,Ziggy Marley,Hand To Mouth,/z/ziggy+marley/hand+to+mouth_20531167.html,Power to the workers \r\nMore power \r\nPowe...
57647,Zwan,Come With Me,/z/zwan/come+with+me_20148981.html,all you need \r\nis something i'll believe \...
57648,Zwan,Desire,/z/zwan/desire_20148986.html,northern star \r\nam i frightened \r\nwhere ...
57649,Zwan,Heartsong,/z/zwan/heartsong_20148991.html,come in \r\nmake yourself at home \r\ni'm a ...


In [5]:
df.shape

(57650, 4)

In [6]:
df.isnull().sum()

artist    0
song      0
link      0
text      0
dtype: int64

In [7]:
df =df.sample(5000).drop('link', axis=1).reset_index(drop=True)

In [8]:
df.head(10)

Unnamed: 0,artist,song,text
0,Bob Dylan,Floater,Down over the window \r\nFrom the dazzling su...
1,Willie Nelson,A New Way To Cry,"All my tears have fallen, I can cry no more. ..."
2,Waylon Jennings,Grey Eyes You Know,That cold north wind is blowing grey eyes you ...
3,Great Big Sea,"Dance, Dance","Friday night, seventeen \r\nGot my hands on t..."
4,Olivia Newton-John,Gaia,(Olivia Newton-John) \r\nI am your mother--bo...
5,Offspring,Come Out Swing,You brace and hold it all inside \r\nIt's mor...
6,Perry Como,Lollipops And Roses,"Tell her you care, each time you speak \r\nMa..."
7,Within Temptation,Forgiven,Couldn't save you from the start \r\nLove you...
8,Utada Hikaru,Another Chance,I'm playing back \r\nThe melody you hummed \...
9,John Mellencamp,Suzanne And The Jewels,Suzanne was the jewel keeper \r\nIn a word sh...


In [9]:
df['text'][0]

"Down over the window  \r\nFrom the dazzling sunlit rays  \r\nThrough the back alleys, through the blinds  \r\nAnother one of them endless days  \r\n  \r\nHoney bees are buzzing  \r\nLeaves begin to stir  \r\nI'm in love with my second cousin  \r\nI tell myself I could be happy forever with her  \r\n  \r\nI keep listening for footsteps  \r\nBut I ain't never hearing any  \r\nFrom the boat, I fish for bullheads  \r\nI catch a lot, sometimes too many  \r\n  \r\nA summer breeze is blowin'  \r\nA squall is setting in  \r\nSometimes it's just plain stupid  \r\nTo get into any kind of wind  \r\n  \r\nWell the old men 'round here  \r\nSometimes they get on bad terms  \r\nWith the younger men,  \r\nOld, young, age don't carry weight  \r\nIt doesn't matter in the end  \r\n  \r\nOne of the boss' hangers-on  \r\nSometimes comes to call  \r\nAt times you least expect  \r\nTryin' to bully you, strongarm you,  \r\nInspire you with fear  \r\nIt has the opposite effect  \r\n  \r\nThere's a new grove o

In [10]:
df.shape

(5000, 3)

Text Cleaning/ Text Preprocessing

In [11]:
df['text'] = df['text'].str.lower().replace(r'^\w\s', ' ').replace(r'\n', ' ', regex = True)

In [12]:
import nltk 
from nltk.stem.porter import PorterStemmer
stemmer = PorterStemmer()

def tokenization(txt):
    tokens = nltk.word_tokenize(txt)
    stemming = [stemmer.stem(w) for w in tokens]
    return " ".join(stemming)

In [13]:
import nltk
nltk.download('punkt')
nltk.download('punkt_tab')  # Download the missing resource

df['text'] = df['text'].apply(lambda x: tokenization(x))

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Hp\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\Hp\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


In [14]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [15]:
tfidvector = TfidfVectorizer(analyzer='word',stop_words='english')
matrix = tfidvector.fit_transform(df['text'])
similarity = cosine_similarity(matrix)

In [16]:
similarity[0]

array([1.        , 0.10294309, 0.03772765, ..., 0.02796238, 0.02204089,
       0.05132841], shape=(5000,))

In [32]:
# to define recommendation using similarity score
def get_recommendations(title):
    index = df[df['song'] == title].index[0]
    distances = similarity[index]
    song_list = sorted(list(enumerate(distances)),reverse=True,key=lambda x:x[1])[1:6]
    for i in song_list:
        print(df.iloc[i[0]].song)
    return

In [33]:
get_recommendations('Good Old Days')

Brand New Me
Good Day Sunshine
Good Things
This And That
Fun Day


In [34]:
import pickle
pickle.dump(similarity,open('similarity2.pkl','wb'))
pickle.dump(df,open('df2.pkl','wb'))

In [35]:
pickle.dump(df.to_dict(),open('df2_dict.pkl','wb'))