In [1]:
import pandas as pd

In [2]:
df = pd.read_csv("spotify_millsongdata.csv")

In [3]:
df.head(5)

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \r\nA..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \r\nTouch me gen..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \r\nWhy I had...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


In [4]:
df.tail(5)

Unnamed: 0,artist,song,link,text
57645,Ziggy Marley,Good Old Days,/z/ziggy+marley/good+old+days_10198588.html,Irie days come on play \r\nLet the angels fly...
57646,Ziggy Marley,Hand To Mouth,/z/ziggy+marley/hand+to+mouth_20531167.html,Power to the workers \r\nMore power \r\nPowe...
57647,Zwan,Come With Me,/z/zwan/come+with+me_20148981.html,all you need \r\nis something i'll believe \...
57648,Zwan,Desire,/z/zwan/desire_20148986.html,northern star \r\nam i frightened \r\nwhere ...
57649,Zwan,Heartsong,/z/zwan/heartsong_20148991.html,come in \r\nmake yourself at home \r\ni'm a ...


In [5]:
df.shape

(57650, 4)

In [6]:
df.isnull().sum()

artist    0
song      0
link      0
text      0
dtype: int64

In [7]:
df =df.sample(5000).drop('link', axis=1).reset_index(drop=True)

In [8]:
df.head(10)

Unnamed: 0,artist,song,text
0,Lynyrd Skynyrd,Born To Run,There's an old man sittin' on a front porch no...
1,Journey,Butterfly (She Flies Alone),She never knew the meaning of the word \r\nOr...
2,King Diamond,Welcome Home,Grandma' welcome home \r\nYou have \r\nBeen ...
3,Black Sabbath,Dear Father,A childhood innocence was drowned in your tear...
4,Cyndi Lauper,Santa Claus Is Coming To Town,I just came back from a lovely trip along the ...
5,Dean Martin,Let Me Know,Let me know the moment he lets you go \r\nI'l...
6,Alison Krauss,Two Highways,"Two highways lay before me, which one will I c..."
7,Kris Kristofferson,You Show Me Yours,"If you're feeling salty, then I'm your tequila..."
8,Rush,Test For Echo,"[Chorus:] \r\nHere we go, vertigo \r\nVideo ..."
9,Ocean Colour Scene,Hundred Mile City,"So I said I'm on the roam, so I need a car \r..."


In [9]:
df['text'][0]

"There's an old man sittin' on a front porch now  \r\nTalkin' 'bout how it used to be  \r\nWhen I was young I was a hell of a man  \r\nMy father died when I was ten  \r\nI hit the road to find a job  \r\nHad to feed my family  \r\nTimes were hard my hands are still scarred  \r\nFrom the life I've had to lead  \r\nI was born to run  \r\n  \r\nDrove a tractor and trailer all my life  \r\nSix kids and a hell of a wife  \r\nMade lots of money it all slipped away  \r\nA large family that's the price you pay  \r\nI always dreamed never gave up  \r\nSon, even when times got tough  \r\nThat's when I'd push it a little bit more  \r\nYou should've heard that engine roar  \r\n  \r\n[Chorus]  \r\nI was born to run  \r\nCan't slow down  \r\nNo regrets, I've been blessed  \r\nBorn to run  \r\nIn time you'll see what the good lord's done for me  \r\nBorn to run  \r\nI can't slow down  \r\nNo regrets, I did my best  \r\nBorn to run  \r\nIn time you'll see  \r\nWhat the good lord's done, done for me  \

In [10]:
df.shape

(5000, 3)

In [11]:
df['text'] = df['text'].str.lower().replace(r'^\w\s', ' ').replace(r'\n', ' ', regex = True)

In [12]:
import nltk
from nltk.stem.porter import PorterStemmer
stemmer = PorterStemmer()

def tokenization(txt):
    tokens = nltk.word_tokenize(txt)
    stemming = [stemmer.stem(w) for w in tokens]
    return " ".join(stemming)

In [14]:
df['text'] = df['text'].apply(lambda x: tokenization(x))

In [15]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [16]:
tfidvector = TfidfVectorizer(analyzer='word',stop_words='english')
matrix = tfidvector.fit_transform(df['text'])
similarity = cosine_similarity(matrix)

In [17]:
similarity[0]

array([1.        , 0.01889638, 0.02846341, ..., 0.00678158, 0.02104815,
       0.11624149])

In [18]:
df[df['song'] == 'Waiting For The Man']

Unnamed: 0,artist,song,text


In [19]:
def recommendation(song_df):
    if song_df not in df['song'].values:
        return f"Song '{song_df}' not found in the dataset."
    
    idx = df[df['song'] == song_df].index[0]
    distances = sorted(list(enumerate(similarity[idx])), reverse=True, key=lambda x: x[1])
    
    songs = [df.iloc[m_id[0]].song for m_id in distances[1:6]]
    return songs


In [22]:
print(recommendation('Dear Father'))

['Come On, Come On, Come On', 'If You Had My Love', 'Doing All Right', 'Pearl', 'Tear It Up']


In [23]:
print(recommendation('Hundred Mile City'))

["I Don't Need You", 'I Need To Know', 'Best I Could', 'Everybody Needs Somebody To Love', 'All You Need Is Love']


In [24]:
import pickle
pickle.dump(similarity,open('similarity.pkl','wb'))
pickle.dump(df,open('df.pkl','wb'))