# Packages Needed
1. NumPy
2. pandas
3. scikit-learn

# Download dataset from https://www.kaggle.com/mousehead/songlyrics and place `songdata.csv` in the notebook's working directory

# Import Packages

In [1]:
import numpy as np
import pandas as pd

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Read Dataset

In [2]:
# Load the song-lyrics CSV; the relative path means the file must sit in the working directory.
df = pd.read_csv(filepath_or_buffer='songdata.csv')
# Preview the first five rows to confirm the columns were parsed as expected.
df.head(n=5)

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \nAnd..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \nTouch me gentl..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \nWhy I had t...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


# TF-IDF Search Engine

In [3]:
# Create the TF-IDF vectorizer with default settings.
# Nothing is computed here yet: the tf-idf matrix is produced by fit_transform in the next cell.
vectorizer = TfidfVectorizer()

In [4]:
%%time
# Fit the vectorizer on every song's lyrics and keep the resulting
# tf-idf matrix (one row per song) for later similarity look-ups.
X = vectorizer.fit_transform(raw_documents=df['text'])

CPU times: user 12.6 s, sys: 220 ms, total: 12.9 s
Wall time: 13.2 s


In [5]:
# Shape of the tf-idf matrix: (number of songs, number of unique words in the fitted vocabulary)
print(X.shape)

(57650, 82385)


# Query Processing

In [6]:
# Free-text search query; it is vectorized with the fitted TF-IDF vocabulary in the next cell.
query = "Take it easy with me, please"

In [7]:
%%time
# Map the single query string into the already-fitted tf-idf space.
# Input: list with one string -> output: sparse matrix of shape (1, n_features).
query_vec = vectorizer.transform([query])
# Cosine similarity of every song against the query gives an (n_docs, 1) array;
# flatten it so results[i] is the score of song i.
results = cosine_similarity(X, query_vec).ravel()

CPU times: user 72 ms, sys: 7.63 ms, total: 79.7 ms
Wall time: 79.5 ms


# Print Results

In [8]:
# Print the top 10 results, best match first.
# argsort() is ascending, so the last 10 positions are the highest scores;
# reversing them puts the most similar song at the top.
for i in results.argsort()[-10:][::-1]:
    # Look columns up by name rather than by position so the output stays
    # correct even if the CSV's column order changes.
    row = df.iloc[i]
    print(row['artist'], "--", row['song'])

Guns N' Roses -- It's So Easy
Linda Ronstadt -- It's So Easy (To Fall In Love)
Kris Kristofferson -- Easy, Come On
Lorde -- Easy
Kiss -- Easy As It Seems
Ne-Yo -- Make It Easy
Rolling Stones -- It's Not Easy
Frank Zappa -- Easy Meat
Billy Joel -- Easy Money
Stevie Wonder -- Please, Please, Please
