## Modelling 

In [1]:
# Importing required libraries.
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Reading the song dataset from the CSV file using Pandas.
# The full frame is kept as `df_og` so that later cells can still look up
# song names and artists after the text columns are dropped from the features.
df_og = pd.read_csv("holly_bolly_10733_final.csv")

In [3]:
# Displaying the first 5 records (the default for head()) to view the columns.
df_og.head()

Unnamed: 0,name,artist,id,danceability,key,mode,instrumentalness,duration,energy,loudness,speechiness,acousticness,tempo,liveness,valence,popularity,hollywood
0,ishqkaminaa,djjiten,1YFTLSuSQ0odUosJIVTvOC,0.782,5,0,1.3e-05,1.907673,0.972,-7.542,0.148,0.0255,0.60896,0.369,0.557,0.0,0
1,bhoolja,shaan,7pCE1BHlNVbvIuqUEOGAj5,0.696,0,0,0.000185,1.389656,0.888,-2.285,0.0336,0.0592,0.549908,0.131,0.676,0.180905,0
2,tanhadil,shaan,4kO9vFarqUPNwlgFnVNgVh,0.621,7,0,0.0,1.761857,0.704,-5.293,0.0236,0.101,0.454296,0.185,0.366,0.221106,0
3,sexymama,bombayrockers,5VG9YmTLrTuhOlhWYDdI8v,0.83,9,0,1e-06,1.40292,0.77,-4.135,0.112,0.483,0.454514,0.127,0.927,0.135678,0
4,ariaripart1,bombayrockers,6dWQ9E5mFjQZA7ujh3wd2Z,0.758,7,1,0.000154,0.458445,0.918,-2.487,0.108,0.00085,0.445791,0.0619,0.721,0.165829,0


In [4]:
# Removing the text columns (name, artist, id) so that only the remaining
# columns are used as features; the text values are kept in `df_og`
# (we'll be using them in another approach).
df = df_og.drop(columns=["name", "artist", "id"])

In [5]:
# Converting the dataframe into a NumPy array, which is the input passed to
# cosine_similarity below.
df_numpy = df.to_numpy()

In [6]:
# Checking the dimensions of the feature array as (rows, columns).
df_numpy.shape

(10733, 14)

In [7]:
# Finding the cosine similarity (not distance) of every record with every other
# record in the dataset: entry [i, j] is the similarity between rows i and j,
# and a higher value means the two records are more alike.
cosine_sim = cosine_similarity(df_numpy,df_numpy)

In [8]:
# Printing the shape of the similarity matrix (one row and one column per record).
cosine_sim.shape

(10733, 10733)

In [9]:
# Viewing the third record (position 2) from the original dataset.
# This will be used as an input for getting recommendations; the same
# position selects its row of the similarity matrix in the next step.
df_og.iloc[2]

name                              tanhadil
artist                               shaan
id                  4kO9vFarqUPNwlgFnVNgVh
danceability                         0.621
key                                      7
mode                                     0
instrumentalness                         0
duration                           1.76186
energy                               0.704
loudness                            -5.293
speechiness                         0.0236
acousticness                         0.101
tempo                             0.454296
liveness                             0.185
valence                              0.366
popularity                        0.221106
hollywood                                0
Name: 2, dtype: object

In [10]:
# Sorting the cosine similarities of the input song (row 2) with all songs in
# descending order, so that the most similar songs come first.
score_series = pd.Series(cosine_sim[2]).sort_values(ascending=False)

In [11]:
# Taking the row indices found at positions 1 to 5 of the sorted scores, i.e.
# the 5 best matches after the top entry. Position 0 is skipped -- presumably
# because the input song is expected to be the best match for itself.
top_5_indexes = score_series.index[1:6].tolist()

In [12]:
# Displaying the selected row indices.
top_5_indexes

[113, 838, 277, 899, 600]

In [13]:
# Deriving the (name, artist) pairs of the recommended songs from the original
# dataset. Both columns are looked up by position in a single call each,
# instead of rebuilding full Python lists of the two columns on every
# loop iteration.
recommended_songs = list(
    zip(
        df_og["name"].iloc[top_5_indexes],
        df_og["artist"].iloc[top_5_indexes],
    )
)

In [14]:
# Displaying the recommended songs as (name, artist) tuples.
recommended_songs

[('meredilmeinaajkyahaitheamorouslovermix', 'instantkarma'),
 ('tujunooniyatclimaxsong', 'shreysinghal'),
 ('basekpalremix', 'kk'),
 ('chorichoritakna', 'darkmc'),
 ('farak', 'divine')]

In [15]:
# Series of song names, used by recommendations() to map a song title
# to the row label of that song.
indices = pd.Series(df_og['name'])

In [16]:
def recommendations(title, cosine_sim=cosine_sim, top_n=5):
    """Return the songs most similar to the song named ``title``.

    Parameters
    ----------
    title : str
        Song name to look up in ``indices`` (exact match). If several rows
        share the same name, the first matching row is used.
    cosine_sim : array-like, optional
        Pairwise similarity matrix in which row ``i`` holds the scores of
        song ``i`` against every song. Defaults to the notebook-level
        ``cosine_sim``.
    top_n : int, optional
        Number of recommendations to return. Defaults to 5.

    Returns
    -------
    list of tuple
        ``(name, artist)`` pairs taken from ``df_og``, best match first.

    Raises
    ------
    IndexError
        If no song named ``title`` exists in ``indices``.

    Notes
    -----
    The row label found in ``indices`` is used as a position into
    ``cosine_sim``, so this relies on ``df_og`` keeping the default
    0..n-1 index it gets from ``pd.read_csv``.
    """
    # getting the index of the song that matches the title
    matches = indices[indices == title].index
    if len(matches) == 0:
        # same exception type as the bare ``.index[0]`` lookup, but with a
        # message that names the missing title
        raise IndexError("no song named {!r} in the dataset".format(title))
    idx = matches[0]

    # creating a Series with the similarity scores in descending order
    score_series = pd.Series(cosine_sim[idx]).sort_values(ascending=False)

    # removing the query song by its own index rather than assuming it sorts
    # to position 0: with tied scores (e.g. duplicate feature rows) it may
    # not, and it would then be recommended to itself
    top_indexes = score_series.drop(idx).iloc[:top_n].index

    # collecting the names and artists of the best matching songs
    return [(df_og.loc[i, "name"], df_og.loc[i, "artist"]) for i in top_indexes]

In [17]:
# Getting recommendations for the song named "numb".
recommended_songs = recommendations("numb")

In [18]:
# Printing the recommendations as a numbered list, title-casing the
# song name and the artist.
for rank, (song_name, song_artist) in enumerate(recommended_songs, start=1):
    line = "{}: {} by {}".format(rank, song_name.title(), song_artist.title())
    print(line)

1: Sleepingawake2006Remaster by Pod
2: Aplaceformyhead by Linkinpark
3: Dontbeshy by Thelibertines
4: Papercut by Linkinpark
5: Disposableteens by Marilynmanson
