In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

In [2]:
# Load the raw song table from 'data.csv' (path is relative to the notebook's
# working directory). `songs` is kept as the untouched raw load.
songs = pd.read_csv('data.csv')

In [3]:
# Preview the first rows of the raw table to check the columns and value formats.
songs.head()

Unnamed: 0,acousticness,artists,danceability,duration_ms,energy,explicit,id,instrumentalness,key,liveness,loudness,mode,name,popularity,release_date,speechiness,tempo,valence,year
0,0.991,['Mamie Smith'],0.598,168333,0.224,0,0cS0A1fUEUd1EW3FcF8AEI,0.000522,5,0.379,-12.628,0,Keep A Song In Your Soul,12,1920,0.0936,149.976,0.634,1920
1,0.643,"[""Screamin' Jay Hawkins""]",0.852,150200,0.517,0,0hbkKFIJm7Z05H8Zl9w30f,0.0264,5,0.0809,-7.261,0,I Put A Spell On You,7,1920-01-05,0.0534,86.889,0.95,1920
2,0.993,['Mamie Smith'],0.647,163827,0.186,0,11m7laMUgmOKqI3oYzuhne,1.8e-05,0,0.519,-12.098,1,Golfing Papa,4,1920,0.174,97.6,0.689,1920
3,0.000173,['Oscar Velazquez'],0.73,422087,0.798,0,19Lc5SfJJ5O1oaxY0fpwfh,0.801,2,0.128,-7.311,1,True House Music - Xavier Santos & Carlos Gomi...,17,1920-01-01,0.0425,127.997,0.0422,1920
4,0.295,['Mixe'],0.704,165224,0.707,1,2hJjbsLCytGsnAHfdsLejp,0.000246,10,0.402,-6.036,0,Xuniverxe,2,1920-10-01,0.0768,122.076,0.299,1920


In [4]:
# Data-quality counts on the raw load: fully duplicated rows, and tracks whose
# duration exceeds 600000 ms (10 minutes).
duplicate_rows = songs.duplicated().sum()
print('Duplicates : ', duplicate_rows)
longer_than_ten_min = songs['duration_ms'] > 600000
print('Duration greater than 10 mins: ', len(songs[longer_than_ten_min]))

Duplicates :  2159
Duration greater than 10 mins:  2566


In [5]:
# Drop rows that are exact duplicates of an earlier row (all columns compared).
# The result is bound to a new name, so `songs` still holds the raw load.
data = songs.drop_duplicates()

In [6]:
# (rows, columns) remaining after duplicate removal.
data.shape

(172230, 19)

In [7]:
# Remove every track longer than 600000 ms (10 minutes) by dropping the index
# labels of the offending rows.
too_long = data['duration_ms'] > 600000
data = data.drop(data.index[too_long.to_numpy()])

In [8]:
# (rows, columns) remaining after the over-10-minute tracks were dropped.
data.shape

(169699, 19)

In [9]:
# List the columns that may be used as recommendation criteria, then read the
# user's whitespace-separated selection into `columns`.
# Identifier/text columns and the derived 'euc' column (added to `data` by the
# distance cell on a previous run) are hidden from the list.
cols = data.columns.values
not_selectable = ('euc', 'name', 'release_date', 'id', 'artists')
print('List of columns : ')
for col_name in cols:
    if col_name in not_selectable:
        continue
    print(col_name)
columns = input('Enter the columns for recommendation criteria : ').split()

List of columns : 
acousticness
danceability
duration_ms
energy
explicit
instrumentalness
key
liveness
loudness
mode
popularity
speechiness
tempo
valence
year
Enter the columns for recommendation criteria : acousticness danceability duration_ms energy


In [10]:
# Collect one target value per selected feature into `col_dict`
# ({column name: float}). The prompt shows the (min,max) observed in `data`
# for that column. Input that is not a number, or that falls outside the
# observed range, is rejected and the user is asked again, so every selected
# column always ends up with exactly one value.
col_dict = {}
for i in columns:
    # Series.min()/max() are vectorised; compute each bound once per column.
    lower, upper = data[i].min(), data[i].max()
    prompt = 'Value for '+ i + ' :' + '(' + str(lower) + ',' + str(upper) + ')'
    while True:
        try:
            value = float(input(prompt))
        except ValueError:
            # Not parseable as a float: ask again instead of crashing the cell.
            print('Please provide correct input')
            continue
        # The chained comparison is False for NaN, so 'nan' is rejected too.
        if lower <= value <= upper:
            break
        print('Please provide correct input')
    col_dict[i] = value

Value for acousticness :(0.0,0.996)0.3
Value for danceability :(0.0,0.988)0.8
Value for duration_ms :(4937,600000)30000
Value for energy :(0.0,1.0)0.8


In [11]:
# Show the target values collected from the user, keyed by feature name.
col_dict

{'acousticness': 0.3,
 'danceability': 0.8,
 'duration_ms': 30000.0,
 'energy': 0.8}

In [12]:
# One-row frame holding the requested profile: one column per selected
# feature, in the order the values were entered.
user_song = pd.DataFrame([col_dict])

In [13]:
# Preview the catalogue restricted to the user-selected feature columns.
data[columns]

Unnamed: 0,acousticness,danceability,duration_ms,energy
0,0.991000,0.598,168333,0.224
1,0.643000,0.852,150200,0.517
2,0.993000,0.647,163827,0.186
3,0.000173,0.730,422087,0.798
4,0.295000,0.704,165224,0.707
...,...,...,...,...
174379,0.795000,0.429,144720,0.211
174381,0.795000,0.429,144720,0.211
174383,0.795000,0.429,144720,0.211
174385,0.795000,0.429,144720,0.211


In [14]:
# Replicate the single requested profile once per catalogue row so it lines up
# element-wise with data[columns]; column labels are carried over from
# `user_song`.
row_count = data[columns].shape[0]
replicated_values = np.repeat(user_song.values, row_count, axis=0)
user_song_ = pd.DataFrame(replicated_values, columns=user_song.columns)
user_song_

Unnamed: 0,acousticness,danceability,duration_ms,energy
0,0.3,0.8,30000.0,0.8
1,0.3,0.8,30000.0,0.8
2,0.3,0.8,30000.0,0.8
3,0.3,0.8,30000.0,0.8
4,0.3,0.8,30000.0,0.8
...,...,...,...,...
169694,0.3,0.8,30000.0,0.8
169695,0.3,0.8,30000.0,0.8
169696,0.3,0.8,30000.0,0.8
169697,0.3,0.8,30000.0,0.8


In [15]:
# Standardise the selected features (zero mean, unit variance, fitted on the
# catalogue) so that wide-ranged columns such as duration_ms do not dominate
# the distance, then score every song against the requested profile.
scaler = StandardScaler()
data_std = scaler.fit_transform(data[columns])
user_song_std = scaler.transform(user_song_)
# NOTE: 'euc' stores the *squared* Euclidean distance (no square root is
# taken). The square root is monotonic, so ordering by 'euc' ranks songs
# exactly as the true Euclidean distance would.
data['euc'] = ((data_std - user_song_std)**2).sum(axis=1)
# Series.max()/min() are vectorised and skip NaN, unlike the Python builtins,
# which iterate element by element and are order-dependent when NaN is present.
print(data['euc'].max())
print(data['euc'].min())

72.43264697893358
0.1042616572793488


In [16]:
# How many recommendations to show. int() raises ValueError if the reply is
# not an integer literal.
no = int(input('Enter the no of recommendations you want : '))

Enter the no of recommendations you want : 5


In [17]:
# Rank the catalogue by ascending 'euc' (smallest value = closest to the
# requested profile) and show the first `no` rows.
data.sort_values(by='euc').head(no)

Unnamed: 0,acousticness,artists,danceability,duration_ms,energy,explicit,id,instrumentalness,key,liveness,loudness,mode,name,popularity,release_date,speechiness,tempo,valence,year,euc
109795,0.197,['Armin van Buuren'],0.781,40910,0.783,0,472KJ60Jr1KSNo1nvc6RkK,0.0,1,0.338,-11.623,0,A State Of Trance (ASOT 903) - Double Vinyl AS...,1,2019-02-28,0.319,133.043,0.881,2019,0.104262
89745,0.203,"['Dr. Dre', 'Traci Nelson', 'Ms. Roq', 'Eddie ...",0.813,50653,0.796,1,0WWem437MQdylQuXxzE80H,0.0,2,0.257,-8.545,0,Bar One,48,1999-11-16,0.308,81.328,0.871,1999,0.124006
9628,0.425,['The Beatles'],0.792,52973,0.763,0,6j67aNAPeQ31uw4qw4rpLa,0.627,2,0.789,-11.185,1,Wild Honey Pie - Remastered 2009,49,1968-11-22,0.0506,89.9,0.152,1968,0.194646
125978,0.266,['Armin van Buuren'],0.717,32938,0.724,0,3iHHUV2gNFaH14jGmgifL6,1.2e-05,1,0.371,-11.988,0,A State Of Trance (ASOT 837) - Tune Of The Yea...,1,2017-10-26,0.0774,132.027,0.815,2017,0.313224
18377,0.434,['Armin van Buuren'],0.727,38330,0.767,0,7bPhqHzOvdsw5y6reuJz52,0.0,0,0.532,-7.291,1,A State Of Trance (ASOT 991) - ASOT Tune Of Th...,24,2020-11-19,0.536,127.471,0.969,2020,0.322933


In [18]:
# Echo the requested profile so it can be compared with the rows above.
user_song

Unnamed: 0,acousticness,danceability,duration_ms,energy
0,0.3,0.8,30000.0,0.8


In [19]:
# Print the title and artists of the `no` closest songs.
# The ranking is computed once and each title is paired with the artists of
# that same row. Looking artists up by title instead would pull in every
# catalogue row that shares the title and produce a garbled artist string.
print('Your list of songs : ')
print()
closest_songs = data.sort_values(by='euc').head(no)
for song_name, artist_text in zip(closest_songs['name'], closest_songs['artists']):
    print('Song name : ', song_name)
    # 'artists' holds a list written as text, e.g. "['Mamie Smith']";
    # dropping the first and last character removes the enclosing brackets.
    print('Artist name : ', str(artist_text)[1:-1])

Your list of songs : 

Song name :  A State Of Trance (ASOT 903) - Double Vinyl ASOT Year Mix 2018 Contest
Artist name :  'Armin van Buuren'
Song name :  Bar One
Artist name :  'Dr. Dre', 'Traci Nelson', 'Ms. Roq', 'Eddie Griffin'
Song name :  Wild Honey Pie - Remastered 2009
Artist name :  'The Beatles'
Song name :  A State Of Trance (ASOT 837) - Tune Of The Year 2017 voting, Pt. 5: vote.astateoftrance.com
Artist name :  'Armin van Buuren'
Song name :  A State Of Trance (ASOT 991) - ASOT Tune Of The Year 2020 voting now open: vote.astateoftrance.com, Pt. 3
Artist name :  'Armin van Buuren'
