In [29]:
#!pip install neo4j
#!pip install pandas
#!pip install numpy
#!pip install sklearn
#!pip install category_encoders

In [30]:
from neo4j import GraphDatabase
import pandas as pd
import numpy as np
import category_encoders as ce
from sklearn.metrics.pairwise import cosine_similarity
import operator

In [31]:
graphDb = GraphDatabase.driver(uri="bolt://localhost:7687", auth=("neo4j", "amit955raja"))
graphDb

<neo4j.BoltDriver at 0x2168fe1b0d0>

In [32]:
#contentbased filtering
session = graphDb.session()
USERID = 6113
query = "MATCH n = (:User{userId:'" + str(USERID) + "'})-[:LIKES]->() return n"
nodes = session.run(query)
userSongsList = []
for node in nodes:
    l = list(node["n"])[0].nodes
    song = dict(l[1])["title"]
    userSongsList.append(song)
print(userSongsList)

['This Is What You Came For', 'Lips Are Movin', 'Roar', 'Uptown Funk', 'Trumpets', 'Time of Our Lives', 'Want to Want Me', 'Hello', 'Never Forget You', 'Worth It', 'NO', 'Dark Horse', 'Break Free', '7 Years', 'Dear Future Husband', 'Sorry', 'Bloodstream', 'Chandelier', 'Dangerous Woman', 'Cake By The Ocean', 'Same Old Love', 'Stitches', 'Wrecking Ball']


In [33]:
session = graphDb.session()
query = "MATCH n = (:Song)-[:SUNG_BY]->(:Artist) return n"
nodes = session.run(query)
songs = pd.DataFrame(columns = ['title', 'artist', 'loudness', 'liveness', 'valence', 'danceability', 'speechiness', 'popularity', 'acousticness', 'bpm', 'energy', 'year','duration', 'genre'])
for node in nodes:
    song = dict(list(list(node["n"])[0].nodes)[0])
    song['artist'] = dict(list(list(node["n"])[0].nodes)[1])['name']
    songs = songs.append(song, ignore_index = True)
songs.head()

Unnamed: 0,title,artist,loudness,liveness,valence,danceability,speechiness,popularity,acousticness,bpm,energy,year,duration,genre
0,My First Kiss - feat. Ke$ha,3OH!3,-4,36,83,68,8,62,1,138,89,2010,192,
1,She Looks So Perfect,5 Seconds of Summer,-4,33,44,49,13,71,0,160,95,2014,202,
2,Say Something,A Great Big World,-9,9,9,45,3,61,87,138,15,2014,229,
3,Whataya Want from Me,Adam Lambert,-5,6,45,44,5,66,1,186,68,2010,227,
4,If I Had You,Adam Lambert,-4,37,79,65,11,59,1,131,91,2010,228,


In [34]:
session = graphDb.session()
query = "MATCH n = (:Song)-[:BELONGS_TO]->(:Genre) return n"
nodes = session.run(query)
for node in nodes:
    songTitle = dict(list(list(node["n"])[0].nodes)[0])['title']
    genre = dict(list(list(node["n"])[0].nodes)[1])['name']
    songs.loc[songs['title'] == songTitle, 'genre'] = genre
songs.head()

Unnamed: 0,title,artist,loudness,liveness,valence,danceability,speechiness,popularity,acousticness,bpm,energy,year,duration,genre
0,My First Kiss - feat. Ke$ha,3OH!3,-4,36,83,68,8,62,1,138,89,2010,192,dance pop
1,She Looks So Perfect,5 Seconds of Summer,-4,33,44,49,13,71,0,160,95,2014,202,boy band
2,Say Something,A Great Big World,-9,9,9,45,3,61,87,138,15,2014,229,neo mellow
3,Whataya Want from Me,Adam Lambert,-5,6,45,44,5,66,1,186,68,2010,227,australian pop
4,If I Had You,Adam Lambert,-4,37,79,65,11,59,1,131,91,2010,228,australian pop


In [35]:
encoder=ce.OneHotEncoder(cols=['artist', 'genre'],handle_unknown='return_nan',return_df=True,use_cat_names=True)
songsEncoded = encoder.fit_transform(songs)

In [36]:
userSongs = songsEncoded[songsEncoded.title.isin(userSongsList)]
otherSongs = songsEncoded[~songsEncoded.title.isin(userSongsList)]
otherSongsList = list(np.unique(otherSongs['title']))
userSongs.set_index('title', inplace = True)
otherSongs.set_index('title', inplace = True)
cosineSim = cosine_similarity(userSongs, otherSongs)
scores = list(np.sum(np.array(cosineSim), axis = 0))
scoresDict = {}
for i in range(0, len(scores)):
    scoresDict[otherSongsList[i]] = scores[i]
sortedScores = sorted(scoresDict.items(), key=operator.itemgetter(1))
sortedScores.reverse()
top10 = sortedScores[0:10]
recommendations = []
for i in top10:
    recommendations.append(i[0])
topRecommendations = songs[songs.title.isin(recommendations)]
topRecommendations[['title', 'artist']]

Unnamed: 0,title,artist
24,Supernova,Ansel Elgort
161,Want To,Dua Lipa
176,Lights - Single Version,Ellie Goulding
296,Die Young,Kesha
305,Talk (feat. Disclosure),Khalid
309,Party Rock Anthem,LMFAO
359,Youre Mine (Eternal),Mariah Carey
460,Boom Boom,RedOne
496,Bad Liar,Selena Gomez
548,Call You Mine,The Chainsmokers


In [37]:
#colaborative filtering
session = graphDb.session()
usersPlaylist = pd.DataFrame(columns = ['userId', 'song'])
USERID = 6113
query = "MATCH n = (:User{userId:'" + str(USERID) + "'})-[:LIKES]->() return n"
nodes = session.run(query)
userSongs = []
for node in nodes:
    l = list(node["n"])[0].nodes
    song = dict(l[1])["title"]
    userSongs.append(song)
    usersong = {}
    usersong['userId'] = USERID
    usersong['song'] = song
    usersong['data_availability'] = 1
    usersPlaylist = usersPlaylist.append(usersong, ignore_index = True)
userSongs

['This Is What You Came For',
 'Lips Are Movin',
 'Roar',
 'Uptown Funk',
 'Trumpets',
 'Time of Our Lives',
 'Want to Want Me',
 'Hello',
 'Never Forget You',
 'Worth It',
 'NO',
 'Dark Horse',
 'Break Free',
 '7 Years',
 'Dear Future Husband',
 'Sorry',
 'Bloodstream',
 'Chandelier',
 'Dangerous Woman',
 'Cake By The Ocean',
 'Same Old Love',
 'Stitches',
 'Wrecking Ball']

In [38]:
otherUsers = []
for i in userSongs:
    session = graphDb.session()
    query = "MATCH n = (:Song{title:'" + str(i) + "'})-[:LIKEDBY]->() return n"
    nodes = session.run(query)
    for node in nodes:
        user = list(list(node["n"])[0].nodes)[1]
        userId = dict(user)["userId"]
        if userId != USERID and userId not in otherUsers:
            otherUsers.append(userId)
print(otherUsers)
        

['6431', '7065', '7375', '6798', '7212', '6113', '6436', '7095', '6331', '6772', '6551', '7161', '6235', '6503', '6694', '7939', '7365', '7566', '6406', '6664', '6820', '7260', '6952', '7680', '7849', '6596', '7244', '7349', '7875', '7760', '7367', '7382', '6411', '7681', '6348', '7315', '7776', '6682', '6447', '7211', '6628', '6617', '6499', '7045', '6688', '7580', '6730', '7806', '6516', '6511', '6557', '7815', '6794', '7354']


In [39]:
#finding playlists of other users
for i in otherUsers:
    session = graphDb.session()
    query = "MATCH n = (:User{userId:'" + str(i) + "'})-[:LIKES]->() return n"
    nodes = session.run(query)
    for node in nodes:
        l = list(node["n"])[0].nodes[1]
        song = dict(l)["title"]
        usersong = {}
        usersong['userId'] = int(i)
        usersong['song'] = song
        usersong['data_availability'] = 1
        usersPlaylist = usersPlaylist.append(usersong, ignore_index = True)
usersPlaylist[['userId', 'song']].head()

Unnamed: 0,userId,song
0,6113,This Is What You Came For
1,6113,Lips Are Movin
2,6113,Roar
3,6113,Uptown Funk
4,6113,Trumpets


In [40]:
usersPlaylist.drop_duplicates(subset=['song', 'userId'], keep="first", inplace=True)
usersPlaylist.shape

(1361, 3)

In [41]:
usersPlaylistMatrix = usersPlaylist.pivot(index = 'userId', columns = 'song', values='data_availability')
usersPlaylistMatrix = usersPlaylistMatrix.fillna(0)
usersPlaylistMatrix.head()

song,24K Magic,7 Years,A Sky Full of Stars,A Thousand Years,Adore You,Alejandro,Alive,All About That Bass,All The Right Moves,All We Know,...,Work,Worth It,Wrecking Ball,XO,Yeah 3x,You And I,You Da One,Young Girls,Your Love Is My Drug,human
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
6113,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6235,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
6331,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6348,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
6406,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [42]:
def similarUsers(userId, userPlaylist, k=5):
    user = userPlaylist[userPlaylist.index == userId]
    otherUsers = userPlaylist[userPlaylist.index != userId]
    cosineSimilarities = cosine_similarity(user,otherUsers)[0].tolist()
    otherUsersIds = otherUsers.index.tolist()
    cosineSimilaritiesDict = {}
    for i in range(0, len(otherUsersIds)):
        cosineSimilaritiesDict[otherUsersIds[i]] = cosineSimilarities[i]
    sortedCosineSimilarities = sorted(cosineSimilaritiesDict.items(), key=operator.itemgetter(1))
    sortedCosineSimilarities.reverse()
    kSimilarUsers = sortedCosineSimilarities[0:k]
    kSimilarUsersIds = []
    for i in kSimilarUsers:
        kSimilarUsersIds.append(i[0])
        
    return kSimilarUsersIds

similarUsersIds = similarUsers(USERID, usersPlaylistMatrix)
print(similarUsersIds)

[6431, 6820, 7161, 7680, 6331]


In [43]:
songsListenedByUser = list(np.unique(list(usersPlaylist[usersPlaylist.userId == USERID].song)))
songsYetToBeListened = []
for i in similarUsersIds:
    songsListenedBySimilarUser = list(usersPlaylist[usersPlaylist.userId == i].song)
    for j in songsListenedBySimilarUser:
        if j not in songsListenedByUser:
            songsYetToBeListened.append(j)

songsYetToBeListened = list(np.unique(np.array(songsYetToBeListened)))
print(songsYetToBeListened)

['Adore You', 'All About That Bass', 'All of Me', 'Anaconda', 'Animals', 'Bang Bang', 'Blow', 'Blurred Lines', 'Burn', 'Call Me Maybe', 'Close', 'Closer', 'Dance Again', 'Elastic Heart', 'Everybody Talks', 'Fancy', 'Firework', 'Focus', 'Good Time', 'Gorilla', 'Grenade', 'Hands To Myself', 'Heartbeat Song', 'Here', 'Higher', 'How Deep Is Your Love', 'I Know What You Did Last Summer', 'I Really Like You', 'In the Name of Love', 'International Love', 'Into You', 'Jar of Hearts', 'Just the Way You Are', 'Kill Em With Kindness', 'Latch', 'Let Her Go', 'Let Me Love You', 'Love Yourself', 'Maps', 'Me Too', 'My House', 'Not a Bad Thing', 'PILLOWTALK', 'Payphone', 'Perfect', 'Perfect Illusion', 'Pompeii', 'Problem', 'Rise', 'Rolling in the Deep', 'Roses', 'Say Something', 'Shake It Off', 'Sing', 'Someone Like You', 'Starships', 'Starving', 'Stay With Me', 'Steal My Girl', 'Story of My Life', 'Sugar', 'Summer', 'Super Bass', 'Take Me To Church', 'Team', 'Teenage Dream', 'The Hills', 'The Way', '

In [44]:
songsList = songsListenedByUser + songsYetToBeListened
songs = songsEncoded[songsEncoded.title.isin(songsList)]
songs.head()

Unnamed: 0,title,artist_3OH!3,artist_5 Seconds of Summer,artist_A Great Big World,artist_Adam Lambert,artist_Adele,artist_Alan Walker,artist_Alessia Cara,artist_Alesso,artist_Alicia Keys,...,genre_indie pop,genre_folk-pop,genre_moroccan pop,genre_barbadian pop,genre_acoustic pop,genre_colombian pop,genre_australian dance,genre_celtic rock,genre_complextro,genre_baroque pop
2,Say Something,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Someone Like You,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Rolling in the Deep,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
14,Here,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25,Problem,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [45]:
userSongs = songs[songs.title.isin(songsListenedByUser)] 
topSongs = songs[songs.title.isin(songsYetToBeListened)]
userSongs.set_index('title', inplace = True)
topSongs.set_index('title', inplace = True)
cosineSim = cosine_similarity(userSongs, topSongs)
scores = list(np.sum(np.array(cosineSim), axis = 0))
scoresDict = {}
for i in range(0, len(scores)):
    scoresDict[songsYetToBeListened[i]] = scores[i]
sortedScores = sorted(scoresDict.items(), key=operator.itemgetter(1))
sortedScores.reverse()
top10 = sortedScores[0:10]
recommendations = []
for i in top10:
    recommendations.append(i[0])
topRecommendations = songs[songs.title.isin(recommendations)]
topRecommendations[['title']]

Unnamed: 0,title
2,Say Something
7,Someone Like You
32,The Way
76,Young Girls
177,Burn
282,Firework
370,Sugar
497,Kill Em With Kindness
549,Roses
572,PILLOWTALK


In [46]:
#page rank 
session = graphDb.session()
USERID = 6113
query = "MATCH n = (:User{userId:'" + str(USERID) + "'})-[:LIKES]->() return n"
nodes = session.run(query)
userSongsList = []
for node in nodes:
    l = list(node["n"])[0].nodes
    song = dict(l[1])["title"]
    userSongsList.append(song)
print(userSongsList)

['This Is What You Came For', 'Lips Are Movin', 'Roar', 'Uptown Funk', 'Trumpets', 'Time of Our Lives', 'Want to Want Me', 'Hello', 'Never Forget You', 'Worth It', 'NO', 'Dark Horse', 'Break Free', '7 Years', 'Dear Future Husband', 'Sorry', 'Bloodstream', 'Chandelier', 'Dangerous Woman', 'Cake By The Ocean', 'Same Old Love', 'Stitches', 'Wrecking Ball']


In [47]:
session = graphDb.session()
query = "MATCH n = (:User{userId:'" + str(USERID) + "'})-[:LIKES]->(:Song)-[:LIKEDBY]->(:User)-[:LIKES]->(:Song) return n"
nodes = session.run(query)
similarSongsFreq = {}
for node in nodes:
    song = dict(list(list(node["n"])[2].nodes)[1])['title']
    if song not in userSongsList:
        if song in similarSongsFreq:
            similarSongsFreq[song] += 1
        else:
            similarSongsFreq[song] = 1

In [48]:
session = graphDb.session()
query = "MATCH n = (:User{userId:'" + str(USERID) + "'})-[:LIKES]->(:Song)-[SUNG_BY]->(:Artist)-[:SINGS]->(:Song) return n"
nodes = session.run(query)
for node in nodes:
    song = dict(list(list(node["n"])[2].nodes)[1])['title']
    if song not in userSongsList:
        if song in similarSongsFreq:
            similarSongsFreq[song] += 1
        else:
            similarSongsFreq[song] = 1

In [49]:
sortedSimilarSongsFreq = sorted(similarSongsFreq.items(), key=operator.itemgetter(1))
sortedSimilarSongsFreq.reverse()
topSongsFreq = sortedSimilarSongsFreq[0:10]
recommendedSongs = []
for i in topSongsFreq:
    recommendedSongs.append(i[0])

recommendedSongs

['Problem',
 'Closer',
 'Bang Bang',
 'Love Yourself',
 'Roses',
 'Fancy',
 'My House',
 'Perfect',
 'Treat You Better',
 'Maps']