In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from scipy import sparse
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
import pickle 
from fuzzywuzzy import fuzz
from sklearn.model_selection import train_test_split

from gensim.models import FastText
import re
from numpy import savetxt
from gensim.models import FastText



In [2]:
# Load the raw CSV exports (paths are relative to the notebook's directory).
# `user_listen` and `songs` are read as module-level globals by the methods of
# Dynamic_Palylist_Generation defined below.
user_listen = pd.read_csv('../Datasets/Copy of explicit_data - Data preprocessing - songs.csv')
songs = pd.read_csv('../Datasets/Copy of explicit_data - Songs - All.csv')
# NOTE(review): songs_with_artist_id is not referenced by any other visible cell.
songs_with_artist_id = pd.read_csv('../Datasets/Copy of explicit_data - Songs - All-with artist_id.csv')

In [133]:
# Preview the first rows of the song catalogue.
songs.head()

Unnamed: 0,song_id,Title,Artist,Artist_id,Album,Release Year
0,2,Aa Ra Sulan,Nirosha Virajini,21.0,Aa Ra Sulan,2011.0
1,283,Aale katha,"Kalpana Nayanamadu, Shermaine Willis ft Iraj",11.0,Aale Katha,2018.0
2,3,Ada Nam Ma Hada Iwasum Na,Raveen Kanishka & Kalpana Kavindi,101.0,,
3,4,Ada Thaniyen Ma Hadanne Na Ma,Shihan Mihiranga,62.0,,
4,5,Adambarai Baluwama Nam,Surani De Mel,80.0,,


In [11]:
class Dynamic_Palylist_Generation():
    def __init__(self):
        self.merged_data = None
        self.model_knn = None
        self.user_id = None 
        self.cooccurence_matrix = None
        self.all_songs = None
        self.popularity_recommendations = None
#         self.train_data = None
#         self.test_data = None
        
        
    def merge_data(self, user_listen, songs):
        self.merged_data = pd.merge(user_listen, songs.drop_duplicates(['song_id']), on="song_id", how="left")
        self.merged_data['song'] = self.merged_data[['Title', 'Artist']].apply(lambda x: ' - '.join(x), axis=1)
        self.merged_data['listened_song'] = np.ones((441,), dtype=int)
        return self.merged_data
    
    def train_test_split(self):
        """Split the merged listening data 80/20 with a fixed seed.

        Reads the module-level ``user_listen`` and ``songs`` frames.

        Returns
        -------
        tuple
            ``(train_data, test_data)``.
        """
        merged = self.merge_data(user_listen, songs)
        # Inside the method body the bare name resolves to the module-level
        # function imported from sklearn, not to this method.
        parts = train_test_split(merged, test_size = 0.20, random_state=0)
        return parts[0], parts[1]
    
    def create(self):
        df_merge = self.merge_data(user_listen, songs)
        # get a count of user_ids for each unique song as recommendation score
        data_grouped = df_merge.groupby(['song_id']).agg({'user_id': 'count'}).reset_index()
        data_grouped.rename(columns = {'user_id': 'score'},inplace=True)

        # Sort the songs based upon recommendation score
        data_sort = data_grouped.sort_values(['score', 'song_id'], ascending = [0,1])

        # Generate a recommendation rank based upon score
        data_sort['Rank'] = data_sort['score'].rank(ascending=0, method='first')

        # Get the top 10 recommendations
        self.popularity_recommendations = data_sort.head(10) 
        return self.popularity_recommendations
    
    def baselineMethod(self): # call this
        df_merge = self.merge_data(user_listen, songs)
        user_recommendations = self.create()
    
        cols = user_recommendations.columns.tolist()
        cols = cols[-1:] + cols[:-1]
        user_recommendations = user_recommendations[cols]
        user_recommendations.reset_index(drop=True, inplace = True)
        popular_song_ids = user_recommendations['song_id']
        top_recommendations = []
        for i in popular_song_ids:
            top_recommendations.append(i)
        return top_recommendations
    
    # get recommendations based on user favourites
    def user_item_matrix(self,train_data):
        # df_merge = self.merge_data(user_listen, songs)
        user_item_matrix = train_data.pivot(
            index='song_id',
            columns='user_id',
            values='listened_song'
        ).fillna(0)
        return user_item_matrix
              
    
    def sparse_matrix(self, train_data):
        df_song_features = self.user_item_matrix(train_data)
        user_item_mat = csr_matrix(df_song_features.values)
        
        return user_item_mat
    
    def song_idx_mapping(self, train_data):
        df_song_features = self.user_item_matrix(train_data)
        song_to_idx = {
            song: i for i, song in 
            enumerate(list(songs.set_index('song_id').loc[df_song_features.index].Title))
        }
        return song_to_idx
    
    def KNN_model(self, train_data): # training and saving model # call this and save the model
        """Fit a cosine-distance nearest-neighbour model and pickle it to disk.

        Parameters
        ----------
        train_data : pandas.DataFrame
            Listening rows accepted by ``sparse_matrix``.

        Side effects
        ------------
        Writes the fitted estimator to ``model_knn_for_user_fav`` in the current
        working directory and keeps a reference on ``self.model_knn``.
        """
        model_knn = NearestNeighbors(metric='cosine', algorithm='brute', n_neighbors=20, n_jobs=-1)
        model_knn.fit(self.sparse_matrix(train_data))
        self.model_knn = model_knn

        # The context manager closes (and flushes) the file even if dump() raises.
        with open('model_knn_for_user_fav', 'wb') as knn_pickle:
            pickle.dump(model_knn, knn_pickle)
    
    
    def fuzzy_matching(self, mapper, fav_song, verbose=True):
        match_tuple = []
        # get match
        for title, idx in mapper.items():
            ratio = fuzz.ratio(title.lower(), fav_song.lower())
            if ratio >= 60:
                match_tuple.append((title, idx, ratio))
        # sort
        match_tuple = sorted(match_tuple, key=lambda x: x[2])[::-1]
        if not match_tuple:
            #print('Oops! No match is found')
            return
        return match_tuple[0][1]
    
    def DPG_recommendation(self, fav_song_id, test_data): #call this using test set
        """Recommend songs whose listener profiles are closest to a seed song.

        The pickled estimator written by ``KNN_model`` is loaded and re-fitted
        on the song-by-user matrix of ``test_data``; the seed song's neighbours
        in that matrix are then translated back to song_ids through their
        titles. Reads the module-level ``songs`` catalogue.

        Parameters
        ----------
        fav_song_id
            song_id of the seed song.
        test_data : pandas.DataFrame
            Listening rows accepted by ``sparse_matrix``.

        Returns
        -------
        list
            song_ids of up to ten neighbours, ordered from the most distant to
            the closest. A title shared by several catalogue rows contributes
            all of their ids. Empty when the seed id is unknown or no title in
            ``test_data`` matches it.
        """
        n_recommendations = 10
        data = self.sparse_matrix(test_data)
        mapper = self.song_idx_mapping(test_data)

        # SECURITY: unpickling runs arbitrary code from the file; only load a
        # model file that this notebook itself wrote.
        with open('model_knn_for_user_fav', 'rb') as model_file:
            loaded_model = pickle.load(model_file)
        loaded_model.fit(data)

        seed_titles = songs.loc[songs['song_id'] == fav_song_id, 'Title'].tolist()
        if not seed_titles:
            # Unknown song id: nothing to anchor the search on.
            return []

        seed_row = self.fuzzy_matching(mapper, seed_titles[0])
        if seed_row is None:
            # No title in test_data is close enough to the seed title.
            return []

        # kneighbors() cannot return more neighbours than there are fitted rows.
        n_neighbors = min(n_recommendations + 1, data.shape[0])
        distances, indices = loaded_model.kneighbors(data[seed_row], n_neighbors=n_neighbors)

        # Sort by ascending distance, then [:0:-1] reverses the list and drops
        # what was its first element (the closest hit, normally the seed row).
        neighbours = sorted(
            zip(indices.ravel().tolist(), distances.ravel().tolist()),
            key=lambda pair: pair[1],
        )[:0:-1]

        reverse_mapper = {row: title for title, row in mapper.items()}
        suggestions = []
        for row, _distance in neighbours:
            title = reverse_mapper.get(row)
            if title is None:
                # Rows whose title collided with another song have no entry in
                # the title->row mapping and cannot be translated back.
                continue
            suggestions.extend(songs.loc[songs['Title'] == title, 'song_id'].tolist())
        return suggestions
    
    # get recommendations based on user listening history + suggest new songs
    def get_user_items(self, user_id):
        df_merge = self.merge_data(user_listen, songs)
        user_data = df_merge[df_merge['user_id'] == user_id]
        user_items = list(user_data['song'].unique())
        return user_items
    
    # Get unique users for a given item (song)
    def get_item_users(self, song):
        df_merge = self.merge_data(user_listen, songs)
        item_data = df_merge[df_merge['song'] == song]
        item_users = set(item_data['user_id'].unique())
        return item_users
    
    # Get unique items (songs) in the training data
    def get_all_items_train_data(self):
        df_merge = self.merge_data(user_listen, songs)
        all_items = list(df_merge['song'].unique())
        return all_items

    def get_item_users_by_title(self, Title):
        df_merge = self.merge_data(user_listen, songs)
        item_data = df_merge[df_merge['Title'] == Title]
        item_users_ = set(item_data['user_id'].unique())
#         print(item_users_)
        return item_users_     
    
    def get_sentences(self):
        music = songs 
        song_name = music.Title.values
        song_name_clean = [re.sub(r'[^\w]', ' ', str(item))for item in song_name]
        song_name_clean = [re.sub(r" \d+", '', str(item.strip())) for item in song_name_clean]

        sentences = list()
        for item in song_name_clean:
            sentences.append(item.split())
        unique_sentence = np.unique(sentences)
        return unique_sentence, sentences
    
    def Fasttext_model(self): # save train content based model
        """Train a skip-gram FastText model on the song titles and save it.

        The model is trained on the distinct token lists returned by
        ``get_sentences`` and written to ``Fasttext.model`` in the current
        working directory.
        """
        num_features = 50    # Word vector dimensionality
        min_word_count = 1   # Keep every token, however rare
        num_workers = 1      # Number of worker threads
        context = 3          # Context window size
        downsampling = 1e-3  # Threshold above which frequent words are randomly downsampled

        # NOTE(review): `size=` is the keyword this notebook was written against;
        # confirm it matches the installed gensim version before re-running.
        model = FastText(workers=num_workers,
                         size=num_features, min_count=min_word_count,
                         window=context, sample=downsampling, sg=1)

        unique, _all_sentences = self.get_sentences()
        model.build_vocab(sentences = unique)

        # total_examples must describe the corpus that is actually passed in
        # (the de-duplicated titles), which build_vocab recorded as corpus_count.
        model.train(sentences = unique, total_examples=model.corpus_count, epochs=10)

        # Replaces the raw vectors with their normalised form before saving.
        model.init_sims(replace=True)

        model.save('Fasttext.model')

    def generate_similars(self, song_name_id):

        # load the trained model
        model = FastText.load('Fasttext.model')

        # split the song title
        song_name = songs['Title'].loc[songs['song_id'] == song_name_id]
        song_name = [w for w in song_name][0]
        tokens = song_name.split() 
        unique_sentence, sentences_ = self.get_sentences()

        suggestions = []

        # check for most similar items form the model
        suggestions.append(model.wv.most_similar(positive=tokens, topn=10))

        predictions = []
        for l in range(len(suggestions[0])):
            for i in range(len(unique_sentence)):
                for j in range(len(unique_sentence[i])):
                    if unique_sentence[i][j] == suggestions[0][l][0]:
#                         print(unique_sentence[i])
                        s = ' '
                        word = s.join(unique_sentence[i])
#                         print(word)
                        predictions.append(word)

        return predictions

    def recommend_new_items(self, user_id, new_song_id):

        predictions = self.generate_similars(new_song_id)
        for item in predictions:
            for value in self.get_item_users_by_title(item):
                if value == user_id:
                    return new_song
                else:
                    continue
    

        # Construct cooccurence matrix
    def construct_cooccurence_matrix(self, user_songs, all_songs):
        df_merge = self.merge_data(user_listen, songs)
        user_songs_users = []
        for i in range(0, len(user_songs)):
            user_songs_users.append(self.get_item_users(user_songs[i]))

            cooccurence_matrix = np.matrix(np.zeros(shape=(len(user_songs), len(all_songs))), float)

        for i in range(0, len(all_songs)):
            # Calculate unique listeners (users) of song (item) i
            songs_i_data = df_merge[df_merge['song'] == all_songs[i]]
            users_i = set(songs_i_data['user_id'].unique())
    #         print(songs_i_data)
    #         print(users_i)

            for j in range(0, len(user_songs)):
                # Get unique listeners (users) of song (item) j
                users_j = user_songs_users[j]

                # Calculate intersection of listeners of songs i and j
                users_intersection = users_i.intersection(users_j)

                # Calculate cooccurence_matrix[i,j] as Jaccard Index
                if len(users_intersection) != 0:
                    # Calculate union of listeners of songs i and j
                    users_union = users_i.union(users_j)

                    cooccurence_matrix[j,i] = float(len(users_intersection))/float(len(users_union))

                else:
                    cooccurence_matrix[j,i] = 0

        return cooccurence_matrix

    # Use the cooccurence matrix to make top recommendations
    def generate_top_recommendations(self, user_id, all_songs, user_songs, new_song_id = None):
        df_merge = self.merge_data(user_listen, songs)
        cooccurence_matrix = self.construct_cooccurence_matrix(user_songs, all_songs)
        #print("Non zero values in cooccurence_matrix :%d" % np.count_nonzero(cooccurence_matrix))

        # Calculate a weighted average of the scores in cooccurence matrix for all user songs.
        user_sim_scores = cooccurence_matrix.sum(axis=0)/float(cooccurence_matrix.shape[0])
        user_sim_scores = np.array(user_sim_scores)[0].tolist()

        # Sort the indices of user_sim_scores based upon their value Also maintain the corresponding score
        sort_index = sorted(((e,i) for i,e in enumerate(list(user_sim_scores))), reverse=True)

        # Create a dataframe from the following
        columns = ['user_id', 'song', 'score', 'rank']
        # index = np.arange(1) # array of numbers for the number of samples
        df = pd.DataFrame(columns=columns)
        
        # Fill the dataframe with top 10 item based recommendations
        rank = 1 
        for i in range(0,len(sort_index)):
            if ~np.isnan(sort_index[i][0]) and all_songs[sort_index[i][1]] not in user_songs and rank <= 10:
                df.loc[len(df)]=[user_id,all_songs[sort_index[i][1]],sort_index[i][0],rank]
                rank = rank+1
            # Handle the case where there are no recommendations
        #print(df)
        l2 = []
        suggestions = []
        for i in df['song']:
            l2.append(df_merge['song_id'].loc[df_merge['song'] == i]) 
        for i in range (len(l2)):
            for j in l2[i]:
                suggestions.append(j)
        suggestions = list(dict.fromkeys(suggestions))
        #print(suggestions)
        if len(suggestions) == 0:
            #print("The current user has no songs for training the item similarity based recommendation model.")
            return -1
        elif(new_song_id != None):
            suggestions.append(new_song_id) 
            # new_song_id = songs['song_id'].loc[songs['Title'] == new_song]
        
            #for i in new_song_id:
                #suggestions.append(i)
            return suggestions
        else:
            return suggestions


        # Use the item similarity based recommender system model to make recommendations test set
    def recommend_songs(self, user_id, new_song_id = None): #call this
        df_merge = self.merge_data(user_listen, songs)
        user_songs = self.get_user_items(user_id)    
        # print("No. of unique songs for the user: %d" % len(user_songs))

        all_songs = self.get_all_items_train_data()

        # print("no. of unique songs in the training set: %d" % len(all_songs))

        cooccurence_matrix = self.construct_cooccurence_matrix(user_songs, all_songs)

        if (new_song_id != None):
            new_item = self.recommend_new_items(user_id, new_song_id)
            df_recommendations = self.generate_top_recommendations(user_id, all_songs, user_songs, new_item)
        else:
            df_recommendations = self.generate_top_recommendations(user_id, all_songs, user_songs)

        return df_recommendations






In [12]:
# Instantiate the recommender and build the merged listening/catalogue frame
# that the exploration cells below work on.
dpg = Dynamic_Palylist_Generation()
full_data = dpg.merge_data(user_listen, songs)

In [13]:
# Song-by-user matrix over the full data (rows: song_id, columns: user_id).
user_item_matrix = dpg.user_item_matrix(full_data)

In [14]:
# Display the matrix built in the previous cell.
user_item_matrix

user_id,10001,10002,10003,10004,10010,10014,10015,10019,10022,10023,...,10425,10426,10428,10429,10430,10434,10435,10436,10439,10440
song_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
307,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
308,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
309,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
310,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [27]:
# Inspect the listening rows recorded for user 10296.
full_data.loc[full_data['user_id'] == 10296]

Unnamed: 0,user_id,age_group,gender,profession,hours_spending,musical_aspect,song_id,Title,Artist,Artist_id,Album,Release Year,song,listened_song
295,10296,18 - 24,Male,University student,0 - 1,It varies,126,Niwaduwatath Man Dan Sankawen,Sangeeth Wijesuriya,31.0,,,Niwaduwatath Man Dan Sankawen - Sangeeth Wijes...,1
297,10296,18 - 24,Male,University student,0 - 1,It varies,125,Nirayase,WePlus,128.0,,,Nirayase - WePlus,1
298,10296,18 - 24,Male,University student,0 - 1,It varies,21,Baila Gamuda,Bathiya n Santhush,79.0,,,Baila Gamuda - Bathiya n Santhush,1


In [None]:
# song_ids of user 10296, copied from the rows displayed in the cell above.
actual = [126, 125, 21]

In [8]:
# Fit the nearest-neighbour model on the full data and pickle it to
# 'model_knn_for_user_fav' (read back by DPG_recommendation).
dpg.KNN_model(full_data)

In [10]:
# Build the 80/20 split (later cells use `train` / `test`), then display the
# held-out rows.
train, test = dpg.train_test_split()
test

Unnamed: 0,user_id,age_group,gender,profession,hours_spending,musical_aspect,song_id,Title,Artist,Artist_id,Album,Release Year,song,listened_song
361,10361,18 - 24,Female,Student,0 - 1,The singer's voice,8,Adare sithum,Kasun Kalhara,5.0,,,Adare sithum - Kasun Kalhara,1
249,10250,25 - 34,Male,Software Developer,0 - 1,Music,211,Unuhuma 2-Aradhana,Tehan Perera,75.0,,,Unuhuma 2-Aradhana - Tehan Perera,1
271,10270,18 - 24,Female,Medical student,3 - 5,The message of the song,288,Prema sajjayanaya,Tharindu Damsara,47.0,,,Prema sajjayanaya - Tharindu Damsara,1
434,10435,18 - 24,Female,student,0 - 1,Music,300,Wala thiryen eha,T.M.Jayaratne,9.0,,,Wala thiryen eha - T.M.Jayaratne,1
397,10398,18 - 24,Female,Student,1 - 3,The message of the song,79,Kuweni,Ridma Weerawardena ft Dinupa Kodagoda,15.0,Galana Ganga,,Kuweni - Ridma Weerawardena ft Dinupa Kodagoda,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
380,10379,18 - 24,Female,Student,0 - 1,The singer's voice,14,Ananthayata Yana Para Dige,Kasun Kalhara,5.0,,2013.0,Ananthayata Yana Para Dige - Kasun Kalhara,1
213,10213,25 - 34,Male,Banker,1 - 3,The message of the song,136,Obage Mathaken,Shihan Mihiranga,62.0,,,Obage Mathaken - Shihan Mihiranga,1
134,10135,18 - 24,Female,Architecture student,2 - 3,The message of the song,105,Me Sanda Unath Paya Awith,Buddika Ushan,94.0,,,Me Sanda Unath Paya Awith - Buddika Ushan,1
49,10049,25 - 34,Male,Undergraduate,2 - 3,The singer's voice,102,Mayam Kalawe,Nadeemal Perera,30.0,,,Mayam Kalawe - Nadeemal Perera,1


In [7]:
# Neighbour-based recommendations for song_id 164.
# `train` is presumably the training split returned by dpg.train_test_split() - confirm.
dpg.DPG_recommendation(164,train) 

[232, 230, 234, 238, 236, 237, 235, 175, 257, 179]

In [5]:
# Popularity baseline: song_ids of the most-listened songs, best first.
dpg.baselineMethod()

[174, 79, 140, 175, 227, 127, 145, 26, 73, 196]

In [6]:
# DPG_recommendation requires the listening data as its second argument; use
# the full merged frame, which is also what KNN_model was fitted on.
dpg.DPG_recommendation(164, full_data)

[236, 231, 238, 239, 240, 235, 237, 175, 257, 179]

In [28]:
# Co-occurrence based recommendations for user 10296 (no newly added song).
predicted = dpg.recommend_songs(user_id = 10296)

In [29]:
# Display the recommended song_ids.
predicted

[253, 205, 299, 300, 301, 302, 303, 304, 305, 129]

In [20]:
# unique, sentences = dpg.get_sentences()
# print(unique)
# print(sentences)
# dpg.KNN_model(full_data) # run once; re-run after the database is updated so the saved model is refreshed
# dpg.DPG_recommendation(164, full_data)
# # dpg.Fasttext_model() # run once; re-run after the database is updated so the saved model is refreshed
# dpg.recommend_songs(user_id = 10296, new_song_id = xxx) # pass the id of the song newly added to the database

[list(['Aa', 'Ra', 'Sulan']) list(['Aale', 'katha'])
 list(['Ada', 'Nam', 'Ma', 'Hada', 'Iwasum', 'Na'])
 list(['Ada', 'Thaniyen', 'Ma', 'Hadanne', 'Na', 'Ma'])
 list(['Adambarai', 'Baluwama', 'Nam'])
 list(['Adanne', 'Ay', 'Sudu', 'Manike'])
 list(['Adara', 'Mage', 'Jesuni']) list(['Adaraneeya', 'Neranjana'])
 list(['Adaraya', 'Ayai']) list(['Adaraye', 'Ulpatha', 'Wu', 'Amma'])
 list(['Adare', 'sithum']) list(['Adarema', 'Geethayak'])
 list(['Adaren', 'Lanwenna', 'Hithuwata'])
 list(['Aduru', 'kutiya', 'thula']) list(['Ae'])
 list(['Ahas', 'Gabe', 'Sura', 'Duwak'])
 list(['Ahas', 'Thale', 'Nagei', 'Ruwan']) list(['Ahasin', 'eha'])
 list(['Ahasin', 'polowata']) list(['Ai', 'Kale', 'Adare'])
 list(['Ai', 'kale', 'mulu', 'hadinma'])
 list(['Akeekaru', 'pem', 'kathawak']) list(['Alawanthakam'])
 list(['Alen', 'Ma']) list(['Alen', 'Wela', 'Ganna'])
 list(['Amma', 'Adare', 'Uthura', 'Hinahuna'])
 list(['Amma', 'Budu', 'Wewa'])
 list(['Amma', 'Mathu', 'Buduwana', 'Amma']) list(['Amma', 'Sand

In [27]:
# dpg.generate_similars('Saragee Asille')

['Saragaye', 'Niya', 'Rata', 'Mawanawa']
Saragaye Niya Rata Mawanawa
['Sandawathiya', 'Obai']
Sandawathiya Obai
['Mathakaida', 'ada', 'wage']
Mathakaida ada wage
['Oba', 'apple', 'malak', 'wage']
Oba apple malak wage
['Hanthanata', 'Payana', 'Sanda']
Hanthanata Payana Sanda
['Maha', 'Warusawata', 'Pasuwa', 'Nagena', 'Sanda']
Maha Warusawata Pasuwa Nagena Sanda
['Me', 'Sanda', 'Unath', 'Paya', 'Awith']
Me Sanda Unath Paya Awith
['Paya', 'Ena', 'Sanda', 'Watha', 'Manaram']
Paya Ena Sanda Watha Manaram
['Sanda', 'Latha', 'Payala']
Sanda Latha Payala
['Sanda', 'Nawath', 'Kamak', 'Nathe']
Sanda Nawath Kamak Nathe
['Sanda', 'Thaniyama']
Sanda Thaniyama
['Sanda', 'pahan', 'raye']
Sanda pahan raye
['Oba', 'Ha', 'Mema', 'Athinath', 'Aran']
Oba Ha Mema Athinath Aran
['Amma', 'Budu', 'Wewa']
Amma Budu Wewa
['Kaasi']
Kaasi
['Sepalikawo']
Sepalikawo
['Baila', 'Gamuda']
Baila Gamuda
['Sihina', 'Lowe', 'Maya', 'Wethire']
Sihina Lowe Maya Wethire


['Saragaye Niya Rata Mawanawa',
 'Sandawathiya Obai',
 'Mathakaida ada wage',
 'Oba apple malak wage',
 'Hanthanata Payana Sanda',
 'Maha Warusawata Pasuwa Nagena Sanda',
 'Me Sanda Unath Paya Awith',
 'Paya Ena Sanda Watha Manaram',
 'Sanda Latha Payala',
 'Sanda Nawath Kamak Nathe',
 'Sanda Thaniyama',
 'Sanda pahan raye',
 'Oba Ha Mema Athinath Aran',
 'Amma Budu Wewa',
 'Kaasi',
 'Sepalikawo',
 'Baila Gamuda',
 'Sihina Lowe Maya Wethire']

In [32]:
# dpg.recommend_new_items(user_id = 10296, new_song = 'Saragee Asille')

'Saragee Asille'

In [5]:
# https://medium.com/cisco-emerge/creating-semantic-representations-of-out-of-vocabulary-words-for-common-nlp-tasks-842dbdafba18
# https://towardsdatascience.com/fasttext-under-the-hood-11efc57b2b3
# https://pathmind.com/wiki/word2vec
# https://github.com/manasRK/word2vec-recommender/blob/master/loadReviewModel.py
# https://medium.com/building-creative-market/word2vec-inspired-recommendations-in-production-f2c6a6b5b0bf
# https://arxiv.org/pdf/1601.01356.pdf
# https://towardsdatascience.com/word2vec-for-phrases-learning-embeddings-for-more-than-one-word-727b6cf723cf
# https://www.geeksforgeeks.org/python-word-embedding-using-word2vec/
# https://towardsdatascience.com/a-beginners-guide-to-word-embedding-with-gensim-word2vec-model-5970fa56cc92?#702d
# https://towardsdatascience.com/using-word2vec-for-music-recommendations-bb9649ac2484
# https://github.com/YIZHE12/music_recom/blob/master/music_recommendation_binary.ipynb
# https://towardsdatascience.com/using-word2vec-to-analyze-news-headlines-and-predict-article-success-cdeda5f14751
# https://machinelearningmastery.com/develop-word-embeddings-python-gensim/
# https://www.analyticsvidhya.com/blog/2019/07/how-to-build-recommendation-system-word2vec-python/

In [56]:
# s = songs['song_id'].loc[songs['Title'] == 'Aa Ra Sulan']
# s

0    2
Name: song_id, dtype: int64

In [None]:
f