# Recommender system

Build a content-based recommender system from each song's topic name, the top words of that topic, and its album name.

In [16]:
import pandas as pd
import pickle
from gensim import corpora, models, similarities, matutils
import seaborn as sns
import re
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
def load(clf_file):
    """Unpickle and return the object stored in the file at ``clf_file``.

    Parameters
    ----------
    clf_file : str or path-like
        Path of the pickle file to read.

    Returns
    -------
    object
        Whatever object was pickled into the file.

    Raises
    ------
    OSError
        If the file cannot be opened.
    pickle.UnpicklingError
        If the file content is not a valid pickle.
    """
    # NOTE: pickle.load can execute arbitrary code embedded in the file, so
    # only call this on files from a trusted source.
    # The `with` block closes the handle even if unpickling raises.
    with open(clf_file, "rb") as pickle_in:
        return pickle.load(pickle_in)

# Read the pickled song table (one row per track, with lyric text and an
# assigned topic label) from 'song_topic.pkl'.
data = load('song_topic.pkl')

In [3]:
# Show the loaded table: artist, album, track title, raw lyric, the
# tokenized / lemmatized / processed forms of the lyric, and the topic label.
data

Unnamed: 0,artist,album,track_title,lyric,tokenized,lemmatize,processed,topic,dictionary
0,Taylor Swift,1989,All You Had to Do Was Stay,People like you always want back The love they...,"[People, like, you, always, want, back, The, l...","[People, like, you, always, want, back, The, l...",People want love gave people wan na believe 'v...,feeling bad,"[people, want, love, gave, people, wan, believ..."
1,Taylor Swift,1989,Bad Blood,"'Cause baby, now we've got bad blood You know ...","['Cause, baby, ,, now, we, 've, got, bad, bloo...","['Cause, baby, ,, now, we, 've, got, bad, bloo...",baby 've got bad blood know mad love look 've ...,dancing,"[baby, got, bad, blood, know, mad, love, look,..."
2,Taylor Swift,1989,Blank Space,"Nice to meet you, where you been? I could show...","[Nice, to, meet, you, ,, where, you, been, ?, ...","[Nice, to, meet, you, ,, where, you, been, ?, ...",Nice meet incredible thing Magic madness sin S...,beautiful love,"[nice, meet, incredible, thing, magic, madness..."
3,Taylor Swift,1989,Clean,The drought was the very worst When the flower...,"[The, drought, was, the, very, worst, When, th...","[The, drought, wa, the, very, worst, When, the...",drought wa worst flower grown died thirst wa m...,play games,"[drought, worst, flower, grown, died, thirst, ..."
4,Taylor Swift,1989,How You Get The Girl,"Uh, uh, uh Uh, uh, uh Uh-uh, uh-uh Stand there...","[Uh, ,, uh, ,, uh, Uh, ,, uh, ,, uh, Uh-uh, ,,...","[Uh, ,, uh, ,, uh, Uh, ,, uh, ,, uh, Uh-uh, ,,...",Uh uh Uh uh Uh-uh uh-uh Stand ghost Shaking ra...,feeling bad,"[stand, ghost, shaking, rain, rain, open, door..."
...,...,...,...,...,...,...,...,...,...
89,Taylor Swift,reputation,King of My Heart,"I'm perfectly fine, I live on my own I made up...","[I, 'm, perfectly, fine, ,, I, live, on, my, o...","[I, 'm, perfectly, fine, ,, I, live, on, my, o...",'m fine live mind 'm better bein met week try ...,reflecting,"[fine, live, mind, better, bein, met, week, tr..."
90,Taylor Swift,reputation,Look What You Made Me Do,I don't like your little games Don't like your...,"[I, do, n't, like, your, little, games, Do, n'...","[I, do, n't, like, your, little, game, Do, n't...",like little game Do like tilted stage role pla...,trouble,"[like, little, game, like, tilted, stage, role..."
91,Taylor Swift,reputation,New Year's Day,There's glitter on the floor after the party G...,"[There, 's, glitter, on, the, floor, after, th...","[There, 's, glitter, on, the, floor, after, th...",'s glitter floor party Girls carrying shoe lob...,reflecting,"[glitter, floor, party, girls, carrying, shoe,..."
92,Taylor Swift,reputation,So It Goes...,"See you in the dark All eyes on you, my magici...","[See, you, in, the, dark, All, eyes, on, you, ...","[See, you, in, the, dark, All, eye, on, you, ,...",See dark All eye magician All eye u disappear ...,trouble,"[see, dark, eye, magician, eye, disappear, cut..."


In [4]:
# Load the saved gensim LDA model from 'topic.model'.
# NOTE(review): presumably the model that produced the `topic` labels in
# `data` -- confirm against the notebook that trained it.
lda = models.LdaModel.load('topic.model')

In [5]:
# One entry per LDA topic; element [1] of each entry is a text description
# from which a later cell pulls the topic's top words with a regex.
topic = lda.print_topics()

In [6]:
# Human-readable name for each LDA topic id. The names must match the labels
# stored in data['topic'] exactly, because they are used to select the rows
# belonging to each topic (e.g. the data uses 'feeling bad', not 'felling bad').
topic_map = {0: 'play games', 1: 'reflecting', 2: 'beautiful love', 3: 'dancing', 4: 'feeling bad', 5: 'trouble'}

In [10]:
# For every topic, pull the alphabetic words out of its description and store
# them as one string in which each word is preceded by a single space
# (e.g. ' think wan end'), ready to be appended to a song's bag of words.
topic_word = []
for entry in topic:
    words = re.findall(r'\b[a-zA-Z]+\b', entry[1])
    print(words)
    topic_word.append(''.join(' ' + word for word in words))

['think', 'wan', 'end', 'hold', 'game', 'big', 'night', 'aah', 'gorgeous', 'day']
['know', 'time', 'stay', 'look', 'love', 'wish', 'got', 'better', 'want', 'think']
['said', 'know', 'time', 'delicate', 'love', 'want', 'dress', 'beautiful', 'take', 'mon']
['shake', 'new', 'love', 'gon', 'york', 'dancing', 'welcome', 'time', 'girl', 'hand']
['know', 'got', 'baby', 'thing', 'bad', 'feel', 'tell', 'come', 'night', 'said']
['ooh', 'want', 'wood', 'clear', 'time', 'trouble', 'mean', 'got', 'waiting', 'call']


In [11]:
# Start each song's bag of words with its album name and its topic name,
# separated by a single space.
data['bag'] = data['album'] + ' ' + data['topic']

In [13]:
# Also add the top words of each song's topic to its bag of words.
# The bag is rebuilt from 'album' and 'topic' instead of being extended in
# place, so re-running this cell does not append the words a second time.
# Each topic_word entry already starts with a space, so no separator is added.
words_by_topic = {topic_map[topic_ind]: words for topic_ind, words in enumerate(topic_word)}
# Songs whose topic label has no entry in topic_map keep just "album topic".
data['bag'] = data['album'] + ' ' + data['topic'] + data['topic'].map(words_by_topic).fillna('')
    

In [17]:
# Turn every song's bag-of-words string into a row of token counts.
count = CountVectorizer()
count_matrix = count.fit_transform(data['bag'])

# Cosine similarity between every pair of rows of count_matrix.
# (The variable keeps its original spelling, `consine_sim`, because later
# cells refer to it by that name.)
consine_sim = cosine_similarity(count_matrix,count_matrix)

In [18]:
# Series whose values are data's index labels and whose own index is the
# default 0..n-1 range.
indices = pd.Series(data.index)

In [20]:
# build a recommender system that allows user to input 1 song and recommend 3 similar songs
def recommendations(song, consine_sim, top_n=3, titles=None):
    """Return the titles of the ``top_n`` songs most similar to ``song``.

    Parameters
    ----------
    song : str
        Exact track title to look up.
    consine_sim : 2-D array-like
        Pairwise similarity matrix; row ``i`` holds the similarity of the
        ``i``-th title to every title.
    top_n : int, default 3
        Number of recommendations to return.
    titles : sequence of str, optional
        Track titles in the same order as the rows of ``consine_sim``.
        Defaults to ``data['track_title']``.

    Returns
    -------
    list of str
        Up to ``top_n`` titles, most similar first. ``song`` itself (and any
        other row carrying the same title) is never included.

    Raises
    ------
    IndexError
        If ``song`` is not one of the known titles.
    """
    if titles is None:
        # Default to the notebook's song table, whose rows are the ones the
        # similarity matrix was computed from.
        titles = data['track_title']
    # Materialise the titles once instead of on every loop iteration.
    titles = list(titles)

    if song not in titles:
        # IndexError is what an unknown title has always raised here; keep the
        # type but say what actually went wrong.
        raise IndexError("song {!r} not found in track titles".format(song))
    indx = titles.index(song)

    scores = pd.Series(consine_sim[indx]).sort_values(ascending=False)
    print(scores.iloc[0:top_n + 1])

    # Filter the query out of the *whole* ranking before truncating. Cutting
    # to the first top_n + 1 rows first is wrong when several songs tie with
    # the query at 1.0: the query may not be among those rows, and one
    # recommendation too many would be returned.
    ranked_titles = [titles[i] for i in scores.index if titles[i] != song]
    return ranked_titles[:top_n]

In [21]:
# Example query: songs most similar to 'Bad Blood'.
recommendations('Bad Blood',consine_sim)

1     1.000000
12    1.000000
9     1.000000
45    0.928571
dtype: float64


['Welcome to New York', 'Shake It Off', 'The Moment I Knew']