# Hybrid Recommender System
### combine content-based and collaborative filtering

In [1]:
import pandas as pd
import numpy as np
import pickle
from surprise import KNNWithMeans

In [2]:
# load cosine_matrix.pkl
with open('cosine_matrix.pkl', 'rb') as f:
    cosine_matrix = pickle.load(f)

In [3]:
# load knn.pkl
with open('knn_matrix.pkl', 'rb') as f:
    knn_matrix = pickle.load(f)
    

In [4]:
# load titles.pkl
with open('titles.pkl', 'rb') as f:
    titles = pickle.load(f)

#### weighted matrix

In [6]:
# change weights if desired
knn_sim_weight = 0.5
cos_sim_weight = 0.5

In [7]:
weighted_sim = (knn_sim_weight * knn_matrix) + (cos_sim_weight * cosine_matrix)

### Retrieving top recommendations

In [8]:
titles.head()

Unnamed: 0,track_id,name,artist
0,0,Mr. Brightside,The Killers
1,1,Wonderwall,Oasis
2,2,Take Me Out,Franz Ferdinand
3,3,Karma Police,Radiohead
4,4,Clocks,Coldplay


In [9]:
def get_track_id(artist, song):
    '''Get index from titles where artist and song match'''
    
    try:
        track_id = titles[(titles['artist'] == artist) & (titles['name'] == song)].index[0]
        return track_id
    except:
        print('Song not found in database')
        return None

In [10]:
# test
get_track_id('Oasis', 'Wonderwall')

1

In [11]:
def get_song(index):
    '''Get song artist and name at index'''
    try:
        row = titles.iloc[index]
        artist_song = row['artist'] + ' - ' + row['name']
        return artist_song
    except:
        print('Index not found in database')
        return None

In [12]:
# test
get_song(0)

'The Killers - Mr. Brightside'

In [13]:
def recommend_content(artist, title, sim_matrix):
    '''Get similar songs using the similarity matrix'''
    # get index for our song
    idx = get_track_id(artist, title)
    
    # get similarity scores of all songs w.r.t to our song
    sim_scores = list(enumerate(sim_matrix[idx]))
    
    # sort scores based on similarity
    sorted_sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    
    # limit to 20 songs
    sorted_sim_scores = sorted_sim_scores[1:20]

    # get song artist and names
    content_similar_scores = []
    for i in sorted_sim_scores:
        content_similar_scores.append(get_song(i[0]))
    
    return content_similar_scores

In [35]:
# select 10 random rows from titles and display artist and name
titles.sample(10)[['artist', 'name']]

Unnamed: 0,artist,name
10428,Wintersun,Sleeping Stars
8544,Flying Lotus,SexSlaveShip
12652,Q-Tip,Johnny Is Dead
14703,Black Lips,Navajo
13130,Hailee Steinfeld,Capital Letters
7400,The Hives,Diabolic Scheme
25418,Dying Fetus,Institutions Of Deceit
21123,The Raveonettes,"Oh, I Buried You Today"
14421,36 Crazyfists,All I Am
27131,Mike Oldfield,Musica Universalis


In [36]:
# recommend_content test
recommend_content('Flying Lotus', 'SexSlaveShip', weighted_sim)

['Flying Lotus - Comet Course',
 'Flying Lotus - Parisian Goldfish',
 'Flying Lotus - GNG BNG',
 'Flying Lotus - Beginners Falafel',
 'RJD2 - The Horror',
 'Holy Fuck - Latin America',
 'Tricky - Nothing Matters',
 'Thievery Corporation - The Numbers Game',
 'Badmarsh & Shri - Gharana',
 'FLYamSAM - The Offbeat',
 'Black Moth Super Rainbow - Vietcaterpillar',
 'Morcheeba - Bullet Proof',
 'ill.Gates - Collateral Damage',
 'Panzer AG - Bereit',
 'Hird - Buddy Rich',
 'Little People - Gravitas',
 'Mr. Scruff - Ug',
 'Deltron 3030 - Memory Loss',
 'Massive Attack - Simple Rules']

In [27]:
# store weighted_sim in pickle
with open('weighted_sim.pkl', 'wb') as f:
    pickle.dump(weighted_sim, f)