In [1]:
import pandas
import numpy as np
import tensorflow as tf
from sklearn.cross_validation import train_test_split



### Loading Datasets
The Million Song Dataset (MSD) is used for this project.

In [2]:
# =========================================
# Data Loading
# Read userid-songid-listen_count triplets
# =========================================

# Remote mirrors of the dataset. They are only used when the local copies
# below cannot be loaded, because downloading them can take a while.
triplets_url = 'https://static.turi.com/datasets/millionsong/10000.txt'
songs_metadata_url = 'https://static.turi.com/datasets/millionsong/song_data.csv'

triplets_file = './dataset/10000.txt'
songs_metadata_file = './dataset/song_data.csv'

try:
    song_df = pandas.read_table(triplets_file, header=None)
    metadata_df = pandas.read_csv(songs_metadata_file)
except Exception as load_error:
    # `Exception` instead of a bare `except:` so that KeyboardInterrupt and
    # SystemExit still stop the cell rather than silently starting a download.
    print('Local dataset unavailable (%s); loading from remote source.' % load_error)
    song_df = pandas.read_table(triplets_url, header=None)
    metadata_df = pandas.read_csv(songs_metadata_url)

# The triplets file is read without a header row, so name the columns explicitly.
song_df.columns = ['user_id', 'song_id', 'listen_count']

# Left join keeps every listen event. The metadata is de-duplicated on song_id
# first so that the join cannot multiply rows.
dataset_df = pandas.merge(song_df, metadata_df.drop_duplicates(['song_id']), on='song_id', how='left')

# Work on the first 10000 rows only. `.copy()` makes data_df an independent
# frame, so adding columns to it later does not raise SettingWithCopyWarning.
data_df = dataset_df.head(10000).copy()

# =========================================
# Data Pre-processing
# =========================================
# Combine title and artist into a single "song" label (song title - Artist).
# `assign` returns a new frame instead of writing into `data_df` in place, which
# avoids the SettingWithCopyWarning raised when `data_df` is a slice of another
# frame. The new column is appended last, exactly as item assignment would do.
data_df = data_df.assign(song=data_df['title'].map(str) + ' - ' + data_df['artist_name'])
# The raw metadata columns are redundant once the combined label exists.
data_df = data_df.drop(['title', 'release', 'artist_name', 'year'], axis=1)
# Distinct ids, reused by later cells (e.g. to pick a demo user).
users = data_df['user_id'].unique()
songs = data_df['song_id'].unique()
print('Number of unique users: ' + str(len(users)))
print('Number of unique songs:' + str(len(songs)))
print(data_df.head(10))

# =========================================
# Train Data Preparation
# =========================================
# Hold out 20% of the rows as a test set; the fixed random_state keeps the
# split reproducible across runs.
# NOTE(review): `train_test_split` is imported from `sklearn.cross_validation`
# at the top of the notebook; newer scikit-learn releases only provide it in
# `sklearn.model_selection` - confirm the installed version.
train_data, test_data = train_test_split(
    data_df,
    test_size=0.20,
    random_state=0
)

Number of unique users: 365
Number of unique songs:5175
                                    user_id             song_id  listen_count  \
0  b80344d063b5ccb3212f76538f3d9e43d87dca9e  SOAKIMP12A8C130995             1   
1  b80344d063b5ccb3212f76538f3d9e43d87dca9e  SOBBMDR12A8C13253B             2   
2  b80344d063b5ccb3212f76538f3d9e43d87dca9e  SOBXHDL12A81C204C0             1   
3  b80344d063b5ccb3212f76538f3d9e43d87dca9e  SOBYHAJ12A6701BF1D             1   
4  b80344d063b5ccb3212f76538f3d9e43d87dca9e  SODACBL12A8C13C273             1   
5  b80344d063b5ccb3212f76538f3d9e43d87dca9e  SODDNQT12A6D4F5F7E             5   
6  b80344d063b5ccb3212f76538f3d9e43d87dca9e  SODXRTY12AB0180F3B             1   
7  b80344d063b5ccb3212f76538f3d9e43d87dca9e  SOFGUAY12AB017B0A8             1   
8  b80344d063b5ccb3212f76538f3d9e43d87dca9e  SOFRQTD12A81C233C0             1   
9  b80344d063b5ccb3212f76538f3d9e43d87dca9e  SOHQWYZ12A6D4FA701             1   

                                                song

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


### Popularity Based Recommender
This recommendation is based on the popularity of the item (listen_count in this case).

In [3]:
import Recommender.Popularity as Recommender_Popularity

# Build the popularity recommender from the training split, passing the
# 'song' and 'listen_count' column names of that frame.
Recommender0 = Recommender_Popularity.Popularity()
Recommender0.create(train_data, 'song', 'listen_count')
# NOTE(review): 12 is passed here, but the saved output lists only 10 rows -
# confirm how Popularity.recommend interprets its argument.
Recommender0.recommend(12)

Unnamed: 0,song,listen_count
0,You're The One - Dwight Yoakam,399
1,Waiting For A Dream - Rufus Wainwright,247
2,Undo - Björk,151
3,Times - Tenth Avenue North,140
4,Catch You Baby (Steve Pitron & Max Sanna Radio...,137
5,Revelry - Kings Of Leon,136
6,Fools - The Dodos,128
7,Sehr kosmisch - Harmonia,115
8,Secrets - OneRepublic,77
9,Numb (Album Version) - Disturbed,77


### Collaborative Filtering Recommender
This section uses collaborative filtering to recommend songs to a user.

#### Item-Item Collaborative Filtering using a Co-occurrence Matrix

This method works by constructing a co-occurrence matrix of shape (user_songs × all_songs). In this example, the recommendations are songs outside the user's listening list (i.e., not among the songs the user has already listened to).

In [4]:
import Recommender.CF_Item_Item as Recommender_CF_II

# Item-item collaborative filtering, created from the full (not split)
# data_df with the 'user_id' and 'song' columns.
Recommender1 = Recommender_CF_II.CF_Item_Item()
Recommender1.create(data_df, 'user_id', 'song')

# Use the sixth entry of the unique-user array as the demo user; later cells
# reuse this `user_id`.
user_id = users[5]
user_items = Recommender1.getUserItems(user_id)

# Header for the user's listening history, written in one call.
print("\n".join([
    "------------------------------------------------------------------------------------",
    "Song that has been played by the user userid: %s:" % user_id,
    "------------------------------------------------------------------------------------",
]))
for item in user_items:
    print(item)

# Header for the recommendation step.
print("\n".join([
    "----------------------------------------------------------------------",
    "Recommendation process going on:",
    "----------------------------------------------------------------------",
]))

# Last expression of the cell, so the result is displayed as cell output.
Recommender1.recommend(user_id, 10)

------------------------------------------------------------------------------------
Song that has been played by the user userid: 4bd88bfb25263a75bbdd467e74018f4ae570e5df:
------------------------------------------------------------------------------------
Somebody To Love - Justin Bieber
Ghosts 'n' Stuff (Original Instrumental Mix) - Deadmau5
My Dad's Gone Crazy - Eminem / Hailie Jade
Missing You - John Waite
Ya Nada Queda - Kudai
The Real Slim Shady - Eminem
Forgive Me - Leona Lewis
Say My Name - Destiny's Child
Just Lose It - Eminem
16 Candles - The Crests
Without Me - Eminem
Push It - Salt-N-Pepa
Speechless - Lady GaGa
----------------------------------------------------------------------
Recommendation process going on:
----------------------------------------------------------------------
No. of unique items (songs) for the user: 13
No. of unique items (songs) in the training set: 5151
Non zero values in cooccurence_matrix :2813


Unnamed: 0,user_id,song,score,rank
0,4bd88bfb25263a75bbdd467e74018f4ae570e5df,Superman - Eminem / Dina Rae,0.096418,1
1,4bd88bfb25263a75bbdd467e74018f4ae570e5df,Mockingbird - Eminem,0.08991,2
2,4bd88bfb25263a75bbdd467e74018f4ae570e5df,U Smile - Justin Bieber,0.061363,3
3,4bd88bfb25263a75bbdd467e74018f4ae570e5df,I'm Back - Eminem,0.057932,4
4,4bd88bfb25263a75bbdd467e74018f4ae570e5df,Here Without You - 3 Doors Down,0.055542,5
5,4bd88bfb25263a75bbdd467e74018f4ae570e5df,Teach Me How To Dougie - California Swag District,0.054701,6
6,4bd88bfb25263a75bbdd467e74018f4ae570e5df,American Idiot [feat. Green Day & The Cast Of ...,0.054508,7
7,4bd88bfb25263a75bbdd467e74018f4ae570e5df,Monster - Lady GaGa,0.052736,8
8,4bd88bfb25263a75bbdd467e74018f4ae570e5df,Hellbound - J-Black & Masta Ace,0.052564,9
9,4bd88bfb25263a75bbdd467e74018f4ae570e5df,You Found Me (Album Version) - The Fray,0.052564,10


#### Collaborative Filtering using k-NearestNeighbors


In [5]:
import Recommender.CF_kNN as Recommender_CF_kNN

# k-nearest-neighbours recommender, created from the full data_df with the
# 'user_id', 'song' and 'listen_count' columns.
Recommender2 = Recommender_CF_kNN.CF_kNN()
Recommender2.create(data_df, 'user_id', 'song', 'listen_count')

# Banner for the run without an explicit reference song.
print("\n".join([
    "\n----------------------------------------------------------------------",
    "Recommendation with random song as a reference",
    "Recommendation process going on:",
    "----------------------------------------------------------------------",
]))

# `user_id` is the demo user chosen in the item-item cell. No `item` is given
# here; per the banner text a random reference song is used in that case.
# NOTE(review): every distance in the saved output is 0.0 - worth confirming
# that this is expected for the random-reference path.
Recommender2.recommend(user_id, 10)

training...

----------------------------------------------------------------------
Recommendation with random song as a reference
Recommendation process going on:
----------------------------------------------------------------------
Recommendations for The Joker - Fatboy Slim:





Unnamed: 0,user_id,song,distance,rank
0,4bd88bfb25263a75bbdd467e74018f4ae570e5df,Parks - Four Tet,0.0,1
1,4bd88bfb25263a75bbdd467e74018f4ae570e5df,I Feel For You - Stefan Schrom,0.0,2
2,4bd88bfb25263a75bbdd467e74018f4ae570e5df,Comet Course - Flying Lotus,0.0,3
3,4bd88bfb25263a75bbdd467e74018f4ae570e5df,Playboy - Hot Chip,0.0,4
4,4bd88bfb25263a75bbdd467e74018f4ae570e5df,Plastic People - Four Tet,0.0,5
5,4bd88bfb25263a75bbdd467e74018f4ae570e5df,Church - T-Pain featuring Teddy Verseti,0.0,6
6,4bd88bfb25263a75bbdd467e74018f4ae570e5df,Phantom - Justice,0.0,7
7,4bd88bfb25263a75bbdd467e74018f4ae570e5df,Paul's Birthday - Caribou (formerly Dan Snaith...,0.0,8
8,4bd88bfb25263a75bbdd467e74018f4ae570e5df,Puppetmad - Puppetmastaz,0.0,9
9,4bd88bfb25263a75bbdd467e74018f4ae570e5df,This Unfolds - Four Tet,0.0,10


In [6]:
# Banner for the run that uses an explicit query song as the reference.
print("\n".join([
    "\n----------------------------------------------------------------------",
    "Recommendation with query song as a reference",
    "Recommendation process going on:",
    "----------------------------------------------------------------------",
]))

# Same recommender and demo user as the previous cell, now anchored on a
# specific song label passed through `item`.
Recommender2.recommend(user_id, 10, item='Somebody To Love - Justin Bieber')


----------------------------------------------------------------------
Recommendation with query song as a reference
Recommendation process going on:
----------------------------------------------------------------------
fuzzy ratio query results:
[('One Time - Justin Bieber', 75, 3161), ('Somebody To Love - Justin Bieber', 100, 3868), ('Somebody To Love - Queen', 79, 3869)]

Recommendations for Somebody To Love - Justin Bieber:



Unnamed: 0,user_id,song,distance,rank
0,4bd88bfb25263a75bbdd467e74018f4ae570e5df,U Smile - Justin Bieber,0.364605,1
1,4bd88bfb25263a75bbdd467e74018f4ae570e5df,Teach Me How To Dougie - California Swag District,0.487478,2
2,4bd88bfb25263a75bbdd467e74018f4ae570e5df,Girls Just Wanna Have Fun - Miley Cyrus,0.527134,3
3,4bd88bfb25263a75bbdd467e74018f4ae570e5df,Be Somebody - Kings Of Leon,0.527134,4
4,4bd88bfb25263a75bbdd467e74018f4ae570e5df,My Dad's Gone Crazy - Eminem / Hailie Jade,0.533568,5
5,4bd88bfb25263a75bbdd467e74018f4ae570e5df,Push It - Salt-N-Pepa,0.541086,6
6,4bd88bfb25263a75bbdd467e74018f4ae570e5df,Dream On - Aerosmith,0.5514,7
7,4bd88bfb25263a75bbdd467e74018f4ae570e5df,Without Me - Eminem,0.564752,8
8,4bd88bfb25263a75bbdd467e74018f4ae570e5df,All The Right Moves - OneRepublic,0.565315,9
9,4bd88bfb25263a75bbdd467e74018f4ae570e5df,Time To Pretend - MGMT,0.565645,10


#### Collaborative Filtering using Matrix Factorization (SVD - Singular Value Decomposition)

In [7]:
import Recommender.CF_Matrix_Factorization as Recommender_CF_MF

# Build the SVD-based recommender from the training split, passing the
# 'user_id', 'song' and 'listen_count' column names of that frame.
Recommender3 = Recommender_CF_MF.SVD()
Recommender3.create(train_data, 'user_id', 'song', 'listen_count')
# Request 10 results relative to the given song label. The saved output is
# ranked by a `corr` column and includes the query song itself (corr 1.0).
Recommender3.recommend(10, item='Time To Pretend - MGMT')

Int64Index([3963], dtype='int64')


Unnamed: 0,song,corr
3963,Time To Pretend - MGMT,1.0
875,Dental Care - Owl City,0.996354
4255,What You Know - Two Door Cinema Club,0.995999
2499,My Happy Ending - Avril Lavigne,0.995539
4436,You'd Be So Nice To Come Home To - Julie London,0.995333
2117,Lean Back [Remix feat. Lil Jon_ Eminem_ Mase &...,0.995302
1464,Half Alive - Secondhand Serenade,0.995302
1561,Hero (Album Version) - Skillet,0.995302
483,Bottom of a Bottle (Explicit Album Version) - ...,0.995302
1158,Falling In Love Again - Eagle-Eye Cherry,0.995302


In [8]:
# Song catalogue (tab-separated), with the listed metadata columns removed.
song_df2 = (
    pandas.read_csv('./dataset/MSD_songs.csv', sep='\t')
    .drop(['dig7_id', 'release', 'artist_name', 'year'], axis=1)
)

# Tags (tab-separated), inner-joined to the catalogue on track_id so that only
# tags whose track appears in song_df2 are kept.
tag_df2 = pandas.merge(
    pandas.read_csv('./dataset/LAST_FM_tags.csv', sep='\t'),
    song_df2,
    on='track_id',
    how='inner',
)

# Quick look at the first rows of both frames.
print(song_df2.head())
print(tag_df2.head())

             track_id             song_id  \
0  TRAHHBV128F930B736  SOHWZOW12AB017EE95   
1  TRAHHSV128F42374E3  SOQBKAT12A81C20661   
2  TRAHHJY12903CA73BD  SOEKMUY12AB018CB47   
3  TRAHHPE128F934AC3B  SOFGBOY12AB018549F   
4  TRAHHAY12903CD8B6F  SOXHLIO12AB0185C6F   

                                      title  
0                        Spirit and Machine  
1  I Only Have Eyes For You (Album Version)  
2                             Porcelain Man  
3                     (Wake Up) Time to Die  
4                             Easier By Now  
             track_id             tags             song_id  \
0  TRAHHSV128F42374E3    american_idol  SOQBKAT12A81C20661   
1  TRAHHSV128F42374E3  only_eyes_for_u  SOQBKAT12A81C20661   
2  TRAHHMV12903CD0B0D             sexy  SOKEFDU12AB0187D54   
3  TRAHHMV12903CD0B0D              pop  SOKEFDU12AB0187D54   
4  TRAHHMV12903CD0B0D            dance  SOKEFDU12AB0187D54   

                                      title  
0  I Only Have Eyes For You (Album

### Content-Based Music Recommender System

In [11]:
import Recommender.DeepContent.DeepContent as RecommenderDeepContent

# Content-based recommender, constructed from the song catalogue, the tag
# frame and the raw listen triplets, then trained.
Recommender5 = RecommenderDeepContent.DeepContent(song_df2, tag_df2, song_df)
Recommender5.train()

# Look the query title up in the recommender's own song table; the matching
# rows are the cell's displayed result.
query = 'I Only Have Eyes For You (Album Version)'
Recommender5.song_df.loc[Recommender5.song_df['title'] == query].head()

Unnamed: 0,track_id,song_id,title,tags
0,TRAHHSV128F42374E3,SOQBKAT12A81C20661,I Only Have Eyes For You (Album Version),american_idol only_eyes_for_u


In [12]:
# Recommend from the `query` title defined in the previous cell; 10 is passed
# as the second argument and the saved output shows 10 rows.
Recommender5.recommendFromQuery(query,10)

Unnamed: 0,track_id,song_id,title,tags
0,TRBAZFD12903CB641D,SOUEYVF12A58A7B70F,Hold Up My Heart,brooke_white pop american_idol female_vocalist...
1,TRAFOKY128F92ED751,SORCRQT12A8C142A5A,I Did it for You,rock alternative_rock i_did_it_for_you david_c...
2,TRAYJOF128F92F9EEA,SOAYXCJ12AB0183822,Cry,female_vocalists pop pop_rock heartbreaking ro...
3,TRACAKD128F92D5D38,SOYMPFK12A8C141D1A,My Life Would Suck Without You,pop female_vocalists kelly_clarkson pop_rock r...
4,TRALMBM128F92EAA15,SOXIZCU12A8C1440A1,Never Again,pop female_vocalists rock kelly_clarkson pop_r...
5,TRAHHUN128F4227029,SOUWPSC12A81C2193E,Jody,blues_rock guitar_blues
6,TRAHHMV12903CD0B0D,SOKEFDU12AB0187D54,PDA,sexy pop dance backstreet_boys male_vocalists ...
7,TRAHHMM128F932D5D9,SOZVXUQ12AB01832CC,Turntable Terrorist,turntable_terrorist breakcore
8,TRAHHVA128F424ED80,SOQOPQU12A8C134960,Dept. Of Youth,metal rock indie hard_rock heavy_metal glam_rock
9,TRAHGXT12903CF1566,SOGVPUQ12AC468BCA8,Humppa,herrlich stuff_to_go freshe_piccoz
