In [1]:
import pandas
import numpy as np
import tensorflow as tf
from sklearn.cross_validation import train_test_split
import CF_Item_Item as Recommender_CF_II
# import CF_User_Item as Recommender_CF_UI 
# import CF_kNN as Recommender_CF_kNN

import Recommender_Try as Recommenders



### Loading Datasets
A Million Song Dataset (MSD) is used for this project

In [2]:
# =========================================
# Data Loading
# Read userid-songid-listen_count triplets
# =========================================

# In case want to take the dataset from internet
# This step might take time to download data from external sources
# triplets_file = 'https://static.turi.com/datasets/millionsong/10000.txt'
# songs_metadata_file = 'https://static.turi.com/datasets/millionsong/song_data.csv'

triplets_file = './dataset/10000.txt'
songs_metadata_file = './dataset/song_data.csv'

try: 
    song_df = pandas.read_table(triplets_file, header=None)
    metadata_df = pandas.read_csv(songs_metadata_file)
except:
    song_df = pandas.read_table('https://static.turi.com/datasets/millionsong/10000.txt', header=None)
    metadata_df = pandas.read_csv('https://static.turi.com/datasets/millionsong/song_data.csv')

song_df.columns=['user_id', 'song_id', 'listen_count']

dataset_df = pandas.merge(song_df, metadata_df.drop_duplicates(['song_id']), on='song_id', how='left')

# taking only 10000 dataset
data_df = dataset_df.head(10000)

# =========================================
# Data Pre-processing
# =========================================
# Merging unnecessary columns (song title - Artist)
data_df['song'] = data_df['title'].map(str) + ' - ' + data_df['artist_name']
data_df = data_df.drop(['title', 'release', 'artist_name', 'year'], axis=1)
users = data_df['user_id'].unique()
songs = data_df['song_id'].unique()
print('Number of unique users: ' + str(len(users)))
print('Number of unique songs:' + str(len(songs)))
print(data_df.head(10))

# =========================================
# Train Data Preparation
# =========================================
train_data, test_data = train_test_split(data_df, test_size = 0.20, random_state=0)

Number of unique users: 365
Number of unique songs:5175
                                    user_id             song_id  listen_count  \
0  b80344d063b5ccb3212f76538f3d9e43d87dca9e  SOAKIMP12A8C130995             1   
1  b80344d063b5ccb3212f76538f3d9e43d87dca9e  SOBBMDR12A8C13253B             2   
2  b80344d063b5ccb3212f76538f3d9e43d87dca9e  SOBXHDL12A81C204C0             1   
3  b80344d063b5ccb3212f76538f3d9e43d87dca9e  SOBYHAJ12A6701BF1D             1   
4  b80344d063b5ccb3212f76538f3d9e43d87dca9e  SODACBL12A8C13C273             1   
5  b80344d063b5ccb3212f76538f3d9e43d87dca9e  SODDNQT12A6D4F5F7E             5   
6  b80344d063b5ccb3212f76538f3d9e43d87dca9e  SODXRTY12AB0180F3B             1   
7  b80344d063b5ccb3212f76538f3d9e43d87dca9e  SOFGUAY12AB017B0A8             1   
8  b80344d063b5ccb3212f76538f3d9e43d87dca9e  SOFRQTD12A81C233C0             1   
9  b80344d063b5ccb3212f76538f3d9e43d87dca9e  SOHQWYZ12A6D4FA701             1   

                                                song

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


### Collaborative Filtering Recommender
This section is using Collaborative Filtering to recommend song to a user.

#### Item - Item Collaborative Filtering using Coocurence Matrix

This method is achieved by constructing coocurence matrix of the (user_songs X all_songs). In this example, the recommendations are the songs that are outside of his listening list (songs that he has already listened to).

In [3]:
# using Item-item Collaborative Filtering
Recommender1 = Recommender_CF_II.CF_Item_Item()
Recommender1.create(data_df, 'user_id', 'song',)

user_id = users[5]
user_items = Recommender1.getUserItems(user_id)
print("------------------------------------------------------------------------------------")
print("Song that has been played by the user userid: %s:" % user_id)
print("------------------------------------------------------------------------------------")
for item in user_items:
    print(item)
    
print("----------------------------------------------------------------------")
print("Recommendation process going on:")
print("----------------------------------------------------------------------")

Recommender1.recommend(user_id, 10)

------------------------------------------------------------------------------------
Song that has been played by the user userid: 4bd88bfb25263a75bbdd467e74018f4ae570e5df:
------------------------------------------------------------------------------------
Somebody To Love - Justin Bieber
Ghosts 'n' Stuff (Original Instrumental Mix) - Deadmau5
My Dad's Gone Crazy - Eminem / Hailie Jade
Missing You - John Waite
Ya Nada Queda - Kudai
The Real Slim Shady - Eminem
Forgive Me - Leona Lewis
Say My Name - Destiny's Child
Just Lose It - Eminem
16 Candles - The Crests
Without Me - Eminem
Push It - Salt-N-Pepa
Speechless - Lady GaGa
----------------------------------------------------------------------
Recommendation process going on:
----------------------------------------------------------------------
No. of unique items (songs) for the user: 13
no. of unique items (songs) in the training set: 5151
Non zero values in cooccurence_matrix :2813


Unnamed: 0,user_id,song,score,rank
0,4bd88bfb25263a75bbdd467e74018f4ae570e5df,Superman - Eminem / Dina Rae,0.096418,1
1,4bd88bfb25263a75bbdd467e74018f4ae570e5df,Mockingbird - Eminem,0.08991,2
2,4bd88bfb25263a75bbdd467e74018f4ae570e5df,U Smile - Justin Bieber,0.061363,3
3,4bd88bfb25263a75bbdd467e74018f4ae570e5df,I'm Back - Eminem,0.057932,4
4,4bd88bfb25263a75bbdd467e74018f4ae570e5df,Here Without You - 3 Doors Down,0.055542,5
5,4bd88bfb25263a75bbdd467e74018f4ae570e5df,Teach Me How To Dougie - California Swag District,0.054701,6
6,4bd88bfb25263a75bbdd467e74018f4ae570e5df,American Idiot [feat. Green Day & The Cast Of ...,0.054508,7
7,4bd88bfb25263a75bbdd467e74018f4ae570e5df,Monster - Lady GaGa,0.052736,8
8,4bd88bfb25263a75bbdd467e74018f4ae570e5df,Hellbound - J-Black & Masta Ace,0.052564,9
9,4bd88bfb25263a75bbdd467e74018f4ae570e5df,You Found Me (Album Version) - The Fray,0.052564,10
