In [None]:
import graphlab

# Load Music Data

In [None]:
# Relative path of the saved SFrame holding the song listening data.
SONG_DATA_PATH = 'song_data.gl/'
song_data = graphlab.SFrame(SONG_DATA_PATH)

# Explore Song Data

In [None]:
# Preview the first rows to see which columns the dataset provides.
song_data.head()

In [None]:
# Point GraphLab Canvas at the 'ipynb' target so that .show() calls
# render their visualizations inside this notebook.
graphlab.canvas.set_target('ipynb')

In [None]:
# Visualize the 'song' column in Canvas.
song_data['song'].show()

In [None]:
# Number of rows in the full dataset.
song_data.num_rows()

## Count Number of Users

In [None]:
# Distinct user ids found in the dataset.
# NOTE(review): unique() is documented as not preserving input order, so
# users[0] / users[1] are arbitrary example users -- confirm if order matters.
all_user_ids = song_data['user_id']
users = all_user_ids.unique()

In [None]:
# Number of distinct users.
users.size()

# Create a Song Recommender

In [None]:
# Randomly assign rows to a training set (fraction 0.8) and a test set
# (the remainder); the fixed seed makes the split repeatable.
train_data, test_data = song_data.random_split(fraction=0.8, seed=0)

## Popularity Based Recommender

In [None]:
# Baseline recommender trained on the training split, with users
# identified by 'user_id' and items by 'song'.
popularity_model = graphlab.popularity_recommender.create(
    observation_data=train_data,
    user_id='user_id',
    item_id='song')

## Use the Popularity Model to Make Predictions

In [None]:
# Popularity-based recommendations for the first entry in `users`.
popularity_model.recommend(users=[users[0]])

In [None]:
# Same query for the second entry in `users`, for comparison with the
# previous cell's output.
popularity_model.recommend(users=[users[1]])

# Build a Song Recommender with Personalization

In [None]:
# Item-similarity recommender trained on the same training split, with
# users identified by 'user_id' and items by 'song'.
personalized_model = graphlab.item_similarity_recommender.create(
    observation_data=train_data,
    user_id='user_id',
    item_id='song')

## Applying the Personalized Recommender System

In [None]:
# Personalized recommendations for the first entry in `users`.
personalized_model.recommend(users=[users[0]])

In [None]:
# Personalized recommendations for the second entry in `users`.
personalized_model.recommend(users=[users[1]])

In [None]:
# Ask the item-similarity model which songs it considers similar to this one.
personalized_model.get_similar_items(['With Or Without You - U2'])

In [None]:
# Same similarity query for a second example song.
personalized_model.get_similar_items(['Chan Chan (Live) - Buena Vista Social Club'])

# Quantitative Comparison Between Models

In [None]:
# Evaluate both recommenders on the held-out test data (user_sample=0.05)
# and display the comparison for the same pair of models.
models_to_compare = [popularity_model, personalized_model]
model_performance = graphlab.compare(test_data, models_to_compare, user_sample=0.05)
graphlab.show_comparison(model_performance, models_to_compare)

## Unique Users

In [None]:
# Keep only the rows whose artist is Kanye West, then count the distinct
# users appearing in those rows.
is_kanye_west = song_data['artist'] == 'Kanye West'
kw = song_data[is_kanye_west]
kw_users = kw['user_id'].unique()
len(kw_users)

In [None]:
# Number of distinct users with at least one Foo Fighters row.
is_foo_fighters = song_data['artist'] == 'Foo Fighters'
ff = song_data[is_foo_fighters]
ff_users = ff['user_id'].unique()
len(ff_users)

In [None]:
# Number of distinct users with at least one Taylor Swift row.
is_taylor_swift = song_data['artist'] == 'Taylor Swift'
a = song_data[is_taylor_swift]
taylor_swift_users = a['user_id'].unique()
len(taylor_swift_users)

In [None]:
# Number of distinct users with at least one Lady GaGa row.
# Note: this rebinds `a`, which the previous cell used for another artist.
is_lady_gaga = song_data['artist'] == 'Lady GaGa'
a = song_data[is_lady_gaga]
lady_gaga_users = a['user_id'].unique()
len(lady_gaga_users)

## Group By

In [None]:
# Sum 'listen_count' per artist into 'total_count', then order the artists
# from the largest total to the smallest.
listens_per_artist = song_data.groupby(
    key_columns='artist',
    operations={'total_count': graphlab.aggregate.SUM('listen_count')})
ssd = listens_per_artist.sort('total_count', ascending=False)

In [None]:
# Show the first and last rows of the sorted table together (highest and
# lowest 'total_count'). A notebook cell only displays its final expression,
# so the two rows are returned as one tuple; as separate statements the
# first row would be computed and silently dropped.
ssd[0], ssd[-1]

In [None]:
# `ssd` is sorted by 'total_count' in descending order, so these are the
# artists with the largest summed listen counts.
ssd.head()

In [None]:
# Last rows of the descending sort: the artists with the smallest summed
# listen counts.
ssd.tail()

## Most Recommended Songs

In [None]:
# Limit the recommendation run to at most 10000 distinct test-set users.
# NOTE(review): unique() is documented as not preserving order, so which
# users land in the first 10000 may not be stable -- confirm if it matters.
test_user_ids = test_data['user_id'].unique()
subset_test_users = test_user_ids[0:10000]

In [None]:
# Compute the single top recommendation (k=1) for each user in the subset
# once, keep it in `recs` for the cells below, and display it. Previously
# the same recommend() call ran twice: once only to display the result and
# once more to assign it.
recs = personalized_model.recommend(subset_test_users, k=1)
recs

In [None]:
# Number of recommendation rows produced.
recs.num_rows()

In [None]:
# Visualize the recommendation table in Canvas.
recs.show()

In [None]:
# Count how many times each song appears in the recommendations, then order
# the songs from most to least frequently recommended.
recommendation_counts = recs.groupby(
    key_columns='song',
    operations={'count': graphlab.aggregate.COUNT()})
rg = recommendation_counts.sort('count', ascending=False)

In [None]:
# `rg` is sorted by 'count' in descending order, so these are the songs
# recommended most often.
rg.head()

In [None]:
# Last rows of the descending sort: the songs recommended least often.
rg.tail()