# Building a song recommender


# Fire up GraphLab Create

In [None]:
import graphlab

# Load music data

In [None]:
# Load the saved SFrame from 'song_data.gl/' (path is relative to the notebook's working directory).
song_data = graphlab.SFrame('song_data.gl/')

# Explore data

Music data shows how many times a user listened to a song, as well as the details of the song.

In [None]:
# Preview the first few rows to see which columns the data contains.
song_data.head()

## Showing the most popular songs in the dataset

In [None]:
# Point GraphLab Canvas at the 'ipynb' target so that .show() output is
# presumably rendered inside this notebook rather than elsewhere.
graphlab.canvas.set_target('ipynb')

In [None]:
# Visualize the 'song' column; used here to see which songs occur most often.
song_data['song'].show()

In [None]:
# Total number of rows in song_data.
len(song_data)

## Count number of unique users in the dataset

In [None]:
# Distinct user ids; later cells index into this (users[0], users[1]) to pick example users.
users = song_data['user_id'].unique()

In [None]:
# Number of distinct users in the dataset.
len(users)

# Create a song recommender

In [None]:
# Split the rows at a 0.8 fraction into training and test sets; the fixed
# seed makes the split repeatable across runs.
train_data, test_data = song_data.random_split(0.8, seed=0)

## Simple popularity-based recommender

In [None]:
# Baseline recommender trained on the training split only, keyed by the
# 'user_id' and 'song' columns.
popularity_model = graphlab.popularity_recommender.create(
    train_data, user_id='user_id', item_id='song')

### Use the popularity model to make some predictions

A popularity model makes the same prediction for all users, so it provides no personalization.

In [None]:
# Recommendations from the popularity model for the first user id in `users`.
popularity_model.recommend(users=[users[0]])

In [None]:
# Same query for the second user id, for comparison with the first user's output.
popularity_model.recommend(users=[users[1]])

## Build a song recommender with personalization

We now create a model that allows us to make personalized recommendations to each user. 

In [None]:
# Item-similarity recommender trained on the same training split and the
# same 'user_id' / 'song' columns as the popularity baseline.
personalized_model = graphlab.item_similarity_recommender.create(
    train_data, user_id='user_id', item_id='song')

### Applying the personalized model to make song recommendations

As you can see, different users get different recommendations now.

In [None]:
# Recommendations from the item-similarity model for the first user id in `users`.
personalized_model.recommend(users=[users[0]])

In [None]:
# Same query for the second user id, for comparison with the first user's output.
personalized_model.recommend(users=[users[1]])

### We can also apply the model to find songs similar to any song in the dataset

In [None]:
# Look up items similar to this U2 track; the string is an item id, i.e. a
# value from the 'song' column the model was trained with.
personalized_model.get_similar_items(['With Or Without You - U2'])

In [None]:
# Same similar-item lookup for a second example song.
personalized_model.get_similar_items(['Chan Chan (Live) - Buena Vista Social Club'])

# Quantitative comparison between the models

We now formally compare the popularity and the personalized models using precision-recall curves. 

In [None]:
if graphlab.version[:3] >= "1.6":
    model_performance = graphlab.compare(test_data, [popularity_model, personalized_model], user_sample=0.05)
    graphlab.show_comparison(model_performance,[popularity_model, personalized_model])
else:
    %matplotlib inline
    model_performance = graphlab.recommender.util.compare_models(test_data, [popularity_model, personalized_model], user_sample=.05)

The curve shows that the personalized model provides much better performance. 

# 1



## Compute the number of unique users who have listened to songs by various artists

In [None]:
# For each artist: keep only that artist's rows, then collect the distinct
# user ids found in them.
artist_col = song_data['artist']
unique_users_kanye = song_data[artist_col == 'Kanye West']['user_id'].unique()
unique_users_foo = song_data[artist_col == 'Foo Fighters']['user_id'].unique()
unique_users_taylor = song_data[artist_col == 'Taylor Swift']['user_id'].unique()
unique_users_gaga = song_data[artist_col == 'Lady GaGa']['user_id'].unique()

In [None]:
# Print one "<artist> <distinct listener count>" line per artist.
for label, listeners in (
        ('Kanye West ', unique_users_kanye),
        ('Foo Fighters ', unique_users_foo),
        ('Taylor Swift ', unique_users_taylor),
        ('Lady Gaga ', unique_users_gaga)):
    print(label + str(len(listeners)))

# 2

## Using groupby-aggregate to find the most popular and least popular artist:

Each row of song_data contains the number of times a user listened to a particular song by a particular artist. If we would like to know how many times any song by 'Kanye West' was listened to, we need to select all the rows where 'artist' == 'Kanye West' and sum the 'listen_count' column. If we would like to find the most popular artist, we would need to follow this procedure for each artist, which would be very slow. Instead, we use a very important method, groupby, which computes the aggregate for every artist in one call:

In [None]:
# One output row per artist, with 'total_count' holding the summed
# listen_count over all of that artist's rows.
listen_totals = {'total_count': graphlab.aggregate.SUM('listen_count')}
song_data_ = song_data.groupby(key_columns='artist', operations=listen_totals)

In [None]:
# First row of the descending sort is the most-played artist; first row of
# the ascending sort is the least-played one.
for order_ascending in (False, True):
    print(song_data_.sort('total_count', ascending=order_ascending)[0])

# 3

## Using groupby-aggregate to find the most recommended songs

In [None]:
# Cap the evaluation at the first 10000 distinct user ids from the test split.
test_user_ids = test_data['user_id'].unique()
subset_test_users = test_user_ids[0:10000]


In [None]:
# Ask for a single recommendation (k=1) for each user in the subset.
recommendations = personalized_model.recommend(users=subset_test_users, k=1)

In [None]:
# Group the recommendations by song and count the rows in each group into 'total_count'.
recommendation_counts = {'total_count': graphlab.aggregate.COUNT('song')}
grouped_by = recommendations.groupby(key_columns='song', operations=recommendation_counts)

In [None]:
# The first row after sorting by count, descending, is the most recommended song.
most_recommended = grouped_by.sort('total_count', ascending=False)[0]
print(most_recommended)