# Build a song recommender system

In [None]:
import turicreate

# Load some music data

In [None]:
# Read the listening data from an SFrame directory given relative to the
# current working directory.
song_data = turicreate.SFrame('./song_data.sframe/')

# Explore our data

In [None]:
# A bare name as the last expression of a cell makes the notebook render it.
song_data

## Show the most popular songs in the dataset

In [None]:
# Visualize the 'song' column.
# NOTE(review): the plot is expected to surface the most frequent song
# titles — confirm against the rendered output.
song_data['song'].show()

# Count the number of unique users in the data

In [None]:
# Distinct user ids; `users` is reused later to pick example users for the
# recommendation calls.
users = song_data['user_id'].unique()

In [None]:
# Number of distinct users in the dataset.
len(users)

# Create a song recommender

In [None]:
# Randomly split the rows: roughly 80% for training, the rest held out for
# evaluation. The fixed seed keeps the split reproducible across runs.
train_data, test_data = song_data.random_split(0.8, seed=0)

## Create a very simple popularity recommender

In [None]:
# Baseline recommender trained on the training split only, with users
# identified by 'user_id' and items by 'song'.
popularity_model = turicreate.popularity_recommender.create(
    train_data,
    user_id='user_id',
    item_id='song',
)

## Use the popularity model to make some predictions

In [None]:
# Popularity-based recommendations for the first user in `users`.
popularity_model.recommend(users=[users[0]])

In [None]:
# Same query for the second user, for comparison with the first user's results.
popularity_model.recommend(users=[users[1]])

# Build a recommender with personalization

In [None]:
# Item-similarity recommender trained on the same training split and the same
# user/item columns as the popularity baseline, so the two are comparable.
personalized_model = turicreate.item_similarity_recommender.create(
    train_data,
    user_id='user_id',
    item_id='song',
)

## Apply personalized model to make song recommendations

In [None]:
# Personalized recommendations for the first user in `users`.
personalized_model.recommend(users=[users[0]])

In [None]:
# Same query for the second user, for comparison with the first user's results.
personalized_model.recommend(users=[users[1]])

# Apply model to find similar songs in the data set

In [None]:
# Look up the items the model scores as most similar to this song.
# NOTE(review): the key presumably has to match the 'song' column value
# exactly (it looks like "<title> - <artist>") — confirm against the data.
personalized_model.get_similar_items(['With Or Without You - U2'])

In [None]:
# Second similarity lookup, using a different song as the query item.
personalized_model.get_similar_items(['Chan Chan (Live) - Buena Vista Social Club'])

# Compare the models quantitatively
We now formally compare the popularity and personalized models on the held-out test data using precision and recall.

In [None]:
# Evaluate both models on the held-out split. `user_sample` restricts the
# evaluation to a 5% sample of users (presumably to keep the run time short).
model_performance = turicreate.recommender.util.compare_models(
    test_data,
    [popularity_model, personalized_model],
    user_sample=0.05,
)

The table shows that the personalized model provides much better performance.

# Homework

## Counting unique users

In [None]:
def get_unique_artist_listeners(artist):
    """Return the number of distinct users who listened to ``artist``.

    Reads the notebook-level ``song_data`` SFrame, keeps the rows whose
    'artist' column equals ``artist``, and counts the unique 'user_id'
    values among them.
    """
    artist_rows = song_data[song_data['artist'] == artist]
    distinct_listeners = artist_rows['user_id'].unique()
    return len(distinct_listeners)

In [None]:
# Count the distinct listeners of each artist of interest. The artist strings
# are compared for exact equality with the 'artist' column, so their spelling
# and capitalization (e.g. 'Lady GaGa') must be kept as-is.
kanye_west_listeners_count = get_unique_artist_listeners('Kanye West')
foo_fighters_listeners_count = get_unique_artist_listeners('Foo Fighters')
taylor_swift_listeners_count = get_unique_artist_listeners('Taylor Swift')
lady_gaga_listeners_count = get_unique_artist_listeners('Lady GaGa')

print("Kanye West:", kanye_west_listeners_count)
print("Foo Fighters:", foo_fighters_listeners_count)
print("Taylor Swift:", taylor_swift_listeners_count)
print("Lady GaGa:", lady_gaga_listeners_count)


## Using groupby-aggregate to find the most popular and least popular artists

In [None]:
# Aggregate per artist: sum 'listen_count' over each artist's rows and store
# the result in a 'total_count' column.
total_listen_count_data = song_data.groupby(
    key_column_names='artist',
    operations={'total_count': turicreate.aggregate.SUM('listen_count')},
)

In [None]:
# Order artists from highest to lowest total listen count, so the first row is
# the most listened-to artist and the last row the least.
total_listen_count_data = total_listen_count_data.sort('total_count', ascending=False)

In [None]:
# Row 0 holds the largest 'total_count' once the table is sorted in
# descending order.
most_popular_artist = total_listen_count_data[0]['artist']
print("Most popular artist:", most_popular_artist)

In [None]:
# The last row holds the smallest 'total_count' once the table is sorted in
# descending order. The sort uses only 'total_count', so if several artists
# tie for the minimum, which one ends up last is not controlled here.
least_popular_artist = total_listen_count_data[-1]['artist']
print("Least popular artist:", least_popular_artist)

## [OPTIONAL] Using groupby-aggregate to find the most recommended songs

In [None]:
# Keep at most the first 10,000 distinct user ids from the test split.
subset_test_users = test_data['user_id'].unique()[0:10000]

In [None]:
# Request a single recommendation (k=1) for each selected user.
recommendations = personalized_model.recommend(subset_test_users, k=1)

In [None]:
# Count how many recommendation rows each song received; the result has one
# row per song and a 'count' column.
recommendations_by_song = recommendations.groupby(
    key_column_names='song',
    operations={'count': turicreate.aggregate.COUNT()},
)

In [None]:
# Order songs from most to least frequently recommended.
recommendations_by_song = recommendations_by_song.sort('count', ascending=False)

In [None]:
# Row 0 holds the largest 'count' once the table is sorted in descending order.
most_recommended_song = recommendations_by_song[0]['song']
print('Most recommended song:', most_recommended_song)

In [None]:
# The last row holds the smallest 'count' once the table is sorted in
# descending order. The sort uses only 'count', so if several songs tie for
# the minimum, which one ends up last is not controlled here. Songs that were
# never recommended do not appear in this table at all.
least_recommended_song = recommendations_by_song[-1]['song']
print('Least recommended song:', least_recommended_song)