# Music Album Recommender

### Import libraries and Data

In [1]:
import pandas as pd
import numpy as np
from scipy import sparse
from sklearn.metrics.pairwise import pairwise_distances, cosine_distances, cosine_similarity

In [2]:
music_df_full = pd.read_csv('./data/clean/amazon_music_reviews.csv')

In [3]:
music_df_full.drop(columns=['Unnamed: 0', 'level_0', 'index'], inplace=True)

In [4]:
music_df_full.shape

(4740778, 5)

In [5]:
music_df_full.head(1)

Unnamed: 0,customer_id,review_id,product_title,star_rating,review_body
0,10140119,R3LI5TRP3YIDQL,Whatever's for Us: Remastered,5.0,Love this CD along with other CDs by the same ...


In [6]:
music_df_subset = music_df_full[music_df_full.groupby(['product_title'])['review_body'].transform('count') >= 50]

In [7]:
# subset of full dataset for recommender
music_df = music_df_subset[['product_title', 'review_id', 'customer_id', 'star_rating', 'review_body']]
music_df = music_df.sample(n=100_000)

### Prepare recommender using cosine distances

In [8]:
#pivot table from the books_df dataframe
pivot_df = pd.pivot_table(music_df, index='product_title', columns='customer_id', values='star_rating')
#create sparse matrix
sparse_df = sparse.csr_matrix(pivot_df.fillna(0))
#calculate cosine distances for similarities
recommender = pairwise_distances(sparse_df, metric='cosine')
#export as a dataframe
rec_df = pd.DataFrame(recommender, columns=pivot_df.index, index=pivot_df.index)

In [9]:
# get average rating, number of players, playing time, etc to filter by in app, group by game ID
music_info = music_df_full.groupby('product_title').mean()
music_info.rename(columns={'star_rating': 'average_rating'}, inplace=True)
music_info.drop(columns=['customer_id'])

rec_df = pd.merge(left = rec_df, right= music_info, right_index = True, left_index = True)
rec_df.insert(0, 'album_name', rec_df.index)

### Define book Recommender function

In [10]:
def album_recommender(album_title):
    return 1- rec_df[album_title].sort_values()[1:6]

In [11]:
album_recommender("Room for Squares")

product_title
Love Me Back                                   0.152145
Calling All Hearts                             0.144338
Bangerz (Deluxe Version)                       0.095044
Love Hurts                                     0.085588
Richard Pryor - ... Is It Something I Said?    0.076035
Name: Room for Squares, dtype: float64

In [12]:
album_recommender("Random Access Memories")

product_title
Angles                              0.061167
The Definitive Collection [2 CD]    0.041338
Home For Christmas                  0.012950
Dystopia (Deluxe)                   0.003325
Pale Communion                      0.002871
Name: Random Access Memories, dtype: float64

In [13]:
# save rec_df for use in streamlit app
# rec_df.to_pickle('./streamlit_app/data/albums.pk1')