# Music Album Recommender

### Import libraries and Data

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import sparse
from sklearn.metrics.pairwise import pairwise_distances, cosine_distances, cosine_similarity

In [7]:
music_df_full = pd.read_csv('./data/clean/music_df.csv', index_col=0)

#drop duplicates
music_df_full.dropna(subset=['content'], inplace=True)
music_df_full.dropna(subset=['title'], inplace=True)

In [8]:
music_df_full.shape

(257449, 10)

In [9]:
music_df_full.head(1)

Unnamed: 0,user_id,amazon-id,content,score,summary,genre,title,artist,first-release-year,related
0,A1OFY4ATO7D13W,26197898,Buy this album. Now. Don't worry about the re...,5,PITCHFORK Eat your heart out!,Alternative Rock,Southern Fashion,The Sudden Passion,2012.0,


In [12]:
# subset of full dataset for recommender
music_df = music_df_full[['title', 'amazon-id', 'user_id', 'score']]

In [13]:
music_df

Unnamed: 0,title,amazon-id,user_id,score
0,Southern Fashion,0026197898,A1OFY4ATO7D13W,5
1,Southern Fashion,0026197898,A2KH83L1F70QR8,5
2,Meditation For Success: In Body & Mind Rejuven...,0615205399,A1KGXC7IRLVJR3,5
3,Meditation For Success: In Body & Mind Rejuven...,0615205399,A1BT6LQ9NY6RO3,5
4,Meditation For Success: In Body & Mind Rejuven...,0615205399,A206OKO2FE2IPL,5
...,...,...,...,...
263520,The Original Sounds of the Smokies: Volume One,B00LG9GR3S,A3RKUPYX1RC9WO,5
263521,The Original Sounds of the Smokies: Volume One,B00LG9GR3S,A122G17YDFX176,5
263522,The Original Sounds of the Smokies: Volume One,B00LG9GR3S,A26QVK35BBBKU8,5
263523,The Original Sounds of the Smokies: Volume One,B00LG9GR3S,A1V76VMZ0N3H5W,5


In [15]:
# #create a sample dataframe with less entries, sample 100,000 reviews
# music_df_subset = music_df.sample(n=30000, replace=False)
# music_df_subset.shape

In [17]:
#create a sample dataframe with less entries, only show albums with 5 or more reviews
music_df_subset = music_df_full.groupby('title').count()
music_df_subset = music_df_subset[music_df_subset['content'] >=5]
music_df_subset.shape

(9116, 9)

### Prepare recommender using cosine distances

In [18]:
#pivot table from the books_df dataframe
pivot_df = pd.pivot_table(music_df_subset, index='title', columns='user_id', values='score')
#create sparse matrix
sparse_df = sparse.csr_matrix(pivot_df.fillna(0))
#calculate cosine distances for similarities
recommender = pairwise_distances(sparse_df, metric='cosine')
#export as a dataframe
rec_df = pd.DataFrame(recommender, columns=pivot_df.index, index=pivot_df.index)

In [20]:
# get average rating, number of players, playing time, etc to filter by in app, group by game ID
music_info = music_df_full.groupby('title').mean()
music_info.drop(columns=['first-release-year'], inplace=True)
music_info.rename(columns={'score': 'average_rating'}, inplace=True)

rec_df = pd.merge(left = rec_df, right= music_info, right_index = True, left_index = True)
# rec_df.insert(0, 'artist_name', )
rec_df.insert(0, 'album_name', rec_df.index)

### Define book Recommender function

In [23]:
# music_df_full.sample(n=5)

In [26]:
music_df_full[music_df_full['artist'] == 'fun.']

Unnamed: 0,user_id,amazon-id,content,score,summary,genre,title,artist,first-release-year,related


In [28]:
music_df_full[music_df_full['amazon-id'] =='B01HJG3VZI']

Unnamed: 0,user_id,amazon-id,content,score,summary,genre,title,artist,first-release-year,related


In [25]:
music_df_full.shape

(257449, 10)

In [47]:
music_df_full.title[23772]

'Anthology 1 '

In [46]:
music_df_full[music_df_full['artist']=='The Beatles']

Unnamed: 0,user_id,amazon-id,content,score,summary,genre,title,artist,first-release-year,related
23772,AL2VDE4LO4ESR,B000002TYX,Anthology 1 was the first two-disk set release...,4,Better for study of Beatles history than intro...,Rock,Anthology 1,The Beatles,1995.0,"{'also_bought': ['B000002TYZ', 'B000002TZ2', '..."
23773,A3JXOXKWOIETIY,B000002TYX,Anthology I starts with &quot;Free as a Bird&q...,5,A Must Listen for all fans,Rock,Anthology 1,The Beatles,1995.0,"{'also_bought': ['B000002TYZ', 'B000002TZ2', '..."
23774,A1V3LVQVGH5TK5,B000002TYX,This album is a lovely compliation of the earl...,3,More compilation than rare versions,Rock,Anthology 1,The Beatles,1995.0,"{'also_bought': ['B000002TYZ', 'B000002TZ2', '..."
23775,A1GAHLV8D744HK,B000002TYX,Great addition to Beatles discography!Vinyl pr...,5,Great addition to Beatles discography!,Rock,Anthology 1,The Beatles,1995.0,"{'also_bought': ['B000002TYZ', 'B000002TZ2', '..."
23776,A1K6BI4CRSGC3P,B000002TYX,The whole Anthology is spectacular!! I bought...,5,Listen to Shout! w/ earphones,Rock,Anthology 1,The Beatles,1995.0,"{'also_bought': ['B000002TYZ', 'B000002TZ2', '..."
...,...,...,...,...,...,...,...,...,...,...
253686,A26Q9TVAJFM1BC,B00CRS941Y,Had some minor defects but that is to be expec...,4,Still works great after all the years of age.,Rock,"REVOLVER by The Beatles, Capitol Records, Viny...",The Beatles,,"{'also_bought': ['B0025KVLTW', 'B0041KVZ1I', '..."
253687,AUFR5X4GT1PT8,B00CRS941Y,Record played great. Cover very shabby. Reco...,4,Good find on old favorite.,Rock,"REVOLVER by The Beatles, Capitol Records, Viny...",The Beatles,,"{'also_bought': ['B0025KVLTW', 'B0041KVZ1I', '..."
253688,AFEN54UGJQOK9,B00CRS941Y,In my opinion there are a few albums that over...,5,GRANNY SMITH PART FRIGGEN TWO!!!!!!!,Rock,"REVOLVER by The Beatles, Capitol Records, Viny...",The Beatles,,"{'also_bought': ['B0025KVLTW', 'B0041KVZ1I', '..."
257873,A3KV9KSIBU2N9E,B00EWTRYZE,The packaging isn't deluxe but the songs are. ...,5,Excellent A-Z Collection,Rock,The Beatles and Solo Greatest Hits Collection ...,The Beatles,,"{'also_viewed': ['B00AP5KRFI', 'B0021LHOIM', '..."


In [45]:
music_df_full[music_df_full['title']=='Southern Fashion']

Unnamed: 0,user_id,amazon-id,content,score,summary,genre,title,artist,first-release-year,related
0,A1OFY4ATO7D13W,26197898,Buy this album. Now. Don't worry about the re...,5,PITCHFORK Eat your heart out!,Alternative Rock,Southern Fashion,The Sudden Passion,2012.0,
1,A2KH83L1F70QR8,26197898,The Sudden Passion did a great job with this o...,5,Americana from the Southlands,Alternative Rock,Southern Fashion,The Sudden Passion,2012.0,


In [21]:
def album_recommender(album_title):
    return 1- rec_df[album_title].sort_values()[1:6]

In [22]:
album_recommender('Anthology 1 ')

title
Anthology 1                                                                                                                 1.0
Prokofiev: Peter And The Wolf / Saint-Saëns: Carnival Of The Animals                                                        0.0
Prokofiev: Peter and the Wolf                                                                                               0.0
Prokofiev: Peter and the Wolf, Britten: The Young Person's Guide to the Orchestra, Saint-Saens: Carnival of the Animals     0.0
Prokofiev: Piano Concertos Nos. 1 & 3 / Bartok: Piano Concerto No. 3                                                        0.0
Name: Anthology 1 , dtype: float64

In [102]:
# save rec_df for use in streamlit app
rec_df.to_pickle('./streamlit_app/data/albums.pk1')