# Non-negative Matrix Factorization (NMF) for Recommendation Systems

In [1]:
import pandas as pd
from sklearn.decomposition import NMF
from sklearn.preprocessing import MaxAbsScaler, Normalizer
from sklearn.pipeline import make_pipeline


# Load dataframes
artist_df = pd.read_csv('/Users/alexandergursky/Local_Repository/Datasets/Dataset_Package/Musical artists/artists.csv', header=None)
samples_df = pd.read_csv('/Users/alexandergursky/Local_Repository/Datasets/Dataset_Package/Musical artists/scrobbler-small-sample.csv')

In [2]:
# Mapping is required to merge the dataframes to something useable
artist_df['artist_key'] = artist_df.index

artist_df = artist_df.rename(columns={0:'artist_name'}) # Renamed the column containing the names from 0 to artist_name, not required.

# Merging to one dataframe
merged_df = pd.merge(artist_df, samples_df,left_on='artist_key', right_on='artist_offset')

# Dropping the keys
merged_df = merged_df.drop(['artist_key', 'artist_offset'],axis=1)

# Turning the merged dataframe into a sparse array
# row= artist, columns= user_offset, values= playcount
sparse_arr_df = merged_df.pivot_table(index='artist_name', columns='user_offset', values='playcount', fill_value=0)

In [3]:
# Creating instances of: scaler, NMF, and normalization
scaler = MaxAbsScaler()
nmf = NMF(n_components= 20)
norm = Normalizer()

# Creating Pipeline
pipeline = make_pipeline(scaler, nmf, norm)

# Fitting and transforming the model to the data, then depositing it as a finished product
scal_nmf_norm = pipeline.fit_transform(sparse_arr_df)




In [4]:
# Making a list of the artist names to transform output to a df
artist_list = artist_df['artist_name'].values.tolist()

# Df of all of the artist and their component similarity ratings in each cluster
scal_nmf_norm_df = pd.DataFrame(scal_nmf_norm, index= artist_list)


In [5]:
# Selecting an artist, and the component similarity ratings
artist = scal_nmf_norm_df.loc['Foo Fighters']

# Scalar(dot) product operation on our df
recomendation = scal_nmf_norm_df.dot(artist)

# Displaying recomendations
print(recomendation.nlargest(6))

Foo Fighters        1.000000
AC/DC               0.990990
Aimee Mann          0.987317
Rufus Wainwright    0.959258
Madonna             0.951436
Jet                 0.936947
dtype: float64
