# Nonnegative Matrix Factorization

In [19]:
import pandas as pd
import numpy as np

from sklearn.decomposition import NMF
from sklearn.preprocessing import StandardScaler

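**Important: scale the data first, but keep it non-negative.** NMF rejects negative inputs, so a centering scaler such as `StandardScaler` cannot be applied as-is; use a range-based scaler (e.g. `MinMaxScaler`) instead.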

## Using Sklearn

Step 1. input matrix X

In [3]:
# Toy ratings: one row per user, one column per movie (7 users x 5 movies).
# A rating of 0 means the user has not rated that movie.
movies = ['TheMatrix', 'Alien', 'StarWars', 'Casablanca', 'Titanic']

# Keep each user's name next to their row so labels and data cannot drift apart.
ratings_by_user = {
    'Alice': [1, 2, 2, 0, 0],
    'Bob':   [3, 5, 5, 0, 0],
    'Cindy': [4, 4, 4, 0, 0],
    'Dan':   [5, 5, 5, 0, 0],
    'Emily': [0, 2, 0, 4, 4],
    'Frank': [0, 0, 0, 5, 5],
    'Greg':  [0, 1, 0, 2, 2],
}
users = list(ratings_by_user.keys())
rating_matrix = np.array(list(ratings_by_user.values()))

ratings_df = pd.DataFrame(data=rating_matrix, index=users, columns=movies)
ratings_df.head(7)

Unnamed: 0,TheMatrix,Alien,StarWars,Casablanca,Titanic
Alice,1,2,2,0,0
Bob,3,5,5,0,0
Cindy,4,4,4,0,0
Dan,5,5,5,0,0
Emily,0,2,0,4,4
Frank,0,0,0,5,5
Greg,0,1,0,2,2


Step 2. Factorize matrix X into two non-negative matrices, W (user-indexed) and H (item-indexed), such that X ≈ W · H

In [10]:
# Factorize the ratings into W (one row per user) and H (one column per movie).
# NMF is already imported in the imports cell at the top of the notebook, so it
# is not re-imported here.
# init='random' combined with a fixed random_state keeps the result reproducible.
model = NMF(n_components=2, init='random', random_state=0)
W = model.fit_transform(rating_matrix)  # user-indexed factor, 2 latent columns
H = model.components_                   # item-indexed factor, 2 latent rows
W_df = pd.DataFrame(W, index=users)
H_df = pd.DataFrame(H, columns=movies)

In [11]:
# user-indexed matrix W has shape 7x2: one row per user, one column per latent feature
W_df.shape

(7, 2)

In [13]:
# W is the user-indexed factor matrix.
# The 5 movies have been compressed into 2 essential (latent) features.
# Each entry is a weight, not a rating: it shows how important that latent
# feature is to the user in that row.
# Given the latent-feature values of a new movie, these weights can be used to
# estimate how much a user (e.g. Alice) will like it.
W_df.head(7)

Unnamed: 0,0,1
Alice,0.006249,1.350341
Bob,0.010737,3.493405
Cindy,0.0,3.169976
Dan,0.0,3.96247
Emily,1.502844,0.36582
Frank,1.819595,0.0
Greg,0.751422,0.18291


In [15]:
# item-indexed matrix H has shape 2x5: one row per latent feature, one column per movie
H_df.shape

(2, 5)

In [16]:
# H is the item-indexed factor matrix.
# The 7 actual users are reduced to 2 essential "user types" (one per row).
# The numbers are weights, not actual ratings: how strongly each movie is
# associated with each latent user type.
H_df.head()

Unnamed: 0,TheMatrix,Alien,StarWars,Casablanca,Titanic
0,0.0,0.463986,0.0,2.708103,2.708103
1,1.10961,1.332708,1.318328,0.0,0.0


Step 3. Check the factorization: use the two latent features to reconstruct an approximation of the original matrix

In [17]:
# Map the user weights W back into rating space with the fitted model, then
# attach the user/movie labels so the result lines up with ratings_df.
ratings_reconstructed = model.inverse_transform(W)
ratings_reconstructed_df = pd.DataFrame(
    data=ratings_reconstructed,
    index=users,
    columns=movies,
)
ratings_reconstructed_df.head(7)

Unnamed: 0,TheMatrix,Alien,StarWars,Casablanca,Titanic
Alice,1.498352,1.802509,1.780193,0.016922,0.016922
Bob,3.876317,4.66067,4.605455,0.029076,0.029076
Cindy,3.517437,4.224651,4.179069,0.0,0.0
Dan,4.396796,5.280814,5.223836,0.0,0.0
Emily,0.405918,1.18483,0.482271,4.069857,4.069857
Frank,0.0,0.844267,0.0,4.92765,4.92765
Greg,0.202959,0.592415,0.241135,2.034928,2.034928


In [16]:
# original ratings, shown again for side-by-side comparison with the reconstruction
ratings_df.head(7)

Unnamed: 0,TheMatrix,Alien,StarWars,Casablanca,Titanic
Alice,1,2,2,0,0
Bob,3,5,5,0,0
Cindy,4,4,4,0,0
Dan,5,5,5,0,0
Emily,0,2,0,4,4
Frank,0,0,0,5,5
Greg,0,1,0,2,2


## NumPy algorithm overview

In [None]:
# To factorize a non-negative input matrix V into W and H (V ≈ W @ H) with
# alternating least squares (ALS):
#   1. initialize W with small, positive, random values
#   2. solve least squares for H (W fixed)
#   3. clip negative values in H
#   4. solve least squares for W (H fixed)
#   5. clip negative values in W
#   Repeat 2-5 until convergence (the error stops improving).


def nmf_als(V, n_components, max_iter=500, tol=1e-6, seed=0):
    """Approximate a non-negative matrix V as W @ H via clipped ALS.

    Parameters
    ----------
    V : array-like of shape (n_rows, n_cols)
        Non-negative input matrix.
    n_components : int
        Number of latent features (columns of W / rows of H). Must be >= 1.
    max_iter : int, default 500
        Maximum number of alternating update rounds. Must be >= 1.
    tol : float, default 1e-6
        Stop once the Frobenius reconstruction error changes by less than
        this amount between two consecutive rounds.
    seed : int, default 0
        Seed for the random initialization of W, so results are reproducible.

    Returns
    -------
    W : ndarray of shape (n_rows, n_components), all entries >= 0
    H : ndarray of shape (n_components, n_cols), all entries >= 0

    Raises
    ------
    ValueError
        If V is not 2-D, contains negative values, or if n_components or
        max_iter is smaller than 1.
    """
    V = np.asarray(V, dtype=float)
    if V.ndim != 2:
        raise ValueError("V must be a 2-D matrix")
    if (V < 0).any():
        raise ValueError("V must not contain negative values")
    if n_components < 1:
        raise ValueError("n_components must be >= 1")
    if max_iter < 1:
        raise ValueError("max_iter must be >= 1")

    # 1. Initialize W with small, positive, random values.
    rng = np.random.default_rng(seed)
    W = rng.random((V.shape[0], n_components))

    previous_error = np.inf
    for _ in range(max_iter):
        # 2. Solve W @ H = V for H in the least-squares sense.
        #    rcond=None selects the machine-precision default and avoids the
        #    FutureWarning older NumPy versions emit when it is omitted.
        H = np.linalg.lstsq(W, V, rcond=None)[0]
        # 3. Clip H so there are no negative values.
        H[H < 0] = 0

        # 4. Solve for W with H fixed: V.T = H.T @ W.T is the same problem
        #    transposed, so the same solver can be reused.
        W = np.linalg.lstsq(H.T, V.T, rcond=None)[0].T
        # 5. Clip W so there are no negative values.
        W[W < 0] = 0

        # Convergence check on the Frobenius norm of the residual.
        error = np.linalg.norm(V - W @ H)
        if abs(previous_error - error) < tol:
            break
        previous_error = error

    return W, H


# Demo on a small random non-negative matrix so this cell runs on its own.
# Note: this rebinds W and H (here W is 10x5 and H is 5x8).
V = np.random.default_rng(0).random((10, 8))
W, H = nmf_als(V, n_components=5)

# Frobenius reconstruction error of the factorization
np.linalg.norm(V - W @ H)