In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import sys

import numpy as np
import pandas as pd
import plotly as pl

In [3]:
# Put the parent directory first on the import path so that packages living
# there can be imported (presumably `matrix_factorization`, which is imported
# in the following cell -- confirm against the repository layout).
sys.path.insert(0, "..")

# Seed NumPy's global random number generator so the random matrix generated
# later in the notebook is the same on every Restart & Run All.
# The previous call, np.random.random(20121020), did not seed anything: it drew
# 20,121,020 samples, threw them away, and left the generator unseeded.
np.random.seed(20121020)

# Initialise plotly's offline notebook mode before any iplot() call is made.
pl.offline.init_notebook_mode(connected=True)

In [4]:
from matrix_factorization.nmf_by_multiple_V_and_H import nmf_by_multiple_V_and_H
from matrix_factorization.nmf_by_multiplicative_update import (
    nmf_by_multiplicative_update,
)
from matrix_factorization.nmf_by_sklearn import nmf_by_sklearn

In [5]:
def plot_R_norms(R_norms, title=None):
    """Plot every residual-norm series as its own trace in one figure.

    Parameters
    ----------
    R_norms : iterable
        Each element is drawn as one scatter trace. An element must expose a
        ``.size`` attribute (e.g. a NumPy array); its values are plotted
        against the positions ``0 .. size - 1``.
    title : str, optional
        Figure title. Falls back to "NMF Convergence" when ``None``.
    """

    # One trace per series, named after the position of the series.
    traces = []

    for index, series in enumerate(R_norms):

        traces.append(
            dict(
                type="scatter",
                name=index,
                x=list(range(series.size)),
                y=series,
            )
        )

    layout = dict(
        width=640,
        height=640,
        title="NMF Convergence" if title is None else title,
        xaxis=dict(title="Iteration"),
        yaxis=dict(title="Residual Matrix Norm"),
    )

    pl.offline.iplot(dict(layout=layout, data=traces))

In [6]:
def plot_Ws_and_Hs(Ws, Hs):
    """Show each W matrix, then each H matrix, as a separate heatmap figure.

    Parameters
    ----------
    Ws : iterable
        Matrices drawn in 320x640 figures titled "W <index>" with axes
        labelled "k" (x) and "m" (y).
    Hs : iterable
        Matrices drawn in 640x320 figures titled "H <index>" with axes
        labelled "n" (x) and "k" (y).

    Every matrix is reversed along its first axis (``matrix[::-1]``) before
    being handed to plotly -- presumably so that the first row is drawn at the
    top of the heatmap; confirm against plotly's heatmap orientation.
    """

    # (title template, matrices, figure width, figure height, x label, y label)
    figure_specs = (
        ("W {}", Ws, 320, 640, "k", "m"),
        ("H {}", Hs, 640, 320, "n", "k"),
    )

    for title_template, matrices, width, height, x_label, y_label in figure_specs:

        for index, matrix in enumerate(matrices):

            layout = dict(
                width=width,
                height=height,
                title=title_template.format(index),
                xaxis=dict(title=x_label),
                yaxis=dict(title=y_label),
            )

            heatmap = dict(
                type="heatmap",
                z=matrix[::-1],
                colorscale="Picnic",
                showscale=False,
            )

            pl.offline.iplot(dict(layout=layout, data=[heatmap]))

In [7]:
# Dimensions of the matrix to factorize: m rows by n columns.
m, n = 160, 80

# Uniform random samples in [0, 1), arranged as an (m, n) matrix.
V = np.random.random_sample((m, n))

# Shift every entry up by the absolute value of the smallest entry.
V = V + abs(V.min())

# Two inputs to compare: the matrix itself and a copy scaled by 10.
Vs = (V, 10 * V)

# Sanity check of the shapes. The loop deliberately reuses the name `V`, so
# after this cell `V` refers to the last element of `Vs`.
for V in Vs:

    print(V.shape)

(160, 80)
(160, 80)


In [8]:
# Settings shared by every factorization call below.

# Passed as the second positional argument of each NMF function
# (presumably the factorization rank -- confirm in matrix_factorization).
k = 2

# Forwarded as the `n_iteration` keyword of each NMF function.
n_iteration = 1000

# Forwarded as the `random_seed` keyword of each NMF function.
random_seed = 20121020

In [9]:
# Factorize each V on its own, once with the multiplicative-update
# implementation and once with the sklearn-backed one, and keep the
# multiplicative-update residual-norm history for the convergence plot.
individual_R_norms = list()

for V in Vs:

    # Multiplicative update: returns W, H and a sequence of residual norms,
    # of which the last entry is reported.
    W, H, R_norms = nmf_by_multiplicative_update(
        V,
        k,
        n_iteration=n_iteration,
        random_seed=random_seed,
    )

    plot_Ws_and_Hs([W], [H])

    print("R norm (multiplicative_update): {:.2f}".format(R_norms[-1]))

    individual_R_norms.append(R_norms)

    # sklearn-backed run: the third return value is formatted as one number.
    # NOTE(review): "sklean" (sic) in the names and in the printed label is
    # kept as-is so the label matches the recorded output; consider renaming
    # to "sklearn" the next time the notebook is re-executed.
    W_by_sklean, H_by_sklean, R_by_sklean = nmf_by_sklearn(
        V,
        k,
        n_iteration=n_iteration,
        random_seed=random_seed,
    )

    plot_Ws_and_Hs([W_by_sklean], [H_by_sklean])

    print("R norm (sklean): {:.2f}".format(R_by_sklean))

# One convergence curve per input matrix.
plot_R_norms(individual_R_norms, title="NMF Independently")

R norm (multiplicative_update): 31.63


R norm (sklean): 31.63


R norm (multiplicative_update): 316.33


R norm (sklean): 316.33


In [10]:
# Factorize all matrices in Vs in a single call, which returns one W, a
# collection of Hs, and a 2-D array of residual norms (indexed below as
# R_norms[:, -1] to take the last column).
W, Hs, R_norms = nmf_by_multiple_V_and_H(
    Vs,
    k,
    n_iteration=n_iteration,
    random_seed=random_seed,
)

plot_Ws_and_Hs([W], Hs)

# Report the last residual norm of every row, formatted to two decimals.
print(
    "R norm (multiple_V_and_H): {}".format(
        ", ".join(map("{:.2f}".format, R_norms[:, -1]))
    )
)

plot_R_norms(R_norms, title="NMF Together")

R norm (multiple_V_and_H): 31.65, 316.53
