In [2]:
from sklearn.datasets import load_breast_cancer
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt

# 1. Import Data

In [3]:
# Load the breast cancer dataset that ships with scikit-learn.
# The returned object exposes .data, .target and .feature_names, which the
# following cells use to build the DataFrames.
breast = load_breast_cancer()

In [4]:
# Wrap the feature matrix and the target vector in separate DataFrames
X = pd.DataFrame(breast.data)
Y = pd.DataFrame(breast.target)

# Column labels for the combined frame: every feature name, then the target 'Y'
kolom = [*breast.feature_names, 'Y']

In [5]:
# Place features and target side by side, then label the columns
breast_data = pd.concat([X, Y], axis='columns')
breast_data.columns = kolom

In [6]:
# Display the combined frame (feature columns plus the target column 'Y')
breast_data

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,Y
0,17.99,10.38,122.80,1001.0,0.11840,0.27760,0.30010,0.14710,0.2419,0.07871,...,17.33,184.60,2019.0,0.16220,0.66560,0.7119,0.2654,0.4601,0.11890,0
1,20.57,17.77,132.90,1326.0,0.08474,0.07864,0.08690,0.07017,0.1812,0.05667,...,23.41,158.80,1956.0,0.12380,0.18660,0.2416,0.1860,0.2750,0.08902,0
2,19.69,21.25,130.00,1203.0,0.10960,0.15990,0.19740,0.12790,0.2069,0.05999,...,25.53,152.50,1709.0,0.14440,0.42450,0.4504,0.2430,0.3613,0.08758,0
3,11.42,20.38,77.58,386.1,0.14250,0.28390,0.24140,0.10520,0.2597,0.09744,...,26.50,98.87,567.7,0.20980,0.86630,0.6869,0.2575,0.6638,0.17300,0
4,20.29,14.34,135.10,1297.0,0.10030,0.13280,0.19800,0.10430,0.1809,0.05883,...,16.67,152.20,1575.0,0.13740,0.20500,0.4000,0.1625,0.2364,0.07678,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
564,21.56,22.39,142.00,1479.0,0.11100,0.11590,0.24390,0.13890,0.1726,0.05623,...,26.40,166.10,2027.0,0.14100,0.21130,0.4107,0.2216,0.2060,0.07115,0
565,20.13,28.25,131.20,1261.0,0.09780,0.10340,0.14400,0.09791,0.1752,0.05533,...,38.25,155.00,1731.0,0.11660,0.19220,0.3215,0.1628,0.2572,0.06637,0
566,16.60,28.08,108.30,858.1,0.08455,0.10230,0.09251,0.05302,0.1590,0.05648,...,34.12,126.70,1124.0,0.11390,0.30940,0.3403,0.1418,0.2218,0.07820,0
567,20.60,29.33,140.10,1265.0,0.11780,0.27700,0.35140,0.15200,0.2397,0.07016,...,39.42,184.60,1821.0,0.16500,0.86810,0.9387,0.2650,0.4087,0.12400,0


# 2. Non-Negative Matrix Factorization (NMF)
Tujuan utama NMF adalah menemukan dua matriks non-negatif **W** dan **H** yang memenuhi aproksimasi **V ≈ WH**, di mana
- **V** merupakan matriks data awal yang berukuran m x n
- **W** merupakan matriks yang berukuran m x k 
- **H** merupakan matriks yang berukuran k x n

di mana m adalah banyaknya observasi, n adalah banyaknya variabel, dan k adalah banyaknya komponen yang terbentuk.

In [7]:
from sklearn.decomposition import NMF

In [8]:
# Two-component NMF. The random initialisation is pinned with random_state so
# reruns give the same factors; max_iter is set high (presumably so the solver
# is not cut off before it converges).
model = NMF(
    n_components=2,
    init='random',
    random_state=0,
    max_iter=50000,
)

In [9]:
# Fit the NMF model on X and keep both factors:
#   W - the transformed data returned by fit_transform (m x k in the notation above)
#   H - the components stored on the fitted model (k x n in the notation above)
W = model.fit_transform(X)
H = model.components_

In [12]:
# Put the NMF scores (W) into a labelled DataFrame, one column per component
df_NMF = pd.DataFrame(W, columns=['NM1', 'NM2'])
df_NMF

Unnamed: 0,NM1,NM2
0,19.501659,7.146940
1,18.203181,20.847962
2,15.818966,20.153354
3,5.202298,6.942521
4,14.229749,26.056207
...,...,...
564,18.724250,25.610713
565,15.943794,22.101390
566,10.227441,16.356143
567,16.856303,21.068639


In [13]:
"""
Dapat kita lihat bahwa perkalian dua matriks antara W dan H akan menghasilkan hasil yang setara dengan
data raw X.
"""
pd.DataFrame(np.matmul(W,H)).head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,20,21,22,23,24,25,26,27,28,29
0,13.321425,13.337769,89.616624,1016.228911,0.055387,0.118853,0.166753,0.095415,0.101892,0.029933,...,20.152346,20.072553,135.974696,2018.536645,0.089639,0.347851,0.455878,0.187284,0.199711,0.053826
1,23.697475,29.026279,155.794374,1313.887044,0.139559,0.179953,0.189231,0.106238,0.261538,0.087315,...,28.769733,39.093677,191.407631,1957.185345,0.194246,0.449394,0.527065,0.222514,0.428264,0.121381
2,22.211169,27.605123,145.749813,1194.262521,0.133898,0.166294,0.169268,0.09479,0.251184,0.084406,...,26.431823,36.896928,175.598692,1711.328153,0.184685,0.408443,0.472613,0.20022,0.406904,0.1157
3,7.554562,9.44643,49.533922,400.862607,0.045985,0.056221,0.056412,0.031554,0.086301,0.029077,...,8.913634,12.586133,59.180113,564.417284,0.063192,0.137092,0.157681,0.066904,0.139187,0.039629
4,26.2779,34.101224,171.449675,1278.566313,0.169576,0.188179,0.171041,0.094872,0.319015,0.109128,...,29.345447,44.573851,194.01752,1580.251261,0.227963,0.437143,0.481913,0.206778,0.501265,0.143856


In [14]:
# First rows of the raw feature matrix, for comparison with the product of W and H
X.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,20,21,22,23,24,25,26,27,28,29
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


# References
1. https://papers.nips.cc/paper/2000/file/f9d1152547c0bde01830b7e8bd60024c-Paper.pdf
2. https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.NMF.html