In [52]:
import numpy as np
import scipy.sparse as spr

import nimfa
import pandas as pd

In [53]:
dat=pd.read_csv('out_clean.csv')

In [54]:
nn = np.array(dat[['Total Generation (KWH)','Insolation']])
nn

array([[3218.        ,    9.37535974],
       [3633.        ,    9.37535974],
       [4377.        ,    9.37535974],
       ...,
       [3732.03      ,    5.834     ],
       [4063.88      ,    5.584     ],
       [4093.9       ,    5.635     ]])

In [55]:
# V = spr.csr_matrix([[1, 0, 2, 4], [0, 0, 6, 3], [4, 0, 5, 6]])
V = spr.csr_matrix(nn)
print('Target:\n%s' % V.todense())

Target:
[[3218.            9.37535974]
 [3633.            9.37535974]
 [4377.            9.37535974]
 ...
 [3732.03          5.834     ]
 [4063.88          5.584     ]
 [4093.9           5.635     ]]


In [56]:
nmf = nimfa.Nmf(V, max_iter=200, rank=2, update='euclidean', objective='fro')
nmf_fit = nmf()

In [57]:
W = nmf_fit.basis()
print('Basis matrix:\n%s' % W.todense())

Basis matrix:
[[    0.          3812.74159278]
 [    0.             0.        ]
 [12212.71014479     0.        ]
 ...
 [10413.11415162     0.        ]
 [ 4883.63434694  2741.18994188]
 [ 2478.79688325  3797.92901355]]


In [58]:
H = nmf_fit.coef()
print('Mixture matrix:\n%s' % H.todense())

Mixture matrix:
[[0.35839711 0.        ]
 [0.84401231 0.00237953]]


In [59]:
print('Euclidean distance: %5.3f' % nmf_fit.distance(metric='euclidean'))


Euclidean distance: 3730309177.984


In [60]:
sm = nmf_fit.summary()
sm

  c = numerator.sum() / np.sqrt((denomA.sum() * denomB.sum()))


{'rank': 2,
 'sparseness': (0.2846241409257992, 0.6242181966994691),
 'rss': 3730309177.983668,
 'evar': 0.9075944366627561,
 'residuals': <2370x2 sparse matrix of type '<class 'numpy.float64'>'
 	with 4497 stored elements in Compressed Sparse Row format>,
 'connectivity': matrix([[1., 1.],
         [1., 1.]]),
 'predict_samples': (matrix([[1, 1]]),
  [0.7019283773336058, 0.9958150831866607]),
 'predict_features': (matrix([[1, 0, 0, ..., 0, 0, 1]]),
  [0.9999999973772153,
   0.0,
   0.999999999181181,
   0.9999999981751044,
   0.6662372820650259,
   0.7986505293792701,
   0.9999999976191462,
   0.5509596376280931,
   0.5602264871512806,
   0.0,
   0.9999999974485603,
   0.9999999981751002,
   0.7020137068304044,
   0.9999999989139515,
   0.9999999977759784,
   0.9999999977176417,
   0.9999999986011588,
   0.0,
   0.7115858533093374,
   0.9999999975638924,
   0.8742078901338797,
   0.951876942857369,
   0.5211271488616318,
   0.999999997168746,
   0.9999999978812504,
   0.99999999438471

In [61]:
print('Sparseness Basis: %5.3f  Mixture: %5.3f' % (sm['sparseness'][0], sm['sparseness'][1]))

Sparseness Basis: 0.285  Mixture: 0.624


In [62]:
print('Iterations: %d' % sm['n_iter'])
print('Target estimate:\n%s' % np.dot(W.todense(), H.todense()))

Iterations: 200
Target estimate:
[[3218.00085372    9.07254733]
 [   0.            0.        ]
 [4377.            0.        ]
 ...
 [3732.03          0.        ]
 [4063.87849325    6.522754  ]
 [4093.89248939    9.0373003 ]]
