Skip to content

Commit

Permalink
minor cleanup + change to api
Browse files Browse the repository at this point in the history
  • Loading branch information
Allen Tran committed Feb 11, 2016
1 parent 20d2c8c commit e3277fb
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 23 deletions.
58 changes: 36 additions & 22 deletions ppca/_ppca.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,34 +3,47 @@
import numpy as np
from scipy.linalg import orth


class PPCA():

def __init__(self, data):
def __init__(self):

self.raw = data
self.raw[np.isinf(self.raw)] = np.max(self.raw[np.isfinite(self.raw)])
self.raw = None
self.data = None
self.C = None
self.means = None
self.stds = None

def _standardize(self, X):

def fit(self, d=None, tol=1e-4, min_obs=10, verbose=False):
if self.means is None or self.stds is None:
raise RuntimeError("Fit model first")

return (X - self.means) / self.stds

def fit(self, data, d=None, tol=1e-4, min_obs=10, verbose=False):

self.raw = data
self.raw[np.isinf(self.raw)] = np.max(self.raw[np.isfinite(self.raw)])

valid_series = np.sum(~np.isnan(self.raw), axis=0) >= min_obs

data = self.raw[:, valid_series].copy()
N = data.shape[0]
D = data.shape[1]

M = np.nanmean(data, axis=0)
stds = np.nanstd(data, axis=0)
data = (data - np.tile(M, (N, 1))) / np.tile(stds, (N, 1))
self.means = np.nanmean(data, axis=0)
self.stds = np.nanstd(data, axis=0)

data = self._standardize(data)
observed = ~np.isnan(data)
missing = np.sum(~observed)
data[~observed] = 0

# initial

if d is None:
d = round(0.2*D)
d = data.shape[1]

if self.C is None:
C = np.random.randn(D, d)
Expand All @@ -40,7 +53,7 @@ def fit(self, d=None, tol=1e-4, min_obs=10, verbose=False):
X = np.dot(np.dot(data, C), np.linalg.inv(CC))
recon = np.dot(X, C.T)
recon[~observed] = 0
ss = np.sum((recon-data)**2)/(N*D - missing)
ss = np.sum((recon - data)**2)/(N*D - missing)

v0 = np.inf
counter = 0
Expand Down Expand Up @@ -79,36 +92,37 @@ def fit(self, d=None, tol=1e-4, min_obs=10, verbose=False):
counter += 1
v0 = v1


C = orth(C)
vals, vecs = np.linalg.eig(np.cov(np.dot(data, C).T))
order = np.flipud(np.argsort(vals))
vecs = vecs[:, order]
vals = vals[order]

C = np.dot(C, vecs)
X = np.dot(data, C)


# attach objects to class
self.C = C
self.ss = ss
self.M = M
self.sigma = stds
self.X = X
self.data = data
self.eig_vals = vals
self._calc_var()

def transform():
import IPython
IPython.embed()
assert False

def transform(self, data=None):

assert self.C is not None
self.X = np.dot(self.data, self.C)
return self.X
if self.C is None:
raise RuntimeError('Fit the data model first.')
if data is None:
return np.dot(self.data, self.C)
return np.dot(data, self.C)

def _calc_var(self):

if self.data is None:
print 'Fit the data model first.'
return None
raise RuntimeError('Fit the data model first.')

data = self.data.T

Expand All @@ -119,7 +133,7 @@ def _calc_var(self):

def save(self, fpath):

np.save(fpath)
np.save(fpath, self.C)

def load(self, fpath):

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
name = 'ppca'
setup(
name = name,
version = "0.0.0",
version = "0.0.2",
author = 'Allen Tran',
author_email = 'realallentran@gmail.com',
description = 'Probabilistic PCA',
Expand Down

0 comments on commit e3277fb

Please sign in to comment.