Skip to content

Commit

Permalink
Rely on fewer local variables (use object members instead)
Browse files: browse the repository at this point in the history
  • Loading branch information
Tim Hopper committed Aug 12, 2015
1 parent 57f721b commit d83764d
Showing 1 changed file with 7 additions and 10 deletions.
17 changes: 7 additions & 10 deletions lda/lda.py
@@ -170,25 +170,22 @@ def transform(self, X, max_iter=20, tol=1e-16):
# in case user passes a (non-sparse) array of shape (n_features,)
# turn it into an array of shape (1, n_features)
X = np.atleast_2d(X)
phi = self.components_
alpha = self.alpha
n_topics = len(self.components_)
doc_topic = np.empty((X.shape[0], n_topics))
doc_topic = np.empty((X.shape[0], self.n_topics))
WS, DS = lda.utils.matrix_to_lists(X)
# TODO: this loop is parallelizable
for d in range(X.shape[0]):
for d in np.unique(DS):
# initialization step
ws_doc = WS[DS == d]
PZS = (phi[:, ws_doc].T * alpha).astype(float)
PZS = (self.components_[:, ws_doc].T * self.alpha).astype(float)
# NOTE: numpy /= is integer division
PZS /= PZS.sum(axis=1)[:, np.newaxis]
assert PZS.shape == (len(ws_doc), n_topics)
assert PZS.shape == (len(ws_doc), self.n_topics)
PZS_new = np.empty_like(PZS)
for s in range(max_iter):
PZS_sum = PZS.sum(axis=0)
for i, w in enumerate(ws_doc):
PZS_sum -= PZS[i]
PZS_new[i] = phi[:, w] * (PZS_sum + alpha)
PZS_new[i] = self.components_[:, w] * (PZS_sum + self.alpha)
PZS_sum += PZS[i]
PZS_new /= PZS_new.sum(axis=1)[:, np.newaxis]
delta_naive = np.abs(PZS_new - PZS).sum()
@@ -198,8 +195,8 @@ def transform(self, X, max_iter=20, tol=1e-16):
break
theta_doc = PZS.sum(axis=0)
theta_doc /= sum(theta_doc)
assert len(theta_doc) == n_topics
assert theta_doc.shape == (n_topics,)
assert len(theta_doc) == self.n_topics
assert theta_doc.shape == (self.n_topics,)
doc_topic[d] = theta_doc
return doc_topic

Expand Down

0 comments on commit d83764d

Please sign in to comment.