Skip to content

Commit

Permalink
FIX is_tri has become unstable with numpy > 1.9 (#25)
Browse files Browse the repository at this point in the history
* TEST triu dense versus dense.
* FIX is_tri is now more stable
* ENH avoid testing is_tri everytime
  • Loading branch information
NelleV committed Oct 10, 2016
1 parent d474815 commit 0575118
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 13 deletions.
9 changes: 5 additions & 4 deletions iced/normalization.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,19 +66,20 @@ def ICE_normalization(X, SS=None, max_iter=3000, eps=1e-4, copy=True,
is_symetric_or_tri(X)
old_dbias = None
bias = np.ones((m, 1))
_is_tri = is_tri(X)

if total_counts is None:
total_counts = X.sum()
for it in np.arange(max_iter):
if norm == 'l1':
# Actually, this should be done if the matrix is diag sup or diag
# inf
if is_tri(X):
if _is_tri:
sum_ds = X.sum(axis=0) + X.sum(axis=1).T - X.diagonal()
else:
sum_ds = X.sum(axis=0)
elif norm == 'l2':
if is_tri(X):
if _is_tri:
sum_ds = ((X**2).sum(axis=0) +
(X**2).sum(axis=1).T -
(X**2).diagonal())
Expand All @@ -87,7 +88,6 @@ def ICE_normalization(X, SS=None, max_iter=3000, eps=1e-4, copy=True,

if SS is not None:
raise NotImplementedError

dbias = sum_ds.reshape((m, 1))
if counts_profile is not None:
dbias /= counts_profile[:, np.newaxis]
Expand All @@ -100,7 +100,8 @@ def ICE_normalization(X, SS=None, max_iter=3000, eps=1e-4, copy=True,
if sparse.issparse(X):
X = _update_normalization_csr(X, np.array(dbias).flatten())
else:
X /= dbias * dbias.T
X /= dbias
X /= dbias.T

bias *= np.sqrt(X.sum() / total_counts)
X *= total_counts / X.sum()
Expand Down
24 changes: 18 additions & 6 deletions iced/tests/test_normalization.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,16 +48,28 @@ def test_sparse_ICE_normalization_triu():
X[thres] = 0
X = X + X.T
sparse_X = sparse.triu(X)
true_normed_X = ICE_normalization(X, eps=1e-10, max_iter=10)
true_normed_X, true_biases = ICE_normalization(
X, eps=1e-10, max_iter=10, output_bias=True)
true_normed_X = np.triu(true_normed_X)
X = np.triu(X)
normed_X = ICE_normalization(sparse_X, eps=1e-10, max_iter=10)
assert_array_almost_equal(X, sparse_X.todense())

normed_X_sparse, biases_sparse = ICE_normalization(
sparse_X, eps=1e-10, max_iter=100,
output_bias=True)
normed_X_dense, biases_dense = ICE_normalization(
np.triu(X), eps=1e-10, max_iter=100,
output_bias=True)

# The sparse and dense version are going to be equal up to a constant
# factor
assert_array_almost_equal(normed_X_dense,
np.array(normed_X_sparse.toarray()))

normed_X *= true_normed_X.mean() / normed_X.mean()
assert_array_almost_equal(true_normed_X, np.array(normed_X.todense()))
normed_X_sparse *= true_normed_X.mean() / normed_X_sparse.mean()
normed_X_dense *= true_normed_X.mean() / normed_X_dense.mean()

assert_array_almost_equal(true_normed_X,
np.array(normed_X_sparse.todense()))
assert_array_almost_equal(true_normed_X, normed_X_dense)

total_counts = 5000
normed_X = ICE_normalization(sparse_X, eps=1e-10,
Expand Down
10 changes: 9 additions & 1 deletion iced/utils/tests/test_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def test_is_symetric_or_tri():
assert_raises(ValueError, validation.is_symetric_or_tri, X)
X = X + X.T
validation.is_symetric_or_tri(X)
X[np.tri(n, dtype=bool)] = 0
X = np.triu(X)
validation.is_symetric_or_tri(X)


Expand All @@ -32,3 +32,11 @@ def test_is_symetric_or_tri_sparse():
validation.is_symetric_or_tri(X)
X[np.tri(n, dtype=bool)] = 0
validation.is_symetric_or_tri(X)


def test_is_tri():
n = 100
random_state = np.random.RandomState(seed=42)
X = random_state.randn(n, n)
assert validation.is_tri(np.triu(X))
assert validation.is_tri(np.tril(X))
3 changes: 1 addition & 2 deletions iced/utils/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,7 @@ def is_tri(X):
if not (sparse.tril(X).sum() - diag) or \
not (sparse.triu(X).sum() - diag):
return True
elif (not (np.triu(X).sum() - diag) or
not (np.tril(X).sum() - diag)):
elif not np.triu(X, 1).sum() or not np.tril(X, -1).sum():
return True
else:
return False

0 comments on commit 0575118

Please sign in to comment.