Skip to content

Commit

Permalink
Merge pull request #6564 from thocevar/naive_bayes
Browse files Browse the repository at this point in the history
[FIX] Naive Bayes: fix predictions with unknown values
  • Loading branch information
janezd committed Sep 1, 2023
2 parents 8608e84 + 4bba14d commit 38ccb8b
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 1 deletion.
3 changes: 2 additions & 1 deletion Orange/classification/naive_bayes.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def _dense_probs(self, data, probs):
zeros = np.zeros((1, probs.shape[1]))
for col, attr_prob in zip(data.T, self.log_cont_prob):
col = col.copy()
col[np.isnan(col)] = attr_prob.shape[1] - 1
col[np.isnan(col)] = attr_prob.shape[1]
col = col.astype(int)
probs0 = np.vstack((attr_prob.T, zeros))
probs += probs0[col]
Expand All @@ -113,6 +113,7 @@ def _sparse_probs(self, data, probs):
p0 = p.T[0].copy()
probs[:] += p0
log_prob[i, :p.shape[1]] = p.T - p0
log_prob[i, n_vals-1] = -p0

dat = data.data.copy()
dat[np.isnan(dat)] = n_vals - 1
Expand Down
33 changes: 33 additions & 0 deletions Orange/tests/test_naive_bayes.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,14 +101,17 @@ def test_compare_results_of_predict_and_predict_storage(self):
def test_predictions(self):
    """Run each prediction helper with ``sparse=None`` (no conversion)."""
    helpers = (
        self._test_predictions,
        self._test_predictions_with_absent_class,
        self._test_predict_missing_attributes,
    )
    for helper in helpers:
        helper(sparse=None)

def test_predictions_csr_matrix(self):
    """Run each prediction helper with ``sp.csr_matrix`` as the converter."""
    helpers = (
        self._test_predictions,
        self._test_predictions_with_absent_class,
        self._test_predict_missing_attributes,
    )
    for helper in helpers:
        helper(sparse=sp.csr_matrix)

def test_predictions_csc_matrix(self):
    """Run each prediction helper with ``sp.csc_matrix`` as the converter."""
    helpers = (
        self._test_predictions,
        self._test_predictions_with_absent_class,
        self._test_predict_missing_attributes,
    )
    for helper in helpers:
        helper(sparse=sp.csc_matrix)

def _test_predictions(self, sparse):
x = np.array([
Expand Down Expand Up @@ -311,6 +314,36 @@ def _test_predictions_with_absent_class(self, sparse):
np.testing.assert_almost_equal(exp_probs, probs)
np.testing.assert_equal(values, np.argmax(exp_probs, axis=1))

def _test_predict_missing_attributes(self, sparse):
    """Check predicted class probabilities for rows with missing values.

    ``sparse`` is ``None`` to keep the arrays dense, or a callable (e.g.
    ``sp.csr_matrix``) applied to the training matrix. The query rows are
    converted with the same callable, except when it is ``sp.csc_matrix``,
    in which case they stay dense.
    """
    nan = np.nan
    train_x = np.array([
        [1, 0, 0],
        [0, 1, 0],
        [0, 0, 0],
        [0, 1, 0],
        [1, 1, 0],
        [1, 2, 0],
        [1, 2, nan]])
    if sparse is not None:
        train_x = sparse(train_x)
    train_y = np.array([1, 0, 0, 0, 1, 1, 1])

    attributes = [
        DiscreteVariable("a", values="ab"),
        DiscreteVariable("b", values="abc"),
        DiscreteVariable("c", values="a"),
    ]
    class_var = DiscreteVariable("y", values="AB")
    train = Table.from_numpy(Domain(attributes, class_var), train_x, train_y)

    classifier = self.learner(train)

    # Query rows: everything missing, only "b" known, only "a" known.
    queries = np.array([
        [nan, nan, nan],
        [nan, 0, nan],
        [0, nan, nan]])
    if not (sparse is None or sparse is sp.csc_matrix):
        queries = sparse(queries)

    actual = classifier(queries, ret=classifier.Probs)
    # Each row is written as (class count + 1) / (matching rows + 2), where
    # the counts come from the training rows that agree with the known
    # attribute values of the query (all 7 rows when nothing is known).
    expected = [
        [(3+1)/(7+2), (4+1)/(7+2)],
        [(1+1)/(2+2), (1+1)/(2+2)],
        [(3+1)/(3+2), (0+1)/(3+2)],
    ]
    np.testing.assert_almost_equal(actual, expected)

def test_no_attributes(self):
y = np.array([0, 0, 0, 1, 1, 1, 2, 2])
domain = Domain([], DiscreteVariable("y", values="abc"))
Expand Down

0 comments on commit 38ccb8b

Please sign in to comment.