Skip to content

Commit

Permalink
add kmeans tests
Browse files Browse the repository at this point in the history
  • Loading branch information
noahnovsak authored and markotoplak committed Oct 29, 2023
1 parent 1338926 commit c046e49
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 35 deletions.
3 changes: 2 additions & 1 deletion Orange/clustering/clustering.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import numpy as np
import scipy.sparse
import dask.array as da

from Orange.data import Table, Instance
from Orange.data.table import DomainTransformationError
Expand All @@ -20,7 +21,7 @@ def fix_dim(x):
return x[0] if one_d else x

one_d = False
if isinstance(data, np.ndarray):
if isinstance(data, (np.ndarray, da.Array)):
one_d = data.ndim == 1
prediction = self.predict(np.atleast_2d(data))
elif isinstance(data, scipy.sparse.csr_matrix) or \
Expand Down
7 changes: 6 additions & 1 deletion Orange/clustering/kmeans.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ def __init__(self, projector):
self.k = projector.get_params()["n_clusters"]

def predict(self, X):
    """Return the wrapped projector's predictions for ``X``.

    When ``X`` is a dask array it is rechunked first, using the chunk
    spec ``{0: "auto", 1: -1}`` (the same spec ``fit`` applies before
    fitting). Any other input is passed to the projector unchanged.
    """
    is_dask_input = isinstance(X, da.Array)
    if is_dask_input:
        chunk_spec = {0: "auto", 1: -1}
        X = X.rechunk(chunk_spec)
    predictions = self.projector.predict(X)
    return predictions


Expand Down Expand Up @@ -48,7 +50,10 @@ def fit(self, X: Union[np.ndarray, da.Array], y: np.ndarray = None):
import dask_ml.cluster

del params["n_init"]
assert params["init"] == "k-means||"
if params["init"] != "k-means||":
warnings.warn(f"Initializing with {params['init']} defaults"
f" to sklearn. Using k-means|| instead.")
params["init"] = "k-means||"

X = X.rechunk({0: "auto", 1: -1})
__wraps__ = dask_ml.cluster.KMeans
Expand Down
83 changes: 50 additions & 33 deletions Orange/tests/test_clustering_kmeans.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,37 +11,46 @@
from Orange.clustering.kmeans import KMeans, KMeansModel
from Orange.data import Table, Domain, ContinuousVariable
from Orange.data.table import DomainTransformationError
from Orange.tests.test_dasktable import with_dasktable


class TestKMeans(unittest.TestCase):
def setUp(self):
self.kmeans = KMeans(n_clusters=2)
self.iris = Orange.data.Table('iris')

def test_kmeans(self):
c = self.kmeans(self.iris)
@with_dasktable
def test_kmeans(self, prepare_table):
iris = prepare_table(self.iris)
c = self.kmeans(iris)
# First 20 iris belong to one cluster
self.assertEqual(np.ndarray, type(c))
self.assertEqual(len(self.iris), len(c))
self.assertEqual(1, len(set(c[:20].ravel())))
self.assertEqual(type(iris.X), type(c))
self.assertEqual(len(iris), len(c))
self.assertEqual(1, len(np.unique(np.asarray(c[:20]))))

def test_kmeans_parameters(self):
@with_dasktable
def test_kmeans_parameters(self, prepare_table):
kmeans = KMeans(n_clusters=10, max_iter=10, random_state=42, tol=0.001,
init='random')
c = kmeans(self.iris)
self.assertEqual(np.ndarray, type(c))
self.assertEqual(len(self.iris), len(c))

def test_predict_table(self):
c = self.kmeans(self.iris)
self.assertEqual(np.ndarray, type(c))
self.assertEqual(len(self.iris), len(c))

def test_predict_numpy(self):
c = self.kmeans.fit(self.iris.X)
iris = prepare_table(self.iris)
c = kmeans(iris)
self.assertEqual(type(iris.X), type(c))
self.assertEqual(len(iris), len(c))

@with_dasktable
def test_predict_table(self, prepare_table):
iris = prepare_table(self.iris)
c = self.kmeans(iris)
self.assertEqual(type(iris.X), type(c))
self.assertEqual(len(iris), len(c))

@with_dasktable
def test_predict_numpy(self, prepare_table):
iris = prepare_table(self.iris)
c = self.kmeans.fit(iris.X)
self.assertEqual(KMeansModel, type(c))
self.assertEqual(np.ndarray, type(c.labels))
self.assertEqual(len(self.iris), len(c.labels))
self.assertEqual(type(iris.X), type(c.labels))
self.assertEqual(len(iris), len(c.labels))

def test_predict_sparse_csc(self):
with self.iris.unlocked():
Expand All @@ -57,21 +66,25 @@ def test_predict_spares_csr(self):
self.assertEqual(np.ndarray, type(c))
self.assertEqual(len(self.iris), len(c))

def test_model(self):
c = self.kmeans.get_model(self.iris)
@with_dasktable
def test_model(self, prepare_table):
iris = prepare_table(self.iris)
c = self.kmeans.get_model(iris)
self.assertEqual(KMeansModel, type(c))
self.assertEqual(len(self.iris), len(c.labels))
self.assertEqual(len(iris), len(c.labels))

c1 = c(self.iris)
c1 = c(iris)
# predictions of the model must be the same since the data are the same
np.testing.assert_array_almost_equal(c.labels, c1)

def test_model_np(self):
@with_dasktable
def test_model_np(self, prepare_table):
"""
Test with numpy array as an input in model.
"""
c = self.kmeans.get_model(self.iris)
c1 = c(self.iris.X)
iris = prepare_table(self.iris)
c = self.kmeans.get_model(iris)
c1 = c(iris.X)
# predictions of the model must be the same since the data are the same
np.testing.assert_array_almost_equal(c.labels, c1)

Expand All @@ -93,12 +106,14 @@ def test_model_sparse_csr(self):
# predictions of the model must be the same since the data are the same
np.testing.assert_array_almost_equal(c.labels, c1)

def test_model_instance(self):
@with_dasktable
def test_model_instance(self, prepare_table):
"""
Test with instance as an input in model.
"""
c = self.kmeans.get_model(self.iris)
c1 = c(self.iris[0])
iris = prepare_table(self.iris)
c = self.kmeans.get_model(iris)
c1 = c(iris[0])
# prediction of the model must be the same since the data are the same
self.assertEqual(c1, c.labels[0])

Expand All @@ -107,20 +122,22 @@ def test_model_list(self):
Test with list as an input in model.
"""
c = self.kmeans.get_model(self.iris)
c1 = c(self.iris.X.tolist())
c1 = c(np.asarray(self.iris.X).tolist())
# predictions of the model must be the same since the data are the same
np.testing.assert_array_almost_equal(c.labels, c1)

# example with a list of only one data item
c1 = c(self.iris.X.tolist()[0])
c1 = c(np.asarray(self.iris.X).tolist()[0])
# prediction of the model must be the same since the data are the same
np.testing.assert_array_almost_equal(c.labels[0], c1)

def test_model_bad_datatype(self):
@with_dasktable
def test_model_bad_datatype(self, prepare_table):
"""
Check model with data-type that is not supported.
"""
c = self.kmeans.get_model(self.iris)
iris = prepare_table(self.iris)
c = self.kmeans.get_model(iris)
self.assertRaises(TypeError, c, 10)

def test_model_data_table_domain(self):
Expand Down

0 comments on commit c046e49

Please sign in to comment.