Skip to content

Commit

Permalink
Updating python to use pylibraft (rapidsai#4887)
Browse files Browse the repository at this point in the history
This is a fairly sizable PR but it needs to be done before the `22.10` release as `pyraft` will no longer be released.

Authors:
  - Corey J. Nolet (https://github.com/cjnolet)

Approvers:
  - Dante Gama Dessavre (https://github.com/dantegd)
  - Mark Sadang (https://github.com/msadang)

URL: rapidsai#4887
  • Loading branch information
cjnolet committed Sep 21, 2022
1 parent 5a9ac3b commit 50e9cad
Show file tree
Hide file tree
Showing 94 changed files with 187 additions and 176 deletions.
5 changes: 4 additions & 1 deletion ci/gpu/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@ gpuci_mamba_retry install -c conda-forge -c rapidsai -c rapidsai-nightly -c nvid
"libraft-headers=${MINOR_VERSION}" \
"libraft-distance=${MINOR_VERSION}" \
"libraft-nn=${MINOR_VERSION}" \
"pyraft=${MINOR_VERSION}" \
"pylibraft=${MINOR_VERSION}" \
"raft-dask=${MINOR_VERSION}" \
"dask-cudf=${MINOR_VERSION}" \
"dask-cuda=${MINOR_VERSION}" \
"ucx-py=${UCX_PY_VERSION}" \
Expand Down Expand Up @@ -126,6 +127,7 @@ if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then
set -x
pip install "git+https://github.com/dask/distributed.git@2022.7.1" --upgrade --no-deps
pip install "git+https://github.com/dask/dask.git@2022.7.1" --upgrade --no-deps
pip install "git+https://github.com/scikit-learn-contrib/hdbscan.git@master" --force-reinstall --upgrade --no-deps
set +x

gpuci_logger "Python pytest for cuml"
Expand Down Expand Up @@ -200,6 +202,7 @@ else
pip install "git+https://github.com/dask/distributed.git@2022.7.1" --upgrade --no-deps
pip install "git+https://github.com/dask/dask.git@2022.7.1" --upgrade --no-deps
pip install "git+https://github.com/dask/dask-glm@main" --force-reinstall --no-deps
pip install "git+https://github.com/scikit-learn-contrib/hdbscan.git@master" --force-reinstall --upgrade --no-deps
pip install sparse

set +x
Expand Down
6 changes: 4 additions & 2 deletions conda/environments/cuml_dev_cuda11.0.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ dependencies:
- libraft-headers=22.10.*
- libraft-distance=22.10.*
- libraft-nn=22.10.*
- pyraft=22.10.*
- raft-dask=22.10.*
- pylibraft=22.10.*
- dask-cudf=22.10.*
- dask-cuda=22.10.*
- ucx>=1.13.0
Expand All @@ -33,15 +34,16 @@ dependencies:
- treelite=2.4.0
- statsmodels
- seaborn
- hdbscan
- nltk
- pip
- pip:
- git+https://github.com/dask/dask.git@2022.7.1
- git+https://github.com/dask/distributed.git@2022.7.1
- git+https://github.com/dask/dask-glm@main
- git+https://github.com/hdbscan/hdbscan.git@master
- sparse


# rapids-build-env, notebook-env and doc-env are defined in
# https://docs.rapids.ai/maintainers/depmgmt/

Expand Down
5 changes: 3 additions & 2 deletions conda/environments/cuml_dev_cuda11.2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ dependencies:
- libraft-headers=22.10.*
- libraft-distance=22.10.*
- libraft-nn=22.10.*
- pyraft=22.10.*
- raft-dask=22.10.*
- pylibraft=22.10.*
- dask-cudf=22.10.*
- dask-cuda=22.10.*
- ucx>=1.13.0
Expand All @@ -33,13 +34,13 @@ dependencies:
- treelite=2.4.0
- statsmodels
- seaborn
- hdbscan
- nltk
- pip
- pip:
- git+https://github.com/dask/dask.git@2022.7.1
- git+https://github.com/dask/distributed.git@2022.7.1
- git+https://github.com/dask/dask-glm@main
- git+https://github.com/hdbscan/hdbscan.git@master
- sparse

# rapids-build-env, notebook-env and doc-env are defined in
Expand Down
5 changes: 3 additions & 2 deletions conda/environments/cuml_dev_cuda11.4.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ dependencies:
- libraft-headers=22.10.*
- libraft-distance=22.10.*
- libraft-nn=22.10.*
- pyraft=22.10.*
- raft-dask=22.10.*
- pylibraft=22.10.*
- dask-cudf=22.10.*
- dask-cuda=22.10.*
- ucx>=1.13.0
Expand All @@ -33,13 +34,13 @@ dependencies:
- treelite=2.4.0
- statsmodels
- seaborn
- hdbscan
- nltk
- pip
- pip:
- git+https://github.com/dask/dask.git@2022.7.1
- git+https://github.com/dask/distributed.git@2022.7.1
- git+https://github.com/dask/dask-glm.git@main
- git+https://github.com/hdbscan/hdbscan.git@master
- sparse

# rapids-build-env, notebook-env and doc-env are defined in
Expand Down
5 changes: 3 additions & 2 deletions conda/environments/cuml_dev_cuda11.5.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ dependencies:
- libraft-headers=22.10.*
- libraft-distance=22.10.*
- libraft-nn=22.10.*
- pyraft=22.10.*
- raft-dask=22.10.*
- pylibraft=22.10.*
- dask-cudf=22.10.*
- dask-cuda=22.10.*
- ucx>=1.13.0
Expand All @@ -33,13 +34,13 @@ dependencies:
- treelite=2.4.0
- statsmodels
- seaborn
- hdbscan
- nltk
- pip
- pip:
- git+https://github.com/dask/dask.git@2022.7.1
- git+https://github.com/dask/distributed.git@2022.7.1
- git+https://github.com/dask/dask-glm@main
- git+https://github.com/hdbscan/hdbscan.git@master
- sparse

# rapids-build-env, notebook-env and doc-env are defined in
Expand Down
6 changes: 4 additions & 2 deletions conda/recipes/cuml/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ requirements:
- cudf {{ minor_version }}
- libcuml={{ version }}
- libcumlprims {{ minor_version }}
- pyraft {{ minor_version }}
- pylibraft {{ minor_version }}
- raft-dask {{ minor_version }}
- cudatoolkit {{ cuda_version }}.*
- ucx-py {{ ucx_py_version }}
- ucx-proc=*=gpu
Expand All @@ -51,7 +52,8 @@ requirements:
- dask-cudf {{ minor_version }}
- libcuml={{ version }}
- libcumlprims {{ minor_version }}
- pyraft {{ minor_version }}
- pylibraft {{ minor_version }}
- raft-dask {{ minor_version }}
- cupy>=7.8.0,<12.0.0a0
- treelite=2.4.0
- nccl>=2.9.9
Expand Down
1 change: 0 additions & 1 deletion cpp/src/hdbscan/detail/reachability.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
#include <raft/linalg/unary_op.cuh>

#include <raft/sparse/convert/csr.cuh>
#include <raft/sparse/hierarchy/detail/connectivities.cuh>
#include <raft/sparse/linalg/symmetrize.cuh>
#include <raft/sparse/selection/knn_graph.cuh>

Expand Down
50 changes: 25 additions & 25 deletions cpp/src/hdbscan/runner.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@

#include <cuml/common/logger.hpp>

#include <raft/cluster/detail/agglomerative.cuh>
#include <raft/cluster/detail/mst.cuh>
#include <raft/sparse/coo.hpp>
#include <raft/sparse/hierarchy/detail/agglomerative.cuh>
#include <raft/sparse/hierarchy/detail/mst.cuh>

#include "detail/condense.cuh"
#include "detail/extract.cuh"
Expand Down Expand Up @@ -160,35 +160,35 @@ void build_linkage(const raft::handle_t& handle,
rmm::device_uvector<value_idx> color(m, stream);
FixConnectivitiesRedOp<value_idx, value_t> red_op(color.data(), core_dists, m);
// during knn graph connection
raft::hierarchy::detail::build_sorted_mst(handle,
X,
mutual_reachability_indptr.data(),
mutual_reachability_coo.cols(),
mutual_reachability_coo.vals(),
m,
n,
out.get_mst_src(),
out.get_mst_dst(),
out.get_mst_weights(),
color.data(),
mutual_reachability_coo.nnz,
red_op,
metric,
(size_t)10);
raft::cluster::detail::build_sorted_mst(handle,
X,
mutual_reachability_indptr.data(),
mutual_reachability_coo.cols(),
mutual_reachability_coo.vals(),
m,
n,
out.get_mst_src(),
out.get_mst_dst(),
out.get_mst_weights(),
color.data(),
mutual_reachability_coo.nnz,
red_op,
metric,
(size_t)10);

/**
* Perform hierarchical labeling
*/
size_t n_edges = m - 1;

raft::hierarchy::detail::build_dendrogram_host(handle,
out.get_mst_src(),
out.get_mst_dst(),
out.get_mst_weights(),
n_edges,
out.get_children(),
out.get_deltas(),
out.get_sizes());
raft::cluster::detail::build_dendrogram_host(handle,
out.get_mst_src(),
out.get_mst_dst(),
out.get_mst_weights(),
n_edges,
out.get_children(),
out.get_deltas(),
out.get_sizes());
}

template <typename value_idx = int64_t, typename value_t = float>
Expand Down
18 changes: 9 additions & 9 deletions cpp/src/hierarchy/linkage.cu
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

#include "pw_dist_graph.cuh"
#include <cuml/cluster/linkage.hpp>
#include <raft/sparse/hierarchy/single_linkage.cuh>
#include <raft/cluster/single_linkage.cuh>

namespace raft {
class handle_t;
Expand All @@ -28,34 +28,34 @@ void single_linkage_pairwise(const raft::handle_t& handle,
const float* X,
size_t m,
size_t n,
raft::hierarchy::linkage_output<int, float>* out,
raft::cluster::linkage_output<int, float>* out,
raft::distance::DistanceType metric,
int n_clusters)
{
raft::hierarchy::single_linkage<int, float, raft::hierarchy::LinkageDistance::PAIRWISE>(
raft::cluster::single_linkage<int, float, raft::cluster::LinkageDistance::PAIRWISE>(
handle, X, m, n, metric, out, 0, n_clusters);
}

void single_linkage_neighbors(const raft::handle_t& handle,
const float* X,
size_t m,
size_t n,
raft::hierarchy::linkage_output<int, float>* out,
raft::cluster::linkage_output<int, float>* out,
raft::distance::DistanceType metric,
int c,
int n_clusters)
{
raft::hierarchy::single_linkage<int, float, raft::hierarchy::LinkageDistance::KNN_GRAPH>(
raft::cluster::single_linkage<int, float, raft::cluster::LinkageDistance::KNN_GRAPH>(
handle, X, m, n, metric, out, c, n_clusters);
}

struct distance_graph_impl_int_float
: public raft::hierarchy::detail::
distance_graph_impl<raft::hierarchy::LinkageDistance::PAIRWISE, int, float> {
: public raft::cluster::detail::
distance_graph_impl<raft::cluster::LinkageDistance::PAIRWISE, int, float> {
};
struct distance_graph_impl_int_double
: public raft::hierarchy::detail::
distance_graph_impl<raft::hierarchy::LinkageDistance::PAIRWISE, int, double> {
: public raft::cluster::detail::
distance_graph_impl<raft::cluster::LinkageDistance::PAIRWISE, int, double> {
};

}; // end namespace ML
10 changes: 5 additions & 5 deletions cpp/src/hierarchy/pw_dist_graph.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@

// TODO: Not a good strategy for pluggability but will be
// removed once our dense pairwise distance API is in RAFT
#include <raft/sparse/hierarchy/common.h>
#include <raft/sparse/hierarchy/detail/connectivities.cuh>
#include <raft/cluster/detail/connectivities.cuh>
#include <raft/cluster/single_linkage_types.hpp>

#include <thrust/device_ptr.h>
#include <thrust/execution_policy.h>
Expand All @@ -44,7 +44,7 @@
#include <limits>

namespace raft {
namespace hierarchy {
namespace cluster {
namespace detail {

template <typename value_idx>
Expand Down Expand Up @@ -117,7 +117,7 @@ void pairwise_distances(const raft::handle_t& handle,
* @tparam value_t
*/
template <typename value_idx, typename value_t>
struct distance_graph_impl<raft::hierarchy::LinkageDistance::PAIRWISE, value_idx, value_t> {
struct distance_graph_impl<raft::cluster::LinkageDistance::PAIRWISE, value_idx, value_t> {
void run(const raft::handle_t& handle,
const value_t* X,
size_t m,
Expand All @@ -140,5 +140,5 @@ struct distance_graph_impl<raft::hierarchy::LinkageDistance::PAIRWISE, value_idx
};

}; // namespace detail
}; // end namespace hierarchy
}; // namespace cluster
}; // end namespace raft
18 changes: 9 additions & 9 deletions cpp/test/sg/hdbscan_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@

#include <metrics/adjusted_rand_index.cuh>

#include <raft/sparse/hierarchy/detail/agglomerative.cuh>
#include <raft/cluster/detail/agglomerative.cuh>

#include <raft/distance/distance_type.hpp>
#include <raft/linalg/transpose.cuh>
Expand Down Expand Up @@ -177,14 +177,14 @@ class ClusterCondensingTest : public ::testing::TestWithParam<ClusterCondensingI
/**
* Build dendrogram of MST
*/
raft::hierarchy::detail::build_dendrogram_host(handle,
mst_src.data(),
mst_dst.data(),
mst_data.data(),
params.n_row - 1,
out_children.data(),
out_delta.data(),
out_size.data());
raft::cluster::detail::build_dendrogram_host(handle,
mst_src.data(),
mst_dst.data(),
mst_data.data(),
params.n_row - 1,
out_children.data(),
out_delta.data(),
out_size.data());

/**
* Condense Hierarchy
Expand Down
2 changes: 1 addition & 1 deletion python/cuml/cluster/agglomerative.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import numpy as np
from cuml.common.array import CumlArray
from cuml.common.base import Base
from cuml.common.doc_utils import generate_docstring
from raft.common.handle cimport handle_t
from pylibraft.common.handle cimport handle_t
from cuml.common import input_to_cuml_array
from cuml.common.array_descriptor import CumlArrayDescriptor
from cuml.common.mixins import ClusterMixin
Expand Down
2 changes: 1 addition & 1 deletion python/cuml/cluster/dbscan.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ from libc.stdlib cimport calloc, malloc, free
from cuml.common.array import CumlArray
from cuml.common.base import Base
from cuml.common.doc_utils import generate_docstring
from raft.common.handle cimport handle_t
from pylibraft.common.handle cimport handle_t
from cuml.common import input_to_cuml_array
from cuml.common import using_output_type
from cuml.common.array_descriptor import CumlArrayDescriptor
Expand Down
4 changes: 2 additions & 2 deletions python/cuml/cluster/hdbscan.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ import cupy as cp
from cuml.common.array import CumlArray
from cuml.common.base import Base
from cuml.common.doc_utils import generate_docstring
from raft.common.handle cimport handle_t
from pylibraft.common.handle cimport handle_t

from raft.common.handle import Handle
from pylibraft.common.handle import Handle
from cuml.common import input_to_cuml_array
from cuml.common.array_descriptor import CumlArrayDescriptor
from cuml.common.mixins import ClusterMixin
Expand Down
2 changes: 1 addition & 1 deletion python/cuml/cluster/kmeans.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ from cuml.common.mixins import ClusterMixin
from cuml.common.mixins import CMajorInputTagMixin
from cuml.common import input_to_cuml_array
from cuml.cluster.kmeans_utils cimport *
from raft.common.handle cimport handle_t
from pylibraft.common.handle cimport handle_t

cdef extern from "cuml/cluster/kmeans.hpp" namespace "ML::kmeans":

Expand Down
2 changes: 1 addition & 1 deletion python/cuml/cluster/kmeans_mg.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ from libc.stdlib cimport calloc, malloc, free

from cuml.common.array import CumlArray
from cuml.common.base import Base
from raft.common.handle cimport handle_t
from pylibraft.common.handle cimport handle_t
from cuml.common import input_to_cuml_array

from cuml.cluster import KMeans
Expand Down

0 comments on commit 50e9cad

Please sign in to comment.