## From similarity matrix

In [None]:
import wandb

if wandb.run is not None:
    # Stop run
    wandb.run.finish()
    print("Finished run.")

In [None]:
import numpy as np
import igraph as ig
import leidenalg
import matplotlib.pyplot as plt

# Generate some random data
X = np.random.rand(100, 2)

# Compute the similarity matrix
similarity_matrix = np.exp(-np.sum((X[:, None, :] - X[None, :, :]) ** 2, axis=-1))

# Convert the similarity matrix to an igraph.Graph object
graph = ig.Graph.Weighted_Adjacency(similarity_matrix.tolist())

# Cluster the graph using the Leiden algorithm
partition = leidenalg.find_partition(graph, leidenalg.ModularityVertexPartition)

# Get the cluster assignments
clusters = partition.membership
print(f"Number of clusters: {len(set(partition.membership))}")

# Plot the data points with different colors for each cluster
plt.scatter(X[:, 0], X[:, 1], c=clusters)
plt.show()

## With cuML - cugraph

In [6]:
import cudf
import cugraph
import numpy as np
from scipy.spatial.distance import pdist, squareform

# Create a sample NumPy array
X = np.array([[0, 1], [1, 0], [2, 1], [1, 2], [3, 2], [2, 3]])

# Compute the pairwise distances between the points using pdist
distances = pdist(X)

# Convert the pairwise distances to a distance matrix using squareform
dist_matrix = squareform(distances)

# Convert the distance matrix to a cuDF DataFrame
distances_df = cudf.DataFrame(dist_matrix)

# Create a cugraph Graph object from the distance DataFrame
graph = cugraph.Graph()
graph.from_cudf_adjlist(distances_df)

# Perform graph clustering using the Leiden method
parts, modularity = cugraph.leiden(graph)

# Print the results
print("Cluster assignments:", parts)
print("Modularity score:", modularity)

TypeError: Graph.from_cudf_adjlist() missing 1 required positional argument: 'index_col'

In [4]:
import cudf
import cugraph
import numpy as np

# Create a sample NumPy array
X = np.array([[0, 1], [1, 0], [2, 1], [1, 2], [3, 2], [2, 3]])

# Compute the pairwise distances between the points
distances = cugraph.pairwise_distances(cudf.DataFrame(X))

# Create a cugraph Graph object from the distance DataFrame
graph = cugraph.Graph()
graph.from_cudf_adjlist(distances)

# Perform graph clustering using the Leiden method
parts, modularity = cugraph.leiden(graph)

# Print the results
print("Cluster assignments:", parts)
print("Modularity score:", modularity)

AttributeError: module 'cugraph' has no attribute 'pairwise_distances'

In [None]:
import cugraph
from cugraph.datasets import karate

G = karate.get_graph(download=True)
parts, modularity_score = cugraph.leiden(G)

In [None]:
import cudf
import cugraph
import numpy as np

# Load the distance matrix into a NumPy array
distances = np.load("distances.npy")

# Convert the distance matrix to a cuDF DataFrame
distances_df = cudf.DataFrame(distances)

# Create a cugraph Graph object from the distance DataFrame
graph = cugraph.Graph()
graph.from_cudf_adjlist(distances_df)

# Perform graph clustering using the Louvain method
parts, modularity = cugraph.louvain(graph)

# Print the results
print("Cluster assignments:", parts)
print("Modularity score:", modularity)

In [None]:
import numpy as np
import pandas as pd
from typing import Optional


def leiden(
    adata: AnnData,
    resolution: float = 1.0,
    n_iterations: int = 100,
    use_weights: bool = True,
    neighbors_key: Optional[int] = None,
    key_added: str = "leiden",
) -> None:
    """
    Performs Leiden Clustering using cuGraph

    Parameters
    ----------
        adata :
            annData object with 'neighbors' field.

        resolution
            A parameter value controlling the coarseness of the clustering.
            Higher values lead to more clusters.

        n_iterations
            This controls the maximum number of levels/iterations of the Louvain algorithm.
            When specified the algorithm will terminate after no more than the specified number of iterations.
            No error occurs when the algorithm terminates early in this manner.

        use_weights
            If `True`, edge weights from the graph are used in the computation
            (placing more emphasis on stronger edges).

        neighbors_key
            If not specified, `leiden` looks at `.obsp['connectivities']` for neighbors connectivities
            If specified, `leiden` looks at `.obsp['neighbors_key_ connectivities']` for neighbors connectivities

        key_added
            `adata.obs` key under which to add the cluster labels.

    """
    # Adjacency graph

    if neighbors_key:
        adjacency = adata.obsp[neighbors_key + "_connectivities"]
    else:
        adjacency = adata.obsp["connectivities"]
    offsets = cudf.Series(adjacency.indptr)
    indices = cudf.Series(adjacency.indices)
    if use_weights:
        weights = cudf.Series(adjacency.data)
    else:
        weights = None

    g = Graph()

    g.from_cudf_adjlist(offsets, indices, weights)

    # Cluster
    leiden_parts, _ = culeiden(g, resolution=resolution, max_iter=n_iterations)

    # Format output
    groups = (
        leiden_parts.to_pandas().sort_values("vertex")[["partition"]].to_numpy().ravel()
    )

    adata.obs[key_added] = pd.Categorical(
        values=groups.astype("U"),
        categories=natsorted(map(str, np.unique(groups))),
    )
    # store information on the clustering parameters
    adata.uns["leiden"] = {}
    adata.uns["leiden"]["params"] = dict(
        resolution=resolution,
        n_iterations=n_iterations,
    )