Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add pipeline module and ase function #814

Merged
merged 7 commits into from
Aug 3, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ lint:
test:
pytest tests

fast-test:
pytest tests --ignore=tests/test_latentdistributiontest.py --ignore=tests/test_latentpositiontest.py

type-check:
mypy ./graspologic

Expand Down
2 changes: 1 addition & 1 deletion docs/reference/reference/embed.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ Decomposition

.. autofunction:: select_dimension

.. autofunction:: selectSVD
.. autofunction:: select_svd

Single graph embedding
----------------------
Expand Down
2 changes: 2 additions & 0 deletions docs/reference/reference/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ Reference
models
nominate
partition
preconditions
pipeline
plotting
preprocessing
simulations
Expand Down
14 changes: 14 additions & 0 deletions docs/reference/reference/pipeline.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
Pipeline
========
.. automodule:: graspologic.pipeline

GraphBuilder
------------
.. autoclass:: GraphBuilder

Embed
-----
.. automodule:: graspologic.pipeline.embed
.. autoclass:: graspologic.pipeline.embed.embeddings.Embeddings
.. autofunction:: graspologic.pipeline.embed.adjacency_spectral_embedding.adjacency_spectral_embedding

7 changes: 7 additions & 0 deletions docs/reference/reference/preconditions.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Preconditions
=============

.. autofunction:: graspologic.preconditions.check_argument_types
.. autofunction:: graspologic.preconditions.check_optional_argument_types
.. autofunction:: graspologic.preconditions.check_argument
.. autofunction:: graspologic.preconditions.is_real_weighted
1 change: 1 addition & 0 deletions graspologic/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import graspologic.models
import graspologic.nominate
import graspologic.partition
import graspologic.pipeline
import graspologic.preprocessing
import graspologic.plot
import graspologic.simulations
Expand Down
4 changes: 2 additions & 2 deletions graspologic/embed/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from .mds import ClassicalMDS
from .n2v import node2vec_embed
from .omni import OmnibusEmbed
from .svd import select_dimension, selectSVD
from .svd import select_dimension, select_svd
from .base import BaseSpectralEmbed
from .mug2vec import mug2vec
from .case import CovariateAssistedEmbed
Expand All @@ -21,7 +21,7 @@
"MultipleASE",
"node2vec_embed",
"select_dimension",
"selectSVD",
"select_svd",
"BaseSpectralEmbed",
"CovariateAssistedEmbed",
]
12 changes: 10 additions & 2 deletions graspologic/embed/ase.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# Copyright (c) Microsoft Corporation and contributors.
# Licensed under the MIT License.

from typing import Optional

from .base import BaseSpectralEmbed
from ..utils import augment_diagonal

Expand All @@ -12,7 +14,7 @@ class AdjacencySpectralEmbed(BaseSpectralEmbed):
The adjacency spectral embedding (ASE) is a k-dimensional Euclidean representation
of the graph based on its adjacency matrix. It relies on an SVD to reduce
the dimensionality to the specified k, or if k is unspecified, can find a number of
dimensions automatically (see :class:`~graspologic.embed.selectSVD`).
dimensions automatically (see :func:`~graspologic.embed.select_svd`).

Read more in the `Adjacency Spectral Embedding Tutorial
<https://microsoft.github.io/graspologic/tutorials/embedding/AdjacencySpectralEmbed.html>`_
Expand Down Expand Up @@ -62,6 +64,10 @@ class AdjacencySpectralEmbed(BaseSpectralEmbed):
If graph is directed, whether to concatenate left and right (out and in) latent
positions along axis 1.

svd_seed : int or None (default ``None``)
Only applicable for ``algorithm="randomized"``; allows you to seed the
randomized svd solver for deterministic, albeit pseudo-randomized behavior.



Attributes
Expand All @@ -79,7 +85,7 @@ class AdjacencySpectralEmbed(BaseSpectralEmbed):

See Also
--------
graspologic.embed.selectSVD
graspologic.embed.select_svd
graspologic.embed.select_dimension

Notes
Expand Down Expand Up @@ -114,6 +120,7 @@ def __init__(
check_lcc=True,
diag_aug=True,
concat=False,
svd_seed: Optional[int] = None,
):
super().__init__(
n_components=n_components,
Expand All @@ -122,6 +129,7 @@ def __init__(
n_iter=n_iter,
check_lcc=check_lcc,
concat=concat,
svd_seed=svd_seed,
)

if not isinstance(diag_aug, bool):
Expand Down
20 changes: 14 additions & 6 deletions graspologic/embed/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,21 @@
# Licensed under the MIT License.

import warnings

from abc import abstractmethod
from typing import Optional

import networkx as nx
import numpy as np
from sklearn.base import BaseEstimator
from sklearn.utils.validation import check_is_fitted

from .svd import select_svd
from ..utils import (
augment_diagonal,
import_graph,
is_almost_symmetric,
is_fully_connected,
)
from .svd import selectSVD

import networkx as nx


class BaseSpectralEmbed(BaseEstimator):
Expand Down Expand Up @@ -60,6 +59,10 @@ class BaseSpectralEmbed(BaseEstimator):
If graph(s) are directed, whether to concatenate each graph's left and right
(out and in) latent positions along axis 1.

svd_seed : int or None (default ``None``)
Only applicable for ``algorithm="randomized"``; allows you to seed the
randomized svd solver for deterministic, albeit pseudo-randomized behavior.

Attributes
----------
n_components_ : int
Expand All @@ -69,7 +72,7 @@ class BaseSpectralEmbed(BaseEstimator):

See Also
--------
graspologic.embed.selectSVD, graspologic.embed.select_dimension
graspologic.embed.select_svd, graspologic.embed.select_dimension
"""

def __init__(
Expand All @@ -80,6 +83,7 @@ def __init__(
n_iter=5,
check_lcc=True,
concat=False,
svd_seed: Optional[int] = None,
):
self.n_components = n_components
self.n_elbows = n_elbows
Expand All @@ -90,6 +94,7 @@ def __init__(
msg = "Parameter `concat` is expected to be type bool"
raise TypeError(msg)
self.concat = concat
self.svd_seed = svd_seed

def _reduce_dim(self, A, directed=None):
"""
Expand All @@ -101,12 +106,13 @@ def _reduce_dim(self, A, directed=None):
A: array-like, shape (n_vertices, n_vertices)
Adjacency matrix to embed.
"""
U, D, V = selectSVD(
U, D, V = select_svd(
A,
n_components=self.n_components,
n_elbows=self.n_elbows,
algorithm=self.algorithm,
n_iter=self.n_iter,
svd_seed=self.svd_seed,
)

self.n_components_ = D.size
Expand Down Expand Up @@ -348,6 +354,7 @@ def __init__(
check_lcc=True,
diag_aug=True,
concat=False,
svd_seed: Optional[int] = None,
):
super().__init__(
n_components=n_components,
Expand All @@ -356,6 +363,7 @@ def __init__(
n_iter=n_iter,
check_lcc=check_lcc,
concat=concat,
svd_seed=svd_seed,
)

if not isinstance(diag_aug, bool):
Expand Down
8 changes: 7 additions & 1 deletion graspologic/embed/lse.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,13 @@ class LaplacianSpectralEmbed(BaseSpectralEmbed):
singular_values_ : array, shape (n_components)
Singular values associated with the latent position matrices.

svd_seed : int or None (default ``None``)
Only applicable for ``algorithm="randomized"``; allows you to seed the
randomized svd solver for deterministic, albeit pseudo-randomized behavior.

See Also
--------
graspologic.embed.selectSVD
graspologic.embed.select_svd
graspologic.embed.select_dimension
graspologic.utils.to_laplacian

Expand Down Expand Up @@ -123,6 +127,7 @@ def __init__(
check_lcc: bool = True,
regularizer: Optional[float] = None,
concat: bool = False,
svd_seed: Optional[int] = None,
):
super().__init__(
n_components=n_components,
Expand All @@ -131,6 +136,7 @@ def __init__(
n_iter=n_iter,
check_lcc=check_lcc,
concat=concat,
svd_seed=svd_seed,
)
self.form = form
self.regularizer = regularizer
Expand Down
19 changes: 15 additions & 4 deletions graspologic/embed/mase.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
# Copyright (c) Microsoft Corporation and contributors.
# Licensed under the MIT License.

from typing import Optional

import numpy as np

from .base import BaseEmbedMulti
from .svd import select_dimension, selectSVD
from .svd import select_dimension, select_svd
from ..utils import is_almost_symmetric


Expand Down Expand Up @@ -70,6 +72,10 @@ class MultipleASE(BaseEmbedMulti):
If graph(s) are directed, whether to concatenate each graph's left and right (out and in) latent positions
along axis 1.

svd_seed : int or None (default ``None``)
Only applicable for ``algorithm="randomized"``; allows you to seed the
randomized svd solver for deterministic, albeit pseudo-randomized behavior.


Attributes
----------
Expand Down Expand Up @@ -112,6 +118,7 @@ def __init__(
scaled=True,
diag_aug=True,
concat=False,
svd_seed: Optional[int] = None,
):
if not isinstance(scaled, bool):
msg = "scaled must be a boolean, not {}".format(scaled)
Expand All @@ -124,6 +131,7 @@ def __init__(
n_iter=n_iter,
diag_aug=diag_aug,
concat=concat,
svd_seed=svd_seed,
)
self.scaled = scaled

Expand All @@ -136,11 +144,12 @@ def _reduce_dim(self, graphs):

# embed individual graphs
embeddings = [
selectSVD(
select_svd(
graph,
n_components=n_components,
algorithm=self.algorithm,
n_iter=self.n_iter,
svd_seed=self.svd_seed,
)
for graph in graphs
]
Expand Down Expand Up @@ -178,20 +187,22 @@ def _reduce_dim(self, graphs):

# Second SVD for vertices
# The notation is slightly different than the paper
Uhat, sing_vals_left, _ = selectSVD(
Uhat, sing_vals_left, _ = select_svd(
Us,
n_components=self.n_components,
n_elbows=self.n_elbows,
algorithm=self.algorithm,
n_iter=self.n_iter,
svd_seed=self.svd_seed,
)

Vhat, sing_vals_right, _ = selectSVD(
Vhat, sing_vals_right, _ = select_svd(
Vs,
n_components=self.n_components,
n_elbows=self.n_elbows,
algorithm=self.algorithm,
n_iter=self.n_iter,
svd_seed=self.svd_seed,
)
return Uhat, Vhat, sing_vals_left, sing_vals_right

Expand Down
25 changes: 21 additions & 4 deletions graspologic/embed/mds.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
# Copyright (c) Microsoft Corporation and contributors.
# Licensed under the MIT License.

from typing import Optional

import numpy as np
from sklearn.base import BaseEstimator
from sklearn.utils import check_array

from .svd import selectSVD
from .svd import select_svd
from ..utils import is_symmetric


Expand Down Expand Up @@ -72,6 +74,10 @@ class ClassicalMDS(BaseEstimator):
dissimilarity_matrix_ : array, shape (n_features, n_features)
Dissimilarity matrix

svd_seed : int or None (default ``None``)
Only applicable for ``n_components!=1``; allows you to seed the
randomized svd solver for deterministic, albeit pseudo-randomized behavior.

See Also
--------
graspologic.embed.select_dimension
Expand All @@ -82,7 +88,13 @@ class ClassicalMDS(BaseEstimator):
Aalborg University, Denmark 46.5 (2003).
"""

def __init__(self, n_components=None, n_elbows=2, dissimilarity="euclidean"):
def __init__(
self,
n_components=None,
n_elbows=2,
dissimilarity="euclidean",
svd_seed: Optional[int] = None,
):
# Check inputs
if n_components is not None:
if not isinstance(n_components, int):
Expand All @@ -101,6 +113,7 @@ def __init__(self, n_components=None, n_elbows=2, dissimilarity="euclidean"):
self.dissimilarity = dissimilarity

self.n_elbows = n_elbows
self.svd_seed = svd_seed

def _compute_euclidean_distances(self, X):
"""
Expand Down Expand Up @@ -187,8 +200,12 @@ def fit(self, X, y=None):
algorithm = "full"
else:
algorithm = "randomized"
U, D, V = selectSVD(
B, n_elbows=self.n_elbows, algorithm=algorithm, n_components=n_components
U, D, V = select_svd(
B,
n_elbows=self.n_elbows,
algorithm=algorithm,
n_components=n_components,
svd_seed=self.svd_seed,
)

self.n_components_ = len(D)
Expand Down