In [2]:
import numpy as np
import plotly.graph_objects as go
from itertools import combinations
from operator import concat
import igraph
import Surfaces
# import TwoMapper
from gtda.mapper import (
    CubicalCover,
    make_mapper_pipeline,
    Projection,
    plot_static_mapper_graph,
    plot_interactive_mapper_graph,
    MapperInteractivePlotter,
    nerve
)
from sklearn.cluster import DBSCAN
from sklearn.decomposition import PCA
from functools import reduce

from Giotto2Mapper import (two_dim_nerve, two_mapper)
import sympy as sy

## GOALS.

We wish to define a novel higher-dimensional cover for Mapper.

Input: $f(X)$ which is the image of our data set under some continuous map $f\colon X \to \mathbb{R}^n$
1. [x] Need to identify what dimension our image is.
2. [x] Embed our image into $\mathbb{R}^{n+1}$ via the map $v = (v_1,...,v_n)\mapsto (v_1,...,v_n, -\sum_{i=1}^n v_i)$
3. [x] Find bounding box for our image $\iota \circ f (X)$
4. [ ] Choose generator matrix $M$ associated with $A_n^*$.
-- Note we have special $M$ for $n=2,3$.
5. [ ] Find scaling coefficient $c$ and scale lattice $cM$.
-- Note we will define $c = \min_{i}\frac{1}{n_{\mathrm{intervals}}}(M_i - m_i)$, where $n_{\mathrm{intervals}}$ is the `n_intervals` parameter.
6. [ ] Find which data points lie in spheres of radius $cR(1+g)$ centered at lattice points in the bounding box.
-- Note $R$ is the $\textit{covering radius}$ of $A_n^*$ and $g$ is the `perc_overlap`.
7. [ ] Use this mask to define our clusters.

Our previous cover class will be a large influence on how the $A_n^*$ cover is constructed.

In [1]:
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils import check_array
from sklearn.utils.validation import check_is_fitted
from gtda.utils.validation import validate_params
from gtda.utils.intervals import Interval
import warnings
from gtda.mapper.utils._cover import _check_has_one_column, _remove_empty_and_duplicate_intervals

In [4]:
class LatticeCover(BaseEstimator, TransformerMixin):
    """Lattice-based cover for Mapper (work in progress).

    At this stage the class only stores its hyperparameters; no fitting or
    transforming logic has been written yet.

    Parameters
    ----------
    n_intervals : int, optional, default: ``10``
        Declared in ``_hyperparameters`` as an ``int`` constrained to
        ``Interval(1, np.inf, closed='left')``.

    overlap_frac : float, optional, default: ``0.3``
        Declared in ``_hyperparameters`` as a ``float`` constrained to
        ``Interval(0, 1, closed='neither')``.
        NOTE(review): presumably the fractional enlargement of the covering
        balls -- confirm once the cover construction is implemented.

    Attributes
    ----------
    TBD:
        'bounding box'
        'lattice points (ball centers)'
        'cover radius'
        'dim'
        MORE??

    """

    # Type/range constraints for each hyperparameter.
    # NOTE(review): the layout looks like what gtda's ``validate_params``
    # expects, but nothing in this class calls it yet -- confirm.
    _hyperparameters = {
        'n_intervals': {'type': int, 'in': Interval(1, np.inf, closed='left')},
        'overlap_frac': {'type': float, 'in': Interval(0, 1, closed='neither')}
    }

    def __init__(self, n_intervals=10, overlap_frac=0.3):
        # Store the arguments untouched under their own names.
        self.n_intervals = n_intervals
        self.overlap_frac = overlap_frac
    # TO BE CONTINUED.

In [34]:
# Scratch check of the hyperplane embedding on a small integer array:
# the appended third column is the negated sum of each row.
a = np.arange(10).reshape(5, 2)
b = np.column_stack((a, -a.sum(axis=1)))
b[:, :3]

array([[  0,   1,  -1],
       [  2,   3,  -5],
       [  4,   5,  -9],
       [  6,   7, -13],
       [  8,   9, -17]])

In [36]:
def _check_dim(X):
    if X.shape[1] > 8:
        raise ValueError(f"Why are you using an incredibly high dimensional (dim {X.shape[1]}) cover?? Dont.")
    return X.shape[1]

def hyperplane_embed(X):
    """Embed the rows of ``X`` from R^dim into R^(dim+1).

    Each row ``(v_1, ..., v_dim)`` is mapped to
    ``(v_1, ..., v_dim, -(v_1 + ... + v_dim))``, so the coordinates of every
    output row sum to zero.

    Parameters
    ----------
    X : array-like of shape (n_samples, dim)
        Points to embed, one per row.

    Returns
    -------
    ndarray of shape (n_samples, dim + 1)
        ``X`` with one extra trailing column holding minus the sum of each row.
    """
    # Summing over axis=1 already gives one value per row, so no transpose is
    # needed before np.c_ appends it as the new last column.
    embed = -np.sum(X, axis=1)
    return np.c_[X, embed]

def _find_bounding_box(X, dim, n_intervals):
    coord_array = np.zeros((dim+1,2)) # Embed image into R^{dim+1}
    for i in range(dim):
        coord_array[i,0] = np.min(X[:,i]) # Minimum value in i-th coord
        coord_array[i,1] = np.max(X[:,i]) # Maximum value in i-th coord
    only_one_pt = all( _ == coord_array.ravel()[0] for _ in coord_array.ravel())
    if only_one_pt and n_intervals > 1:
        raise ValueError(
            f"Only one unique filter value found, cannot fit"
            f"{n_intervals} > 1 intervals.")
    if dim == 2 or dim == 3: # We have special representations for A* in dimensions 2 and 3.
        return coord_array[:,:dim]
    else:
        return coord_array