### **Import libraries**

In [14]:
import warnings
from collections import defaultdict
from pathlib import Path

import dyneusr as dsr
import kmapper as km
import numpy as np
import pandas as pd
from sklearn.cluster import DBSCAN
from sklearn.decomposition import PCA, KernelPCA
from sklearn.manifold import Isomap
from umap.umap_ import UMAP

from mapping import MappingTDA

In [15]:
# Fixed seed, passed as `random_state` to the projectors so runs are repeatable
seed_value = 42

# Suppress every warning category for the rest of the session
warnings.filterwarnings(action="ignore", category=Warning)

### **Load data**

In [16]:
# Directory containing the input CSV; replace both placeholders before running
data_path = 'your data directory here'

# Join with pathlib so the path is valid whether or not `data_path`
# ends with a path separator (plain string concatenation was not).
x = pd.read_csv(Path(data_path) / 'file_name')

# Print dimensions
print(f'X shape: {x.shape}') # (n_samples, n_features)

X shape: (1109, 52)


### **Mapping framework validation**

In [17]:
# Mapper object used below to build (`map`) and render (`visualize`) the graph
mapper = km.KeplerMapper(
    verbose=1,
)

KeplerMapper()


Choose Filter dimensionality reduction technique

In [18]:
# Name of the filter (lens) to use: 'pca', 'kpca' or 'isomap'.
# Kept separate from `projector` so that name always refers to the estimator.
projector_name = 'kpca'

# Factories are lazy so that only the selected estimator is instantiated
projector_factories = {
    'pca': lambda: PCA(n_components=2, random_state=seed_value),
    'kpca': lambda: KernelPCA(n_components=2, kernel='rbf', random_state=seed_value),
    'isomap': lambda: Isomap(n_components=2),
}

# Fail early with a clear message instead of calling `.fit_transform` on a string
if projector_name not in projector_factories:
    raise ValueError(
        f"Unknown projector {projector_name!r}; "
        f"expected one of {sorted(projector_factories)}"
    )

projector = projector_factories[projector_name]()

# 2-D projection of the data, used as the Mapper lens
lens = projector.fit_transform(x)

Choose Cover parameters

In [19]:
# Cover settings: number of cubes and fractional overlap between them
cover_params = {'n_cubes': 20, 'perc_overlap': 0.6}
cover = km.Cover(**cover_params)

Choose Clustering technique

In [20]:
# `min_samples` is reused by the validation cell, so it is kept as a named value
min_samples = 3

# NOTE(review): eps=32 looks tuned to the scale of the features in `x` - confirm
clusterer = DBSCAN(
    min_samples=min_samples,
    eps=32,
)

Create graph and convert it to NetworkX format

In [21]:
# Build the Mapper graph from the lens and the original data
mapper_options = {'cover': cover, 'clusterer': clusterer}
graph = mapper.map(lens, x, **mapper_options)

# NetworkX view of the same graph, used for node/membership lookups
nx_graph = km.adapter.to_nx(graph)

Mapping on data shaped (1109, 52) using lens shaped (1109, 2)

Creating 400 hypercubes.

Created 2573 edges and 283 nodes in 0:00:00.320267.


Visualize graph

In [22]:
# NOTE(review): `y` and `y_numeric` are not defined in any visible cell, so this
# cell fails after Restart & Run All - define them in the data-loading section.
node_colors = np.array(y_numeric)
mapper.visualize(graph=graph, color_function=node_colors, path_html='graph.html')

# Same graph rendered with DyNeuSR, annotated with the per-sample labels
sample_labels = np.array(y)
dyneu_graph = dsr.DyNeuGraph(G=graph, y=sample_labels)
dyneu_graph.visualize(show=False, path_html='dyneu_graph.html')

Wrote visualization to: graph.html


label,group,value,row_count
meta-column-0,0,MFMMFMMMFMMFMMMFMMMFFFFFFFMMFFFMFMFFFMFMFMFMMMMFFMMFFFFMFMFFFMFMFFFMFFFMMFMFMFFMFMMMMFFFFFFFMFMMMMFFFFFFFFFMMMFFMMFMFMFFMFMFFFMFFFFMFMMFFFMFFFMMFFMFMFMMFMFFMFFFFFMMFFFMMMFFMFMMFFFFMMFMMMFMMFMFMFMFFMMMFFFMFFFMFMFFMMFFFMFFFMFFMMMMFMFMMFFMFMFFFFFMFMFMMMFMFFMMMMFMFMFMFMMFMFFFMFFFMFMFMMFMFFMFFFMMMFFFFMMMMMFMFMFMMFFFFFFMFMFFMMMFFMMMFMFMMMFMMFFFFMMMMFFMMFFMFMFFFMFFMFMMFMFMFMMMFFFMFFFMMFFFFMFFMMFMFFMMFFFMFMMMFFFFFFFFFFMFFFFFFMMFMMFFMMFMMFFFMMFFMFFFMMFMMFFFMMFFMFMFFFFMFMMFMMMFFFFMMFMFMFFFFFFMMMMMFFFMMMFFMMMFFMFMMFFMFMFFFMFFFFMFFMMFMMMMMMMFFFMFMFFFFFMMMFFFMFMMMMFMMFFFMMFMFFMMFMMFMMFFMFMFFFMFFMFFFFFMFFFMFFFFMFFMFMFMFFMMMFMFMFFFFFFFMMMMMFMMFFFMMFFMMFFMMFFFMMMMFFMFMMMFMMFFMMMFFFFFFFMFFFMMFFFFMFFMFFFMMMMFFFMMFMFFFMFMMFMFFFMFFMFMFFMFFFMMMMFMFFMFFFMFFFFMFFFFMFMMFFMFFMFFMMMMFMFFMMFFMMMFFMMFMFMFMFFFFMMMFFMFMMFFFFMFFFFFMMFMMFFMFFFFFMMMMMFFFFMMMMMMMMFMMMFFMFFFMFMFMFMFMFFMMFFMFMFFFMMFFFMMFFMFMFMMMMFMMFFMFFFFMMMFMMMFMFMMMFFMMFMFFFFFMMMMFFMMFFFFMFMFMFFFFMFMFMMMFMFFMMFMFMFFMMFMFMFFFFMFMMMMFMFFMMMFMMFMMMFFMFFFFFMMFFMFFMFFFMFFFMMFFMMFMMMFFFFFFMMMFFFMFMMMMFMFMMFMFFFFFFMMFFMMFFMFMMFFMFMMMFFFFMFMFMMMMMFMFMFFMFMFMMMFMFMMF,1109


   > Found 0 nodes for data point 255.
   > Found 0 nodes for data point 582.
   > Found 0 nodes for data point 1100.
Already serving localhost:None 
[Force Graph] file:///Users/stefanovannoni/Desktop/PhD/TDA%20Mapping%20Framework%20Normative%20Model/tda_normative/scripts/MICCAI/dyneu_graph.html


Extract graph node indices

In [23]:
def _cube_index(node_name):
    """Return the integer that follows 'cube' (up to the next '_') in a node name."""
    return int(node_name.split('cube')[1].split('_')[0])


# Node name -> value of the node's 'membership' attribute
samples_in_nodes = {name: attributes['membership'] for name, attributes in nx_graph.nodes(data=True)}

# Cube index of every graph node, in node order
nodes_idx = [_cube_index(name) for name in nx_graph.nodes()]

# Cube index -> members. Several nodes can share one cube index (one node per
# cluster found in that cube), so members are merged rather than overwritten;
# plain assignment would silently keep only the last cluster of each cube.
nodes_membership = {}
for node, members in samples_in_nodes.items():
    nodes_membership.setdefault(_cube_index(node), []).extend(members)

# Print number of nodes
print(f'Number of nodes: {len(samples_in_nodes)}')

Number of nodes: 283


#### Validation of the mapping framework

In [24]:
# Validation: map every training sample back onto the graph with MappingTDA and
# check that the hypercubes it returns include every graph node (cube index)
# the sample actually belongs to.
mapping = MappingTDA(x, projector, cover)

# Invert cube -> members into sample -> cubes once, instead of rescanning every
# membership list for every sample. Per-sample cube order follows the iteration
# order of `nodes_membership`, exactly as the original scan did.
nodes_by_sample = {}
for node, members in nodes_membership.items():
    for member in set(members):
        nodes_by_sample.setdefault(member, []).append(node)

check = []            # one True per sample whose graph nodes were all recovered
nodes_list = []       # unique hypercube indices holding >= min_samples items
hypercubes_list = []  # unique hypercube indices returned for any sample
sample_map = {}       # sample position -> cube indices of its graph nodes

# Set mirrors of the two lists above for O(1) duplicate checks
# (assumes the hypercube indices are hashable, e.g. ints - TODO confirm)
seen_nodes = set()
seen_hypercubes = set()

for i in range(x.shape[0]):
    subj = x.iloc[[i]]  # double brackets keep a one-row DataFrame
    subj_projected = projector.transform(subj)

    # New subject mapped on the precomputed graph
    # NOTE(review): presumably `hypercubes[idx]` is the content of hypercube
    # `idx` - confirm against MappingTDA.mapping
    hypercubes_idx, hypercubes, bins = mapping.mapping(subj_projected)

    graph_nodes_idx = nodes_by_sample.get(i, [])

    # Only successes are recorded, so the sum of `check` counts validated samples
    if all(index in hypercubes_idx for index in graph_nodes_idx):
        check.append(True)

    for idx in hypercubes_idx:
        if len(hypercubes[idx]) >= min_samples and idx not in seen_nodes:
            seen_nodes.add(idx)
            nodes_list.append(idx)
        if idx not in seen_hypercubes:
            seen_hypercubes.add(idx)
            hypercubes_list.append(idx)

    sample_map[i] = graph_nodes_idx


In [25]:
# Number of samples whose graph nodes were all recovered by the mapping
np.array(check).sum()

1109

In [26]:
# One count per line: nodes_list, hypercubes_list, nodes_membership
print(
    len(nodes_list),
    len(hypercubes_list),
    len(nodes_membership),
    sep='\n',
)

283
342
283
