# **1. Import libraries**

In [12]:
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import random
import tda_tuning

# TDA
import networkx as nx
import kmapper as km
from sklearn.cluster import DBSCAN
from tmap.tda.utils import optimize_dbscan_eps
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from umap.umap_ import UMAP
from kmapper.plotlyviz import plotlyviz
from dyneusr import DyNeuGraph
import dyneusr as dsr

# Graph distance metrics
import netcomp as nc
from netrd.distance import NetSimile

In [2]:
# Set seeds for reproducibility.
# random.seed() only seeds Python's built-in generator; NumPy keeps its own
# global random state, so it is seeded separately with the same value.
seed_value = 3
random.seed(seed_value)
np.random.seed(seed_value)

# **2. Load data**

In [3]:
# Read the full table; the 'study_group' column carries the class label
fMRI_data = pd.read_csv('data.csv')

# Feature matrix: every column except the class label
x_fMRI = fMRI_data.drop(columns='study_group')

# One-hot encode the class label as integer indicator columns
y_fMRI = pd.get_dummies(fMRI_data['study_group']).astype('int')

In [34]:
# Integer code assigned to each study group
class_mapping = dict(HC=1, ROP=2, ROD=3, CHR=4)

# Per-row integer label, in the same order as the rows of fMRI_data
labels = [class_mapping[group] for group in fMRI_data['study_group']]

# **3. TDA**
Construct the TDA (Mapper) graphs using the Cover parameters selected during hyperparameter tuning. Specifically, use the Cover parameters that gave the best results under the *Stability strategy*:
- **PCA :** (50, 0.70)
- **UMAP :** (10, 0.65)
- **t-SNE :** (10, 0.75)  

Note that the best result is the one associated with the smallest score in the matrix.

In [4]:
# Mapper
# Single KeplerMapper instance, reused in later cells for every lens
# projection (fit_transform) and every graph construction (map).
# verbose=1 is why those cells print progress messages in their outputs.
mapper = km.KeplerMapper(verbose=1)

KeplerMapper()


In [47]:
# Project data
# Fit the 2-D PCA embedding once and reuse it: it feeds the PCA lens and is
# the initial layout handed to UMAP and t-SNE. Refitting the same fixed-seed
# PCA for each use only repeated the work.
pca = PCA(n_components=2, random_state=69)
pca_embedding = pca.fit_transform(x_fMRI)

# Give the stochastic embeddings an explicit random_state so that re-running
# the notebook yields the same lenses; seeding Python's `random` module alone
# does not control them. Each estimator gets its own copy of the init array.
umap = UMAP(n_components=2, init=pca_embedding.copy(), random_state=seed_value)
tsne = TSNE(n_components=2, init=pca_embedding.copy(), random_state=seed_value)

lens_pca = mapper.fit_transform(pca_embedding, projection=[0, 1])
lens_umap = mapper.fit_transform(umap.fit_transform(x_fMRI), projection=[0, 1])
lens_tsne = mapper.fit_transform(tsne.fit_transform(x_fMRI), projection=[0, 1])

# NOTE(review): cover_pca is not passed to any mapper.map call in this cell;
# the PCA graph below uses an explicit km.Cover instead. Kept in case a later
# cell reads it -- confirm whether it should replace the PCA cover.
cover_pca = dsr.mapper.utils.optimize_cover(
    x_fMRI, r=50, g=0.7,
    scale_r=False,
    scale_limits=True
)

# The DBSCAN radius depends only on the data, so compute it once and share it
# across the three graphs.
dbscan_eps = optimize_dbscan_eps(x_fMRI, threshold=95)


def _build_mapper_graph(lens, n_cubes, perc_overlap):
    """Build one Mapper graph of x_fMRI for the given lens and Cover settings.

    Clustering inside each cover element uses a fresh DBSCAN with the shared
    eps computed above and min_samples=2.
    """
    return mapper.map(
        lens=lens,
        X=x_fMRI,
        clusterer=DBSCAN(eps=dbscan_eps, min_samples=2),
        cover=km.Cover(n_cubes=n_cubes, perc_overlap=perc_overlap)
    )


# Create graphs
# NOTE(review): the markdown above lists PCA (50, 0.70) and UMAP (10, 0.65) as
# the selected Cover parameters, but the values used here are (40, 0.35) and
# (10, 0.45). They are left unchanged -- confirm which set is intended.
graph_pca = _build_mapper_graph(lens_pca, n_cubes=40, perc_overlap=0.35)
graph_umap = _build_mapper_graph(lens_umap, n_cubes=10, perc_overlap=0.45)
graph_tsne = _build_mapper_graph(lens_tsne, n_cubes=10, perc_overlap=0.75)

..Composing projection pipeline of length 1:
	Projections: [0, 1]
	Distance matrices: False
	Scalers: MinMaxScaler()
..Projecting on data shaped (587, 2)

..Projecting data using: [0, 1]

..Scaling with: MinMaxScaler()

..Composing projection pipeline of length 1:
	Projections: [0, 1]
	Distance matrices: False
	Scalers: MinMaxScaler()
..Projecting on data shaped (587, 2)

..Projecting data using: [0, 1]

..Scaling with: MinMaxScaler()

..Composing projection pipeline of length 1:
	Projections: [0, 1]
	Distance matrices: False
	Scalers: MinMaxScaler()
..Projecting on data shaped (587, 2)

..Projecting data using: [0, 1]

..Scaling with: MinMaxScaler()

Mapping on data shaped (587, 12720) using lens shaped (587, 2)

Creating 1600 hypercubes.

Created 420 edges and 189 nodes in 0:00:00.345139.
Mapping on data shaped (587, 12720) using lens shaped (587, 2)

Creating 100 hypercubes.

Created 281 edges and 86 nodes in 0:00:00.289120.
Mapping on data shaped (587, 12720) using lens shaped (587

In [48]:
# Create DyNeuGraphs
# One DyNeuGraph per Mapper graph (PCA, UMAP, t-SNE order), each annotated
# with the same one-hot class labels.
dG_pca, dG_umap, dG_tsne = (
    DyNeuGraph(G=mapper_graph, y=y_fMRI)
    for mapper_graph in (graph_pca, graph_umap, graph_tsne)
)

label,group,value,row_count
CHR,0,106,587
HC,1,251,587
ROD,2,111,587
ROP,3,119,587


   > Found 0 nodes for data point 3.
   > Found 0 nodes for data point 22.
   > Found 0 nodes for data point 25.
   > Found 0 nodes for data point 29.
   > Found 0 nodes for data point 31.
   > Found 0 nodes for data point 32.
   > Found 0 nodes for data point 39.
   > Found 0 nodes for data point 46.
   > Found 0 nodes for data point 55.
   > Found 0 nodes for data point 56.
   > Found 0 nodes for data point 62.
   > Found 0 nodes for data point 63.
   > Found 0 nodes for data point 69.
   > Found 0 nodes for data point 74.
   > Found 0 nodes for data point 76.
   > Found 0 nodes for data point 86.
   > Found 0 nodes for data point 98.
   > Found 0 nodes for data point 104.
   > Found 0 nodes for data point 105.
   > Found 0 nodes for data point 118.
   > Found 0 nodes for data point 124.
   > Found 0 nodes for data point 130.
   > Found 0 nodes for data point 149.
   > Found 0 nodes for data point 164.
   > Found 0 nodes for data point 166.
   > Found 0 nodes for data point 167.
   >

label,group,value,row_count
CHR,0,106,587
HC,1,251,587
ROD,2,111,587
ROP,3,119,587


   > Found 0 nodes for data point 3.
   > Found 0 nodes for data point 29.
   > Found 0 nodes for data point 39.
   > Found 0 nodes for data point 56.
   > Found 0 nodes for data point 62.
   > Found 0 nodes for data point 98.
   > Found 0 nodes for data point 118.
   > Found 0 nodes for data point 130.
   > Found 0 nodes for data point 187.
   > Found 0 nodes for data point 193.
   > Found 0 nodes for data point 197.
   > Found 0 nodes for data point 230.
   > Found 0 nodes for data point 246.
   > Found 0 nodes for data point 279.
   > Found 0 nodes for data point 313.
   > Found 0 nodes for data point 389.
   > Found 0 nodes for data point 399.
   > Found 0 nodes for data point 406.
   > Found 0 nodes for data point 420.
   > Found 0 nodes for data point 423.
   > Found 0 nodes for data point 459.
   > Found 0 nodes for data point 467.
   > Found 0 nodes for data point 468.
   > Found 0 nodes for data point 482.
   > Found 0 nodes for data point 491.
   > Found 0 nodes for data poin

label,group,value,row_count
CHR,0,106,587
HC,1,251,587
ROD,2,111,587
ROP,3,119,587


   > Found 0 nodes for data point 3.
   > Found 0 nodes for data point 29.
   > Found 0 nodes for data point 39.
   > Found 0 nodes for data point 56.
   > Found 0 nodes for data point 62.
   > Found 0 nodes for data point 98.
   > Found 0 nodes for data point 118.
   > Found 0 nodes for data point 130.
   > Found 0 nodes for data point 187.
   > Found 0 nodes for data point 193.
   > Found 0 nodes for data point 197.
   > Found 0 nodes for data point 230.
   > Found 0 nodes for data point 246.
   > Found 0 nodes for data point 279.
   > Found 0 nodes for data point 313.
   > Found 0 nodes for data point 389.
   > Found 0 nodes for data point 399.
   > Found 0 nodes for data point 406.
   > Found 0 nodes for data point 420.
   > Found 0 nodes for data point 423.
   > Found 0 nodes for data point 459.
   > Found 0 nodes for data point 467.
   > Found 0 nodes for data point 468.
   > Found 0 nodes for data point 482.
   > Found 0 nodes for data point 491.
   > Found 0 nodes for data poin

In [49]:
# Visualize DyNeuGraphs
# Each call is given a relative HTML filename, one per lens (PCA, UMAP, t-SNE).
# The recorded output shows a `[Force Graph] file://...` link printed per call,
# pointing at the corresponding HTML file.
dG_pca.visualize('dyneusr_pca.html')
dG_umap.visualize('dyneusr_umap.html')
dG_tsne.visualize('dyneusr_tsne.html')

Already serving localhost:None 
[Force Graph] file:///Users/stefanovannoni/Library/CloudStorage/OneDrive-PolitecnicodiMilano/Dottorato/Tesi%20Magistrale/Tuning/dyneusr_pca.html
Already serving localhost:None 
[Force Graph] file:///Users/stefanovannoni/Library/CloudStorage/OneDrive-PolitecnicodiMilano/Dottorato/Tesi%20Magistrale/Tuning/dyneusr_umap.html
Already serving localhost:None 
[Force Graph] file:///Users/stefanovannoni/Library/CloudStorage/OneDrive-PolitecnicodiMilano/Dottorato/Tesi%20Magistrale/Tuning/dyneusr_tsne.html
