<a href="https://colab.research.google.com/github/fbeilstein/topological_data_analysis/blob/master/lecture_13_reeb_graph_and_mapper.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

[KeplerMapper Applications](https://kepler-mapper.scikit-tda.org/en/latest/applications.html)

In [None]:
!pip install kmapper

In [16]:
import kmapper as km
from sklearn import datasets
from sklearn.decomposition import PCA
from sklearn.cluster import DBSCAN
import matplotlib.pyplot as plt

# Generate sample data: two noisy concentric circles.
# The seed is fixed so the point cloud (and the resulting Mapper graph) is the
# same on every Restart & Run All.
data, labels = datasets.make_circles(n_samples=5000, noise=0.05, random_state=42)

# Initialize KeplerMapper
mapper = km.KeplerMapper(verbose=1)

# Build the lens by projecting the data with PCA
projected_data = mapper.fit_transform(data, projection=PCA(n_components=2))

# Create the simplicial complex: cover the lens with overlapping cubes and
# cluster the original points that fall into each cube with DBSCAN.
graph = mapper.map(
    projected_data,
    data,
    clusterer=DBSCAN(eps=0.3, min_samples=5),
    cover=km.Cover(n_cubes=10, perc_overlap=0.2),
)

# Write the interactive HTML view. A later cell opens "mapper_output.html",
# so the file has to be produced here for the notebook to run top to bottom.
mapper.visualize(graph, path_html="mapper_output.html", title="Mapper Graph")

# Static matplotlib rendering of the same graph
km.draw_matplotlib(graph)
plt.show()


KeplerMapper(verbose=1)
..Composing projection pipeline of length 1:
	Projections: PCA(n_components=2)
	Distance matrices: False
	Scalers: MinMaxScaler()
..Projecting on data shaped (5000, 2)

..Projecting data using: 
	PCA(n_components=2)


..Scaling with: MinMaxScaler()

Mapping on data shaped (5000, 2) using lens shaped (5000, 2)

Creating 100 hypercubes.

Created 195 edges and 71 nodes in 0:00:00.731965.
no display found. Using non-interactive Agg backend


In [None]:
import IPython
from google.colab import output

# Read the saved Mapper visualization and render it inline in the notebook.
# The encoding is given explicitly so the read does not depend on the
# platform's default locale (assumes the HTML file is UTF-8 -- TODO confirm).
with open("mapper_output.html", "r", encoding="utf-8") as f_in:
  html_content = f_in.read()

display(IPython.display.HTML(html_content))



In [None]:
import numpy as np
import sklearn
import kmapper as km
from pathlib import Path
import matplotlib.pyplot as plt

# Import the submodule explicitly: a bare `import sklearn` is not a documented
# way to make `sklearn.cluster` available.
import sklearn.cluster

# Locate the cat point cloud: prefer the data/ sub-directory, then fall back to
# the working directory.
candidate_paths = ["data/cat-reference.csv", "cat-reference.csv"]
cat_path = next((p for p in candidate_paths if Path(p).exists()), None)
if cat_path is None:
  raise FileNotFoundError(
      "cat-reference.csv not found; looked in: " + ", ".join(candidate_paths)
  )

# Load the comma-separated point cloud
data = np.genfromtxt(cat_path, delimiter=",")

# Build the Mapper graph; fit_transform is called with its default projection.
mapper = km.KeplerMapper(verbose=2)
lens = mapper.fit_transform(data)
graph = mapper.map(
    lens,
    data,
    clusterer=sklearn.cluster.DBSCAN(eps=0.1, min_samples=5),
    cover=km.Cover(n_cubes=15, perc_overlap=0.2),
)

# Write the HTML into output/ when that directory exists, else next to the notebook
if Path("output/").is_dir():
  prepend = "output/"
else:
  prepend = "./"
mapper.visualize(graph, path_html=prepend + "cat.html")

# Static matplotlib rendering of the graph
km.draw_matplotlib(graph)
plt.show()

In [17]:
import io
import sys
import base64

import matplotlib.pyplot as plt
import numpy as np
import sklearn
from sklearn import datasets
from sklearn.preprocessing import MinMaxScaler
import kmapper as km
from pathlib import Path

try:
    from PIL import Image
except ImportError as e:
    print("This example requires Pillow. Run `pip install pillow` and then try again.")
    sys.exit()


# Load digits data once; return_X_y=True yields (pixel matrix, target labels)
# instead of building the dataset twice.
data, labels = datasets.load_digits(return_X_y=True)

# Raw data is (0, 16), so scale to 8 bits (pillow can't handle 4-bit greyscale PNG depth)
scaler = MinMaxScaler(feature_range=(0, 255))
# Cast to uint8 so each row can later be turned into an 8-bit greyscale image
data = scaler.fit_transform(data).astype(np.uint8)

# Create images for a custom tooltip array: each sample becomes an inline
# base64-encoded PNG wrapped in an <img> tag.
tooltip_s = []
for image_data in data:
    # The buffer is deliberately not called `output`, so it cannot clobber the
    # `google.colab.output` module imported in an earlier cell.
    with io.BytesIO() as png_buffer:
        # The rows are uint8 (cast above), so a reshaped 8x8 array is read as an
        # 8-bit greyscale image without an explicit mode argument.
        img = Image.fromarray(image_data.reshape((8, 8)))
        img.save(png_buffer, "PNG")
        contents = png_buffer.getvalue()
        img_encoded = base64.b64encode(contents)
        img_tag = """<img src="data:image/png;base64,{}">""".format(
            img_encoded.decode("utf-8")
        )
        tooltip_s.append(img_tag)

# need to make sure to feed it as a NumPy array, not a list
tooltip_s = np.array(tooltip_s)

# Import the submodules explicitly rather than relying on `import sklearn`
# (or another package) having loaded them as a side effect.
import sklearn.cluster
import sklearn.manifold

# Initialize KeplerMapper
mapper = km.KeplerMapper(verbose=2)

# Fit and transform data: the lens is a t-SNE embedding (2 components by
# default). t-SNE is stochastic, so the seed is fixed to make the graph
# reproducible across runs.
projected_data = mapper.fit_transform(
    data, projection=sklearn.manifold.TSNE(random_state=42)
)

# Create the graph (we cluster on the projected data and suffer projection loss).
# Note the high cover overlap (perc_overlap=0.4).
graph = mapper.map(
    projected_data,
    clusterer=sklearn.cluster.DBSCAN(eps=0.3, min_samples=15),
    cover=km.Cover(n_cubes=35, perc_overlap=0.4),
)

# Export the Mapper graph as HTML pages and draw a static version.
print("Output graph examples to html")

# Save into output/ when that directory exists, otherwise next to the notebook.
prepend = "output/" if Path("output/").is_dir() else "./"

# Page 1: nodes coloured by digit label, tooltips showing the digit image of
# every cluster member.
mapper.visualize(
    graph,
    path_html=prepend + "digits_custom_tooltips.html",
    title="Handwritten digits Mapper",
    custom_tooltips=tooltip_s,
    color_values=labels,
    color_function_name="labels",
)

# Page 2: tooltips showing the target y-label of every cluster member.
mapper.visualize(
    graph,
    path_html=prepend + "digits_ylabel_tooltips.html",
    title="Handwritten digits Mapper",
    custom_tooltips=labels,
)

# Static matplotlib rendering using a spring layout.
km.draw_matplotlib(graph, layout="spring")
plt.show()

KeplerMapper(verbose=2)
..Composing projection pipeline of length 1:
	Projections: TSNE()
	Distance matrices: False
	Scalers: MinMaxScaler()
..Projecting on data shaped (1797, 64)

..Projecting data using: 
	TSNE(verbose=2)

[t-SNE] Computing 91 nearest neighbors...
[t-SNE] Indexed 1797 samples in 0.001s...
[t-SNE] Computed neighbors for 1797 samples in 0.125s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1797
[t-SNE] Computed conditional probabilities for sample 1797 / 1797
[t-SNE] Mean sigma: 186.389054
[t-SNE] Computed conditional probabilities in 0.165s
[t-SNE] Iteration 50: error = 67.4751968, gradient norm = 0.0458516 (50 iterations in 1.925s)
[t-SNE] Iteration 100: error = 62.6570663, gradient norm = 0.0065015 (50 iterations in 1.431s)
[t-SNE] Iteration 150: error = 61.8408012, gradient norm = 0.0031341 (50 iterations in 0.981s)
[t-SNE] Iteration 200: error = 61.5365334, gradient norm = 0.0023715 (50 iterations in 1.016s)
[t-SNE] Iteration 250: error = 61.38611