In [1]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

import sklearn
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from umap import UMAP
from sklearn import datasets
import kmapper as km

In [2]:
data = pd.read_csv('./data/Test_Data_1102_non_control.csv')
y = y = data['CaseCtrl']
X = data.values

In [3]:
# Initialize to use t-SNE
mapper = km.KeplerMapper(verbose=0)

projected_data = mapper.fit_transform(X, projection=TSNE(perplexity=40))


In [6]:
lens1 = mapper.fit_transform(X, 4)
lens2 = mapper.fit_transform(X, projection=PCA(n_components=1))
filter_func = np.c_[lens1, lens2]

In [9]:
# Create the graph using DBSCAN as the clustering algorithm
graph = mapper.map(
    lens1,
    X,
    clusterer=sklearn.cluster.KMeans(n_clusters=2),
    cover=km.Cover(n_cubes=20, perc_overlap=0.30),
)

In [5]:
mapper.visualize(
    graph,
    title="Mapper visualization",
    path_html="mapper.html",
    custom_tooltips=y,
)

'<!DOCTYPE html>\n<html>\n\n<head>\n  <meta charset="utf-8">\n  <meta name="generator" content="KeplerMapper">\n  <title>Mapper visualization | KeplerMapper</title>\n\n  <link rel="icon" type="image/png" href="http://i.imgur.com/axOG6GJ.jpg" />\n\n  <link href=\'https://fonts.googleapis.com/css?family=Roboto+Mono:700,300\' rel=\'stylesheet\' type=\'text/css\'>\n  <style>* {\n  margin: 0;\n  padding: 0;\n}\n\nhtml, body {\n  height: 100%;\n}\n\nbody {\n  font-family: "Roboto Mono", "Helvetica", sans-serif;\n  font-size: 14px;\n}\n\n#logo {\n  width:  85px;\n  height: 85px;\n}\n\n#display {\n  color: #95A5A6;\n  background: #212121;\n}\n\n#header {\n  background: #111111;\n}\n\n#print {\n  color: #000;\n  background: #FFF;\n}\n\nh1 {\n  font-size: 21px;\n  font-weight: 300;\n  font-weight: 300;\n}\n\nh2 {\n  font-size: 18px;\n  padding-bottom: 20px;\n  font-weight: 300;\n}\n\nh3 {\n  font-size: 14px;\n  font-weight: 700;\n  text-transform: uppercase;\n}\n\nh4 {\n  font-size: 13px;\n  fon

## All Data
### Trying it with PCA and Kmeans

In [2]:
#Get data with paired down features
datafull = pd.read_csv('./data/Test_Data_1102_enumerated_filledin.csv')
data = datafull.drop(columns = ['RedCapID', 'CaseCtrl', 'indexredcapid',  'Month_Birth', 'Year_Birth', 'Month_Death','Year_Death', 'methoddeath'])

#standardize data
scaler = StandardScaler()
y = scaler.fit_transform(data)
X = data.values

In [3]:
## Try it with PCA and kmeans- All data

# Initialize to use PCA
mapper = km.KeplerMapper(verbose=0)
projected_data = mapper.fit_transform(X, projection=PCA(n_components=90))

# Create the graph using DBSCAN as the clustering algorithm
graph = mapper.map(
    projected_data,
    clusterer=KMeans(n_clusters=6, init='k-means++', random_state=42),
    cover=km.Cover(n_cubes=20, perc_overlap=0.30),
)

mapper.visualize(
    graph,
    title="Mapper visualization",
    path_html="mapperPCAKmeans.html",
    custom_tooltips=y,
)