# Kmapper script for all pools

## Import libraries

In [1]:
import kmapper as km
from kmapper.plotlyviz import *

import sklearn
#from sklearn import ensemble
# DBSCAN from sklearn for clustering algorithms
from sklearn.cluster import DBSCAN
# PCA from sklearn for projection/lens creation
from sklearn.decomposition import PCA
# from sklearn.manifold import MDS

# scipy for interpolation
# import scipy 
# from scipy.interpolate import *

# Plotly and Dash
import plotly.graph_objs as go
from kmapper.plotlyviz import plotlyviz
from kmapper.plotlyviz import *
from ipywidgets import (HBox, VBox)
import dash_html_components as html
import dash_core_components as dcc
import dash
from ipywidgets import interactive, HBox, VBox, widgets, interact
import warnings
warnings.filterwarnings("ignore")

## Upload data

In [2]:
import pandas as pd
predicted_df = pd.read_csv("../../LTRM data/RF interpolation/water_full.csv")

## Kmapper function

In [7]:
def mapper_func(df, DBSCAN_EPSILON = 20, DBSCAN_MIN_SAMPLES = 20, N_CUBES = [10, 10], PERC_OVERLAP = [.45, .45]):
    
    X = df[["WDP", "SECCHI", "TEMP", "DO", "TURB", "VEL", "TP", "TN", "SS", "CHLcal"]]
    continuous_variables =  ["WDP", "SECCHI", "TEMP", "DO", "TURB", "VEL", "TP", "TN", "SS", "CHLcal"]

    var_to_index = {continuous_variables[i] : i for i in range(len(continuous_variables))}
    #projected_vars = continuous_variables
    projected_var_indices = [var_to_index[var] for var in continuous_variables]

    # defining clustering and kmapper parameters
    # create instance of clustering alg
    cluster_alg = sklearn.cluster.DBSCAN(eps = DBSCAN_EPSILON, min_samples = DBSCAN_MIN_SAMPLES, metric = 'euclidean')

    # Instantiate kepler mapper object
    mapper = km.KeplerMapper(verbose = 0)
    
    # defining filter function as projection on to the first 2 component axis
    pca = PCA(n_components = 2)
    lens = pca.fit_transform(X)

    #pca.fit_transform(X)
    principle_component = max(abs(pca.components_[0].min()), abs(pca.components_[0].max()))
    max_index = 0

    for i in range(len(pca.components_[0])):
        if abs(pca.components_[0][i]) == principle_component:
            max_index = i

    print("Primary variable: ", continuous_variables[max_index])
    print("Corresponding component: ", pca.components_[0][max_index])
    print('Explained Variance: ', pca.explained_variance_ratio_)
    
    #lens = np.array(X[continuous_variables[max_index]])
    #lens = np.array(X[['PredictedTN', 'PredictedSS']])
    
    # Generate the simplicial complex
    scomplex = mapper.map(lens, X, cover = km.Cover(n_cubes = N_CUBES, perc_overlap = PERC_OVERLAP), 
                                                    clusterer = cluster_alg, remove_duplicate_nodes = True)  

    summary_variable = mapper.project(np.array(X), projection = projected_var_indices, scaler = None)

    pl_brewer = [[0.0, '#006837'],
             [0.1, '#1a9850'],
             [0.2, '#66bd63'],
             [0.3, '#a6d96a'],
             [0.4, '#d9ef8b'],
             [0.5, '#ffffbf'],
             [0.6, '#fee08b'],
             [0.7, '#fdae61'],
             [0.8, '#f46d43'],
             [0.9, '#d73027'],
             [1.0, '#a50026']]

    color_function_name = ["Distance to x-min"]
    color_values = lens [:,0] - lens[:,0].min() # X['PREDICTED_TP]
    my_colorscale = pl_brewer
    kmgraph,  mapper_summary, colorf_distribution = get_mapper_graph(scomplex, 
                                                                     color_values,  
                                                                     color_function_name = color_function_name, 
                                                                     colorscale = my_colorscale)

    bgcolor = 'rgba(10,10,10, 0.9)'
    # y_gridcolor = 'rgb(150,150,150)'# on a black background the gridlines are set on  grey

    plotly_graph_data = plotly_graph(kmgraph, graph_layout ='fr', colorscale = my_colorscale, 
                                     factor_size = 2.5, edge_linewidth = 0.5)

    plot_title =  'LTRM: Epsilon '+ str(DBSCAN_EPSILON) + str(DBSCAN_EPSILON) + ', MIN_SAMPLES ' + str(DBSCAN_MIN_SAMPLES) 
    # plot_title = 'Pool 13, Summer 1993-1999; Epsilon ' + str(DBSCAN_EPSILON) + ', MIN_SAMPLES ' + str(DBSCAN_MIN_SAMPLES) 
    
    layout = plot_layout(title = plot_title,  
                         width = 620, height = 570,
                         annotation_text = get_kmgraph_meta(mapper_summary),  
                         bgcolor = bgcolor)

    # FigureWidget is responsible for event listeners
    fw_graph = go.FigureWidget(data = plotly_graph_data, layout = layout)
    fw_hist = node_hist_fig(colorf_distribution, bgcolor = bgcolor)
    fw_summary = summary_fig(mapper_summary, height = 300)

    dashboard = hovering_widgets(kmgraph, 
                                 fw_graph, 
                                 bgcolor = bgcolor, 
                                 member_textbox_width = 600)

    # DESIRED FILE PATH, CHANGE TO FIT YOUR LOCAL MACHINE
    #directory_path = "../kmapper"
    
    #Update the fw_graph colorbar, setting its title:
    fw_graph.data[1].marker.colorbar.title = 'dist to<br>x-min'
    html_output_path = 'PCA_2' + 'Eps_' + str(DBSCAN_EPSILON) +'MinS_' + str(DBSCAN_MIN_SAMPLES) + 'NCUBES_' + str(N_CUBES) + 'PEROvLp_' + str(PERC_OVERLAP) + '.html'
    
    mapper.visualize(scomplex, color_values = color_values, color_function_name = color_function_name, 
                     path_html = html_output_path, lens = summary_variable, lens_names = continuous_variables)
    
    return scomplex, X

In [11]:
mapper_func(predicted_df, 10, 20, [50, 50], [.7, .7])

Primary variable:  SS
Corresponding component:  0.7851775312926299
Explained Variance:  [0.81970454 0.09756913]


({'nodes': {'cube5_cluster0-cube35_cluster0': [3588,
    3589,
    2567,
    3592,
    3084,
    3091,
    3095,
    26655,
    26664,
    2556,
    20077,
    20621,
    9363,
    20629,
    20630,
    20635,
    20640,
    20654,
    9393,
    9394,
    20657,
    20658,
    9397,
    20659,
    20662,
    20664,
    20666,
    20667,
    7359,
    7361,
    7362,
    7365,
    7366,
    7369,
    7370,
    9447,
    9448,
    6891,
    11504,
    11506,
    9458,
    9460,
    7416,
    7417,
    9980,
    7437,
    7438,
    7447,
    6427,
    6428,
    6429,
    6430,
    7455,
    6432,
    6433,
    20783,
    20785,
    6963,
    6975,
    6977,
    6467,
    4935,
    4936,
    4937,
    6983,
    6985,
    4941,
    4942,
    346,
    6506,
    7018,
    6516,
    7030,
    22392,
    380,
    6527,
    22402,
    4486,
    22406,
    22409,
    22423,
    22425,
    22426,
    22430,
    22431,
    3499,
    3528,
    22486,
    2522,
    22492,
    22495,
    2034,
    254