In [1]:

# Load libraries for standardizing data
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import TruncatedSVD
from scipy.sparse import csr_matrix
from sklearn import datasets
import numpy as np
import pandas as pd 

# Load MAPPER Libraries

%load_ext autoreload
%autoreload 2


%matplotlib inline

from gtda.mapper.filter import Projection
from gtda.mapper.cover import (
    OneDimensionalCover, 
    CubicalCover
)

import pandas as pd
import numpy as np

from gtda.mapper.pipeline import make_mapper_pipeline
from gtda.mapper.visualization import (
    plot_static_mapper_graph,
    plot_interactive_mapper_graph
)

from gtda.mapper import FirstSimpleGap
from gtda.mapper import FirstHistogramGap

import plotly.graph_objects as go
from matplotlib.colors import to_rgba_array, to_rgba

import matplotlib.pyplot as plt
import scipy as scp
from scipy.linalg import svd

from sklearn.cluster import AgglomerativeClustering
from sklearn.decomposition import TruncatedSVD
import warnings

warnings.filterwarnings("ignore")


In [2]:


# Read the merged table and drop its first column in a single expression,
# then preview the first rows.
data = pd.read_csv("mecklenberg_merged_geoid_FINAL.csv").iloc[:, 1:]
data.head()



Unnamed: 0,no_hs_edu,pop_indust_sum,yes_snap,wc-total,poor_english,prop_with_minors,below_income,renter_occ,nonwhite,above_30_rent,rural
0,0.058616,0.695721,0.0,0.965861,0.0,0.402597,0.027417,0.143545,0.128599,0.0,0.0
1,0.03792,0.639871,0.071685,0.945985,0.0,0.193548,0.098566,0.154485,0.083056,0.426829,0.0
2,0.043716,0.710027,0.070866,0.883117,0.0,0.637795,0.0,0.0,0.050056,0.0,0.0
3,0.0,0.616046,0.0,0.961194,0.0,0.308244,0.0,0.0,0.053665,0.0,0.0
4,0.526316,0.18,0.0,0.408163,0.0,0.0,0.435484,0.0,0.521739,0.0,0.0


In [3]:
# Scale the feature matrix with StandardScaler; `mecklen` and `coords` are two
# names for the same standardized array (`coords` is what the Mapper cells use).
coords = mecklen = StandardScaler().fit_transform(data)

In [4]:
def plot_mapper_graph_from_pipeline(pipe):
    """Plot the static Mapper graph produced by applying `pipe` to `coords`.

    The pipeline is fitted on the notebook-level ``coords`` array, and every
    node is positioned at the mean of columns 0 and 10 of the points it
    contains.

    Parameters
    ----------
    pipe :
        Mapper pipeline (e.g. built with ``make_mapper_pipeline``) to fit on
        ``coords`` and to plot.

    Returns
    -------
    fig :
        The object returned by ``plot_static_mapper_graph``.
    """
    # Generate the topological graph from the point cloud, using the pipeline
    # that was passed in rather than the notebook-global `pipeline`.
    graph = pipe.fit_transform(coords)

    # Row indices of `coords` belonging to each node.
    node_elements = graph["node_metadata"]["node_elements"]

    # Node layout: per-node mean of columns 0 and 10 of `coords`.
    # NOTE(review): these indices are hard-coded and require `coords` to have
    # at least 11 columns - confirm they are the intended features.
    layout = np.array([np.mean(coords[el], axis=0)[[0, 10]] for el in node_elements])

    # define node coloring
    #node_colors = np.array([data.loc[el, "color"].value_counts().index[0] for el in node_elements])

    # NOTE(review): confirm that the installed giotto-tda version honours
    # these flat keys in `plotly_params`.
    plotly_params = {
        'node_trace_marker_colorscale': None,
        'node_trace_marker_showscale': False
    }

    # Initialise the figure. `layout` is passed by keyword so the call does
    # not depend on the positional order of the plotting function's arguments.
    fig = plot_static_mapper_graph(pipe, coords, layout=layout, plotly_params=plotly_params)
    #fig.update_traces(patch={'hoverlabel_bgcolor': node_colors,'marker_color': node_colors})
    # fig.show(config={'scrollZoom':True})

    return fig

In [5]:
# Filter: 2-component truncated SVD of the point cloud. The default solver of
# TruncatedSVD is randomized, so the seed is pinned to keep the filter values
# (and therefore the Mapper graph) reproducible across kernel restarts.
filter_func = TruncatedSVD(n_components=2, random_state=42)

# Clustering applied inside each cover element.
clusterer = FirstSimpleGap(linkage='single')

# Mapper pipeline: filter -> cubical cover (5 intervals, 50% overlap) -> clustering.
pipeline = make_mapper_pipeline(
    filter_func=filter_func,
    cover=CubicalCover(overlap_frac=0.5, n_intervals=5),
    clusterer=clusterer,
)

# fig = plot_mapper_graph_from_pipeline(pipeline)

In [6]:
# Save output
# import pickle
# with open('mecklen-stand-n7.pickle', 'wb') as f:
    # pickle.dump(fig, f)

In [7]:
# infile = open("mecklen-stand-n4.pickle",'rb')
# n4_meck = pickle.load(infile)
# n4_meck.show(config={'scrollZoom':True})

In [8]:
# Fit the Mapper pipeline on the standardized point cloud and keep the graph.
mapper_graph = pipeline.fit_transform(
    coords
)

In [9]:
# Pull the node ids and the per-node member indices out of the graph metadata.
node_id = mapper_graph["node_metadata"]["node_id"]
node_elements = mapper_graph["node_metadata"]["node_elements"]

In [10]:
# One row per node (indexed by node id); each row lists that node's member
# indices, with NaN filling rows shorter than the widest one.
mapper_output = pd.DataFrame(data=node_elements, index=node_id)

In [11]:
# mapper_output.to_csv('charlotte_stndrd_n2_int5.csv', index=True)

In [19]:
# Display the node-membership table: one row per Mapper node, indexed by node id.
mapper_output

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,271,272,273,274,275,276,277,278,279,280
0,1,11.0,19.0,29.0,32.0,38.0,39.0,41.0,42.0,45.0,...,,,,,,,,,,
1,0,1.0,3.0,22.0,24.0,25.0,26.0,27.0,28.0,29.0,...,,,,,,,,,,
2,0,2.0,3.0,22.0,24.0,25.0,26.0,27.0,28.0,33.0,...,,,,,,,,,,
3,2,109.0,211.0,212.0,216.0,266.0,275.0,384.0,391.0,394.0,...,,,,,,,,,,
4,210,372.0,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89,234,,,,,,,,,,...,,,,,,,,,,
90,196,,,,,,,,,,...,,,,,,,,,,
91,154,,,,,,,,,,...,,,,,,,,,,
92,74,502.0,,,,,,,,,...,,,,,,,,,,


In [14]:
# Creating node intersection matrix: number of BGs that each node has in common

def bg_in_common(list1, list2):
    """Count the non-NaN values shared by two sequences.

    Every pair ``(val1, val2)`` with ``val1`` taken from `list1`, ``val2``
    taken from `list2` and ``val1 == val2`` contributes one to the result, so
    a value occurring ``a`` times in `list1` and ``b`` times in `list2` adds
    ``a * b``. NaN entries on either side are ignored.

    Parameters
    ----------
    list1, list2 : iterable of numbers
        Sequences to compare (lists, arrays or pandas Series rows). Entries
        must be accepted by ``np.isnan``.

    Returns
    -------
    int
        Number of matching pairs.
    """
    from collections import Counter

    # Tally the non-NaN values once per input instead of comparing every
    # pair: linear in the input sizes rather than quadratic.
    counts1 = Counter(val for val in list1 if not np.isnan(val))
    if not counts1:
        # Nothing to match against; `list2` is left untouched.
        return 0
    counts2 = Counter(val for val in list2 if not np.isnan(val))

    # A Counter returns 0 for absent keys, so unmatched values add nothing.
    return sum(times * counts2[val] for val, times in counts1.items())




# Row labels of the intersection matrix: the node ids, in table order.
ind = list(mapper_output.index)

# One dict per node, mapping every node id to the number of members the two
# nodes have in common.
# NOTE(review): `data` is reused here for a list of dicts and shadows the
# DataFrame loaded earlier; the cell that builds `df` reads this list.
data = [
    {
        other_id: bg_in_common(members, other_members)
        for other_id, other_members in mapper_output.iterrows()
    }
    for _, members in mapper_output.iterrows()
]
        




In [15]:
# Assemble the node-by-node intersection matrix from the per-node dicts,
# labelling the rows with the node ids, then display it.
df = pd.DataFrame(data=data, index=ind)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,84,85,86,87,88,89,90,91,92,93
0,104,90,0,0,0,0,88,75,0,0,...,0,0,0,0,0,0,0,0,0,0
1,90,204,114,0,0,0,75,165,90,0,...,0,0,0,0,0,0,0,0,0,0
2,0,114,134,20,0,0,0,90,103,13,...,0,0,0,0,0,0,0,0,0,0
3,0,0,20,21,0,1,0,0,13,13,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,2,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
90,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
91,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
92,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,2,0


In [17]:
# Write the intersection matrix to disk, keeping the node ids as the first column.
df.to_csv(
    path_or_buf="intersection_matrix_StandardizedNodes.csv",
    index=True,
)