# Filtered complex visualizations

What might we want to know about our filtered complex?

In [1]:
## Load packages 

import numpy as np
import sys
import pandas as pd
import networkx as nx
import plotly.figure_factory as ff
import plotly.express as px
import plotly.graph_objects as go
import exhact_themes
import plotly.io as pio

# Load ExHACT visualization themes
pio.templates.default = "exhact"
my_charcoal = "#3f4142"

In [2]:
# DEV ONLY -- ideas

# [x] Histogram of weights
# [x] Heat map of simplex distribution over filtration steps
# [x] Same ^^ but for maximal simplices
# [x] Same ^^ but for subcomplex (for example, participation of nodes or group of nodes)

#--- Need a color that is NOT associated with dimensions... hmmmm....

In [15]:
## DEV ONLY
# Filtered simplicial complex represented as pandas data frame for easy integration with plotly

n_nodes = 50
n_edges = 100
n_2simps = 100
n_3simps = 30

max_filtration = 2
max_dim = 3

def find_nodes(face, df):
    
    nodes = []
    for cell in face:
        nodes.append(df[df["cell_id"]==cell]["nodes"].item())
    return np.unique(nodes)


node_data = np.arange(n_nodes)
cell_id = np.arange(n_nodes)
node_dim = np.zeros(n_nodes)
node_nodes = [[i] for i in np.arange(n_nodes)]
node_weight = np.random.rand(n_nodes)
node_faces = [[] for i in np.arange(n_nodes)]

fsc_df = pd.DataFrame({"cell_id": cell_id, "dim": node_dim, "nodes": node_nodes, "weight": 5+node_weight,
                      "faces": node_faces})


dim1_df = pd.DataFrame({"cell_id": np.arange(n_nodes,n_nodes+n_edges),
                        "dim": np.ones(n_edges),
                        "weight": 4.3+np.random.rand(n_edges),
                        "faces": [np.random.choice(fsc_df.cell_id, 2) for i in np.arange(n_edges)]})
dim1_df["nodes"] = dim1_df.faces


dim2_df = pd.DataFrame({"cell_id": np.arange(n_nodes+n_edges, n_nodes+n_edges+n_2simps),
                        "dim": 2*np.ones(n_2simps),
                        "weight": 3+np.random.rand(n_2simps),
                        "faces": [np.random.choice(dim1_df.cell_id, 3) for i in np.arange(n_2simps)]})



## NOTE we will likely have 6 nodes per 2-simp because I do not ensure edges actually connect
dim2_df["nodes"] = dim2_df["faces"].apply(find_nodes, df = dim1_df)   


dim3_df = pd.DataFrame({"cell_id": np.arange(n_nodes+n_edges+n_2simps, n_nodes+n_edges+n_2simps+n_3simps),
                        "dim": 3*np.ones(n_3simps),
                        "weight": 2.4+np.random.rand(n_3simps),
                        "faces": [np.random.choice(dim2_df.cell_id, 4) for i in np.arange(n_3simps)]})



## This method is exploding so just pick some nodes
dim3_df["nodes"] = [np.random.choice(fsc_df.cell_id, 4) for i in np.arange(n_3simps)]  

# Concatenate them all together. Sort and rank weights
fsc = pd.concat([fsc_df, dim1_df, dim2_df, dim3_df])
fsc.dim = fsc.dim.astype(int)
fsc["rank"] = np.argsort(-fsc.weight).astype(float)

# Add column for maximal simplex flag. I'll save the computation of this column for later :)
fsc["is_maximal"] = [np.random.randint(0, 2) for i in np.arange(fsc.shape[0])]

fsc.head()

Unnamed: 0,cell_id,dim,nodes,weight,faces,rank,is_maximal
0,0,0,[0],5.94846,[],31.0,1
1,1,0,[1],5.540372,[],13.0,0
2,2,0,[2],5.79019,[],18.0,1
3,3,0,[3],5.15724,[],26.0,0
4,4,0,[4],5.097504,[],0.0,1


In [16]:
## Histogram of simplex weights
    
fig = px.histogram(fsc,x="weight",color="dim", marginal="violin", opacity=0.8)
fig.update_layout(barmode='overlay')

fig.show()

In [17]:
## Count simplices of each dimension
#-- Ann - "filtered" is confusing here
# Function to keep only simplices with weight or rank at least some value
filter_on = "weight"
threshold = 3.2
fsc_filtered = fsc.loc[fsc[filter_on] >= threshold]


fig = px.histogram(fsc_filtered, x="dim",nbins = max_dim+1, color_discrete_sequence=["teal"])
fig.show()

In [18]:
## Heatmap of simplex count-by-dimension across filtration steps
## Repeat with subcomplex flag to do this for only certain simplices
#-- Could update to map the xaxis to the filtration steps

# Given list of filtration steps
# Assume filtering by weight - could also be rank
filtration_steps = np.arange(6,2,-0.01)

# Need an array of the appropriate size.
count_across_filtration = np.zeros((filtration_steps.shape[0],(max_dim+1)))

# Loop across the array and fill the count for each
for i,filtration_step in enumerate(filtration_steps) :

    thresholded_fsc_temp = fsc.loc[fsc["weight"] >= filtration_step]
    count_df_temp = thresholded_fsc_temp.dim.value_counts().to_frame()
#     print(count_df_temp)
    count_across_filtration[i, count_df_temp.index.to_numpy()] = count_df_temp.dim.to_numpy(copy=True)[count_df_temp.index.to_numpy()]


    
fig = px.imshow(np.transpose(count_across_filtration),
                 height=600,
                 aspect="auto",
                 labels=dict(x="Filtration step", y="Dimension", color="Count simplices"))

fig.show()

In [39]:
## For maximal simplices...
## Histogram of simplex weights faceted by maximal simplex flag

fig = px.histogram(fsc, x="weight", facet_row = "is_maximal", color="dim", opacity = 0.9)
fig.update_layout(barmode='overlay')
fig.show()

In [43]:
## Histogram overlaid -- this works at a B level because nbinsx specifies the max bins allowed, not exactly
# how many bins I would like. This will work nicely if we can ensure the same number of bins for each histogram
## This is a little better than above because we are comparing the counts of maximal simplices to the counts
# of ALL simplices, not just the non-maximal simplices as in the facet plot.

n_bins = 20
fig = px.histogram(fsc.loc[fsc.is_maximal==1], x="weight", color="dim", nbins=n_bins)
fig.update_layout(barmode='overlay')
fig.add_trace(go.Histogram(x = fsc.weight, opacity = 0.1, nbinsx=n_bins, name ="Entire complex"))
fig.show()

In [46]:
## Histogram of simplex participation. Let's say I have a simplex or node I care about. I want to know the number 
# of simplices by dimension in which this node or simplex of interest participates.
# I will skirt the computations here by assuming I already know which simplices contain the node I care about :)

# Add a column of the fsc that indicates participation
fsc["in_induced_subcomplex"] = [np.random.randint(0, 2) for i in np.arange(fsc.shape[0])]

# Repeat above histogram plots
n_bins = 20
fig = px.histogram(fsc.loc[fsc.in_induced_subcomplex==1], x="weight", color="dim", nbins=n_bins)
fig.update_layout(barmode='overlay')
fig.add_trace(go.Histogram(x = fsc.weight, opacity = 0.1, nbinsx=n_bins, name="All simplices"))
fig.show()