# Distributions

In [1]:

pip install ..


Processing /home/jovyan
Building wheels for collected packages: aptviz
  Building wheel for aptviz (setup.py) ... [?25ldone
[?25h  Created wheel for aptviz: filename=aptviz-0.0.1-py3-none-any.whl size=5776 sha256=e3d1ac39e2fbdad7e703fc978539e71d0ee8e7b504fef4fe30d5237e9ac3934d
  Stored in directory: /tmp/pip-ephem-wheel-cache-sciwtovq/wheels/fc/c4/49/78b5bd16ca276f2916d0829d47c131046b6e4575f7dd51e987
Successfully built aptviz
Installing collected packages: aptviz
  Attempting uninstall: aptviz
    Found existing installation: aptviz 0.0.1
    Uninstalling aptviz-0.0.1:
      Successfully uninstalled aptviz-0.0.1
Successfully installed aptviz-0.0.1
Note: you may need to restart the kernel to use updated packages.


In [2]:
## Load packages 

import numpy as np
import sys
import pandas as pd
import networkx as nx
import plotly.figure_factory as ff
import plotly.express as px
import plotly.graph_objects as go

# sys.path.insert(1, "../aptviz")
from aptviz import aptviz_themes
from aptviz.supporting import *
import plotly.io as pio

# Make the ExHACT "aptviz" visualization theme the default Plotly template for
# every figure created below.
# NOTE(review): presumably the template is registered as a side effect of importing
# `aptviz_themes` above — confirm.
pio.templates.default = "aptviz"
# Charcoal hex colour; not referenced by any of the cells visible in this notebook.
my_charcoal = "#3f4142"

In [3]:
# Build a synthetic filtered simplicial complex data frame (fsc_df) to plot.
n_nodes = 100
# Simplex counts, one entry per dimension (dimension 0 holds the nodes).
n_simps = [n_nodes, 80, 40]
max_dim = len(n_simps) - 1
print(max_dim)

# TODO (Ann): add optional "is_maximal" / "in_subcomplex" options to create_fake_fsc_df.
# TODO: rank should be an integer.
fsc_df = create_fake_fsc_df(n_nodes, n_simps, max_dim)
fsc_df.head()

2
Created df with length 220. Expected 220.


Unnamed: 0,cell_id,dim,nodes,weight,faces,rank,is_maximal
0,0,0,[0],2.506776,[],15,1
1,1,0,[1],2.144549,[],72,0
2,2,0,[2],2.383796,[],19,0
3,3,0,[3],2.086846,[],34,0
4,4,0,[4],2.062305,[],36,1


In [10]:
def fsc_histogram_by_dim(fsc_df, x_col = "weight"):
    """Overlay one histogram of ``x_col`` per simplex dimension.

    Parameters
    ----------
    fsc_df :
        Data handed straight to ``px.histogram``. Bars are coloured by its
        "dim" column, so that column must be present.
    x_col : str
        Name of the column whose distribution is drawn (default "weight").

    Returns
    -------
    The figure produced by ``px.histogram``, with overlaid bars, a violin
    marginal and a title that names ``x_col``.
    """
    histogram = px.histogram(
        fsc_df,
        x=x_col,
        color="dim",
        marginal="violin",
        opacity=0.8,
    )
    # Overlay (rather than stack) the per-dimension bars and set the title in one call.
    histogram.update_layout(
        barmode="overlay",
        title=f"Distribution of simplex {x_col} by dimension",
    )
    return histogram

# Distribution of simplex rank, split by dimension.
fig = fsc_histogram_by_dim(fsc_df, x_col="rank")
fig.show()

In [12]:
## Suppose we want to see the distribution of simplex attributes for those simplices alive after a certain threshold
# or rank

def fsc_histogram_thresholded(fsc_df, x_col = "dim", filter_on = "weight", threshold = 0, compare = "geq"):
    """Histogram ``x_col`` over the rows of ``fsc_df`` that pass a threshold.

    Parameters
    ----------
    fsc_df : pandas.DataFrame
        Simplex table; must contain the ``filter_on`` and ``x_col`` columns.
    x_col : str
        Column whose distribution is plotted.
    filter_on : str
        Column compared against ``threshold``.
    threshold : number
        Cut-off value; rows equal to the threshold are kept in both modes.
    compare : {"geq", "leq"}
        "geq" keeps rows with ``filter_on >= threshold``;
        "leq" keeps rows with ``filter_on <= threshold``.

    Returns
    -------
    The figure produced by ``px.histogram`` for the retained rows.

    Raises
    ------
    ValueError
        If ``compare`` is neither "geq" nor "leq". Previously this case fell
        through both branches and failed later with an UnboundLocalError.
    """
    if compare == "geq":
        keep = fsc_df[filter_on] >= threshold
    elif compare == "leq":
        keep = fsc_df[filter_on] <= threshold
    else:
        raise ValueError(f'compare must be "geq" or "leq", got {compare!r}')

    fig = px.histogram(fsc_df.loc[keep], x=x_col, color_discrete_sequence=["teal"])
    return fig

# Dimensions of the simplices whose rank is at most 500.
fig = fsc_histogram_thresholded(
    fsc_df,
    x_col="dim",
    filter_on="rank",
    threshold=500,
    compare="leq",
)
fig.show()

In [38]:
# Filtration steps at which simplices are counted. Here the filtration is by rank,
# in unit steps starting just above the smallest rank; filtering by weight works too:
# filtration_steps = np.arange(max(fsc_df["weight"])-0.1,min(fsc_df["weight"]),-0.01)
filtration_steps = np.arange(
    start=min(fsc_df["rank"]) + 0.1,
    stop=max(fsc_df["rank"]),
    step=1,
)


def _count_simplices_per_dim(fsc_df, filtration_steps, filtration_col):
    """Count the simplices of each dimension present at every filtration step.

    Returns a float array of shape (number of steps, max dimension + 1) whose
    entry [i, d] is the number of rows with ``dim == d`` that pass step i.
    """
    steps = np.asarray(filtration_steps)
    n_dims = int(fsc_df["dim"].max()) + 1
    counts = np.zeros((len(steps), n_dims))

    for i, step in enumerate(steps):
        if filtration_col == "weight":
            # Weight filtration: a simplex is present while its weight is at least the step.
            present = fsc_df[filtration_col] >= step
        else:
            # Rank filtration: a simplex is present once its rank is at most the step.
            present = fsc_df[filtration_col] <= step

        dim_counts = fsc_df.loc[present, "dim"].value_counts()
        # value_counts() is ordered by frequency, so its index holds the dimension
        # *labels* in arbitrary order. Write each count into the column named by
        # its label; never index the values positionally by label.
        counts[i, dim_counts.index.to_numpy(dtype=int)] = dim_counts.to_numpy()

    return counts


def fsc_attribute_across_filtration(fsc_df, filtration_steps, filtration_col="weight"):
    """Heat map of the number of simplices per dimension across a filtration.

    Parameters
    ----------
    fsc_df : pandas.DataFrame
        Simplex table with an integer "dim" column (values >= 0) and the
        ``filtration_col`` column.
    filtration_steps : array-like of numbers
        Threshold applied at each step, in plotting order.
    filtration_col : {"weight", "rank"}
        "weight" keeps simplices with weight >= step; "rank" keeps simplices
        with rank <= step.

    Returns
    -------
    The figure produced by ``px.imshow``: one row per dimension, one column per
    filtration step, coloured by simplex count.

    Raises
    ------
    ValueError
        If ``filtration_col`` is not "weight" or "rank", or ``fsc_df`` has no rows.
    """
    if filtration_col not in ("weight", "rank"):
        raise ValueError(
            f"Please provide a valid threshold column (weight or rank), got {filtration_col!r}"
        )
    if len(fsc_df) == 0:
        raise ValueError("fsc_df is empty; there are no simplices to count")

    count_across_filtration = _count_simplices_per_dim(fsc_df, filtration_steps, filtration_col)

    # Transpose so that dimensions run along the y axis and steps along the x axis.
    fig = px.imshow(np.transpose(count_across_filtration),
                    height=600,
                    aspect="auto",
                    labels=dict(x="Filtration step", y="Dimension", color="Count simplices"))

    return fig

# Simplex counts per dimension along the rank filtration defined above.
fig = fsc_attribute_across_filtration(
    fsc_df,
    filtration_steps,
    "rank",
)
fig.show()

In [8]:
## Maximal simplices: histogram of simplex weights, one facet row per value of
## the is_maximal flag, coloured by dimension.
fig = px.histogram(
    fsc_df,
    x="weight",
    color="dim",
    facet_row="is_maximal",
    opacity=0.9,
)
fig.update_layout(barmode="overlay")
fig.show()

In [9]:
## Maximal simplices overlaid on the entire complex.
# Unlike the facet plot, this compares the counts of maximal simplices with the
# counts of ALL simplices, not just the non-maximal ones.
# nbins / nbinsx only cap the number of bins, so on its own each overlaid trace
# could end up with different bin edges. Putting every histogram trace in the same
# bingroup asks Plotly to give them compatible bin settings.

n_bins = 20
fig = px.histogram(fsc_df.loc[fsc_df.is_maximal==1], x="weight", color="dim", nbins=n_bins)
fig.add_trace(go.Histogram(x = fsc_df.weight, opacity = 0.1, nbinsx=n_bins, name ="Entire complex"))
# Applied after add_trace so the "Entire complex" trace joins the same group.
fig.update_traces(bingroup="weight", selector=dict(type="histogram"))
fig.update_layout(barmode='overlay')
fig.show()