### Persistent homology

* Ripser [paper](https://www.theoj.org/joss-papers/joss.00925/10.21105.joss.00925.pdf) [code](https://github.com/scikit-tda/ripser.py) (fast)
* Dionysus 2 [code](https://mrzv.org/software/dionysus2/) (representative examples)
* Nico's [code](https://github.com/nhchristianson/Math-text-semantic-networks)
* Ann's [code](https://github.com/asizemore/PH_tutorial/blob/master/Tutorial_day1.ipynb)

In [3]:
import dionysus as d

# simplices
s = d.Simplex([0,1,2])
print("Dimension:", s.dimension())

for v in s:
    print(v)

for sb in s.boundary():
    print(sb)

s.data = 5
print(s)

simplex9 = d.Simplex([0,1,2,3,4,5,6,7,8,9])
sphere8  = d.closure([simplex9], 8)
print(len(sphere8))

Dimension: 2
0
1
2
<1,2> 0
<0,2> 0
<0,1> 0
<0,1,2> 5
1022


In [4]:
# filtration
simplices = [([2], 4), ([1,2], 5), ([0,2], 6),
             ([0], 1),   ([1], 2), ([0,1], 3)]
f = d.Filtration()
for vertices, time in simplices:
    f.append(d.Simplex(vertices, time))
f.sort()
print(len(f[2]))
for s in f:
    print(s)
print(f.index(d.Simplex([1,2])))

2
<0> 1
<1> 2
<0,1> 3
<2> 4
<1,2> 5
<0,2> 6
4


In [5]:
# persistent homology
m = d.homology_persistence(f)
for i,c in enumerate(m):
     print(i, c)

0 
1 
2 1*0 + 1*1
3 
4 1*1 + 1*3
5 


In [6]:
# diagram
dgms = d.init_diagrams(m, f)
print(dgms)
for i, dgm in enumerate(dgms):
    for pt in dgm:
        print(i, pt.birth, pt.death)

[Diagram with 3 points, Diagram with 1 points]
0 1.0 inf
0 2.0 3.0
0 4.0 5.0
1 6.0 inf


#### Load graphs

In [1]:
topics = ['biochemistry']
path_base = '/Users/harangju/Developer/data/wiki/graphs/'

In [2]:
import networkx as nx

graphs = {}
for topic in topics:
    graphs[topic] = nx.read_gexf(path_base + 'dated_filled_' + topic + '.gexf')
graphs

{'biochemistry': <networkx.classes.digraph.DiGraph at 0x119c0c310>}

In [12]:
import itertools as it
import networkx as nx

def compute_simplices(graph, n_max=2):
    """
    Parameters
    ----------
    graph: networkx.Graph
    n_max: int
        the max n for an n-simplex
        
    Returns
    -------
    simplices: dict {int: list}
        key is n & returns a list of n-simplices
    """
    simplices = {n: [] for n in range(1, n_max+1)}
    for node in graph.nodes:
        for n in range(1, n_max+1):
            neighbors = graph.neighbors(node)
            simplices[n] += list(it.combinations(neighbors, n+1))
    for k, v in simplices.items():
        simplices[k] = list(set(v))
    return simplices

In [25]:
graph = graphs[topics[0]]
simplices = compute_simplices(nx.Graph(graph),
                              n_max=2)

In [67]:
import math

for name in graph.nodes:
    if 'year' not in graph.nodes[name].keys():
        graph.nodes[name]['year'] = math.inf
# [graph.nodes[n] for n in graph.nodes]

In [86]:
node_years = nx.get_node_attributes(graph, 'year')
years = sorted(list(set(node_years.values())))
simplices = []
for year in years[:2]:
    print('Year =', year)
    subnodes = [n for n in graph.nodes 
                if graph.nodes[n]['year'] == year]
    subgraph = graph.subgraph(subnodes)
    simplices += (compute_simplices(nx.Graph(graph), n_max=2), year)
simplices

Year = -2500
Year = -2000


[{1: [('Ion channel', 'Leucine'),
   ('Amino acid', 'Vitellogenin'),
   ('Oxygen', 'Hemoglobin'),
   ('Cystatin', 'Hemopexin'),
   ('G protein-coupled receptor', 'C-terminus'),
   ('Chemical element', 'Isoleucine'),
   ('Amide', 'Antibiotic'),
   ('Chemistry', 'Retinoblastoma protein'),
   ('Atomic mass unit', 'Reading frame'),
   ('Proton', 'Stoichiometry'),
   ('Electron', 'Peptide'),
   ('Alpha-fetoprotein', 'Glutamic acid'),
   ('Basic fibroblast growth factor', 'Cancer'),
   ('Electron transport chain', 'Endomembrane system'),
   ('Immune system', 'Dendritic cell'),
   ('Electrophile', 'Molecular engineering'),
   ('Protein dimer', 'Serine'),
   ('Non-competitive inhibition', 'Serpin'),
   ('Genetics', 'Mitochondrion'),
   ('Protein structure prediction', 'Metabolism'),
   ('Lipid', 'Morphogenesis'),
   ('Denaturation (biochemistry)', 'Polymerase chain reaction'),
   ('Glucose', 'Apoptosis'),
   ('Dna', 'Fatty acid'),
   ('Heterotroph', 'Photobiology'),
   ('Chaperone (protein)', 

In [84]:
# filtration
f = d.Filtration()
for vertices, time in simplices:
#     f.append(d.Simplex(vertices, time))
    print(vertices, time)
f.sort()
print(len(f[2]))
for s in f:
    print(s)
# print(f.index(d.Simplex([1,2])))

1 2


TypeError: cannot unpack non-iterable int object