## Import required Python Libraries

In [None]:
import sys
# Report the interpreter version this notebook runs on (header line, then version string).
print("Current version of your system:", sys.version, sep="\n")

In [None]:
import numpy as np
import tensorflow as tf
from sklearn.ensemble import *
from sklearn.svm import *
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import GridSearchCV, KFold, ShuffleSplit
from tensorflow import random_uniform_initializer as rui

In [None]:
sys.path.append('./perslay/perslay')
from perslay import PerslayModel

In [None]:
sys.path.append('./perslay/tutorial')
from experiments import *

## Generate predefined persistence diagrams

In [None]:
# Name of the dataset used by every later cell.
dataset = "MUTAG"

In [None]:
# Build the diagrams and features for `dataset` under the given data directory
# (helper comes from the tutorial's `experiments` module; presumably writes files there — confirm).
generate_diagrams_and_features(
    dataset,
    path_dataset='./perslay/tutorial/data/MUTAG/',
)

In [None]:
# Load the data for `dataset` from the same directory.
#   diags_dict: mapping filtration name -> list of diagrams (it is iterated that way below)
#   F:          per-sample feature table (its second dimension is used as the feature count)
#   L:          label matrix (argmax over axis 1 is taken later — presumably one-hot; confirm)
diags_dict, F, L = load_data(
    dataset,
    path_dataset='./perslay/tutorial/data/MUTAG/',
)

In [None]:
# Re-create F as a float64 NumPy array so that later `.shape` / fancy-indexing calls work on it.
F = np.array(F, dtype=np.float64)

In [None]:
# Show the loaded diagrams (helper from the tutorial's `experiments` module;
# presumably produces plots — confirm against that module).
visualize_diagrams(diags_dict)

## Preprocess persistence diagrams

In [None]:
import gudhi.representations as tda

# Upper bound on the number of points kept per diagram (used as the cap in `min(thresh, ...)` below).
thresh = 500

# Preprocessing pipeline applied to each filtration's list of diagrams.
# Step names matter: "ProminentPts__num_pts" is set per filtration through set_params().
tmp = Pipeline(steps=[
    # keep only the points of type "finite"
    ("Selector",     tda.DiagramSelector(use=True, point_type="finite")),
    # num_pts is filled in later, once per filtration
    ("ProminentPts", tda.ProminentPoints(use=True)),
    # min-max scale coordinates 0 and 1 of every diagram
    ("Scaler",       tda.DiagramScaler(use=True, scalers=[([0, 1], MinMaxScaler())])),
    # presumably pads diagrams to a common size so they can be stacked — confirm in gudhi docs
    ("Padding",      tda.Padding(use=True)),
])

# Size of the largest diagram in each filtration, computed once per filtration.
# `default=0` keeps max() from raising ValueError when a filtration has no diagrams at all;
# such a filtration is then dropped by the `> 0` filter, like one whose diagrams are all empty.
max_pts = {filt: max((len(dgm) for dgm in dgms), default=0)
           for filt, dgms in diags_dict.items()}

# Per-filtration pipeline parameters: keep at most `thresh` points per diagram.
prm = {filt: {"ProminentPts__num_pts": min(thresh, largest)}
       for filt, largest in max_pts.items() if largest > 0}

# Run the preprocessing pipeline once per retained filtration, with that
# filtration's own "ProminentPts__num_pts" setting.
diags = []
for filt, filt_params in prm.items():
    tmp.set_params(**filt_params)
    diags.append(tmp.fit_transform(diags_dict[filt]))

# npts: number of diagrams in the first filtration; the same count is taken from every filtration.
D, npts = [], len(diags[0])

# One float32 array per filtration, with the diagrams stacked along a new leading axis.
for filt_diags in diags:
    stacked = np.stack([filt_diags[i] for i in range(npts)], axis=0)
    D.append(np.array(stacked, dtype=np.float32))

## Using PersLay in a neural network

In [None]:
# Fetch a model, optimizer, loss and metrics for this dataset from the tutorial helper;
# the second argument is the number of columns of F.
# NOTE: all four names are assigned again in the later MirroredStrategy cell.
model, optimizer, loss, metrics = get_model(dataset, F.shape[1])

In [None]:
# Start from an empty list of per-channel parameter dicts (rebuilt further down).
perslay_parameters = list()

In [None]:
# Empty option dict for one PersLay channel; the following cells fill it in.
perslay_channel = dict()

In [None]:
# Training flags and final-model choice for the channel
# (exact meaning of each key is defined by PerslayModel, not visible here).
perslay_channel.update({
    "pweight_train": True,
    "layer_train":   True,
    "final_model":   "identity",
})

In [None]:
# Layer option "Image": 20x20 grid over slightly more than the unit square.
# Later cells assign "layer" again, so the last cell run decides the layer.
perslay_channel.update({
    "layer":          "Image",
    "image_size":     (20, 20),
    "image_bnds":     ((-.001, 1.001), (-.001, 1.001)),
    "lvariance_init": 3.,
})

In [None]:
# Layer option "PermutationEquivariant"; weight, bias and gamma all use a uniform(0, 1) initializer.
# Overwrites any "layer" value set by an earlier cell.
perslay_channel.update({
    "layer":        "PermutationEquivariant",
    "lpeq":         [(5, "max")],
    "lweight_init": rui(0.0, 1.0),
    "lbias_init":   rui(0.0, 1.0),
    "lgamma_init":  rui(0.0, 1.0),
})

In [None]:
# Layer option "Exponential" with 25 units; rui(3.0, 3.0) is a uniform initializer
# whose bounds coincide, i.e. a constant 3.
# Overwrites any "layer" value set by an earlier cell.
perslay_channel.update({
    "layer":          "Exponential",
    "lnum":           25,
    "lmean_init":     rui(0.0, 1.0),
    "lvariance_init": rui(3.0, 3.0),
})

In [None]:
# Layer option "Rational" with 25 units; variance and alpha both start at the constant 3
# (uniform initializer with equal bounds).
# Overwrites any "layer" value set by an earlier cell.
perslay_channel.update({
    "layer":          "Rational",
    "lnum":           25,
    "lmean_init":     rui(0.0, 1.0),
    "lvariance_init": rui(3.0, 3.0),
    "lalpha_init":    rui(3.0, 3.0),
})

In [None]:
# Layer option "RationalHat" with 25 units and q = 2; r starts at the constant 3.
# Overwrites any "layer" value set by an earlier cell.
perslay_channel.update({
    "layer":      "RationalHat",
    "lnum":       25,
    "lmean_init": rui(0.0, 1.0),
    "lr_init":    rui(3.0, 3.0),
    "q":          2,
})

In [None]:
# Layer option "Landscape" with 100 sample locations initialised uniformly in [0, 1].
# Overwrites any "layer" value set by an earlier cell.
perslay_channel.update({
    "layer":        "Landscape",
    "lsample_num":  100,
    "lsample_init": rui(0.0, 1.0),
})

In [None]:
# Layer option "BettiCurve": theta = 10, 100 sample locations initialised uniformly in [0, 1].
# Overwrites any "layer" value set by an earlier cell.
perslay_channel.update({
    "layer":        "BettiCurve",
    "theta":        10,
    "lsample_num":  100,
    "lsample_init": rui(0.0, 1.0),
})

In [None]:
# Layer option "Entropy": theta = 10, 100 sample locations initialised uniformly in [0, 1].
# Overwrites any "layer" value set by an earlier cell.
perslay_channel.update({
    "layer":        "Entropy",
    "theta":        10,
    "lsample_num":  100,
    "lsample_init": rui(0.0, 1.0),
})

In [None]:
# Point-weight option "power" with initial value 1. and power 1.
# Later cells assign "pweight" again, so the last cell run decides the weight.
perslay_channel.update({
    "pweight":       "power",
    "pweight_init":  1.,
    "pweight_power": 1,
})

In [None]:
# Point-weight option "grid": 20x20 cells over slightly more than the unit square,
# every cell starting at the constant 1 (uniform initializer with equal bounds).
# Overwrites any "pweight" value set by an earlier cell.
perslay_channel.update({
    "pweight":      "grid",
    "pweight_size": [20,20],
    "pweight_bnds": ((-.001, 1.001), (-.001, 1.001)),
    "pweight_init": rui(1.0, 1.0),
})

In [None]:
# Point-weight option "gmix" with 3 components.
# The initial value is a 4x3 float32 array: rows 0-1 are drawn uniformly from [0, 1),
# rows 2-3 are the constant 5 (presumably means on top, spreads below — confirm in PersLay).
# Overwrites any "pweight" value set by an earlier cell.
perslay_channel.update({
    "pweight":     "gmix",
    "pweight_num": 3,
    "pweight_init": np.array(
        np.vstack([
            np.random.uniform(0., 1., [2, 3]),
            5. * np.ones([2, 3]),
        ]),
        dtype=np.float32,
    ),
})

In [None]:
# No point weighting; overwrites any "pweight" value set by an earlier cell.
perslay_channel.update({"pweight": None})

In [None]:
# Permutation-invariant operation "sum"; later cells assign "perm_op" again.
perslay_channel.update({"perm_op": "sum"})

In [None]:
# Permutation-invariant operation "topk" with keep = 5;
# overwrites any "perm_op" value set by an earlier cell.
perslay_channel.update({
    "perm_op": "topk",
    "keep":    5,
})

In [None]:
# Permutation-invariant operation "max"; overwrites any "perm_op" value set by an earlier cell.
perslay_channel.update({"perm_op": "max"})

In [None]:
# Permutation-invariant operation "mean"; overwrites any "perm_op" value set by an earlier cell.
perslay_channel.update({"perm_op": "mean"})

In [None]:
# One parameter dict per diagram array in D, all starting from the same settings.
# Each channel gets its own shallow copy: repeating the same dict object would make
# every list entry an alias, so changing one channel's option would change all of them.
perslay_parameters = [dict(perslay_channel) for _ in D]

In [None]:
# Build the model and its training objects inside a MirroredStrategy scope.
# This re-assigns `model`, `optimizer`, `loss` and `metrics` from the earlier get_model() cell.
mirrored_strategy = tf.distribute.MirroredStrategy()
with mirrored_strategy.scope():

    # rho: a single dense sigmoid layer with one output per column of L.
    # NOTE(review): input_shape=(16039,) is hard-coded; it presumably has to match the
    # width of the vector PersLay hands to rho for the chosen channel settings — confirm.
    rho = tf.keras.Sequential([
        tf.keras.layers.Dense(L.shape[1], activation="sigmoid", input_shape=(16039,)),
    ])
    model = PerslayModel(
        name="PersLay",
        diagdim=2,
        perslay_parameters=perslay_parameters,
        rho=rho,
    )

    # Adam whose learning rate starts at 0.01 and decays exponentially
    # (rate 0.5, decay_steps 20), wrapped in a moving average of the variables.
    # NOTE(review): `tfa` is not imported in the visible cells; presumably it comes
    # in through `from experiments import *` — confirm.
    lr = tf.keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate=0.01,
        decay_steps=20,
        decay_rate=0.5,
    )
    optimizer = tf.keras.optimizers.Adam(learning_rate=lr, epsilon=1e-4)
    optimizer = tfa.optimizers.MovingAverage(optimizer, average_decay=0.9)

    # Categorical cross-entropy loss.
    loss = tf.keras.losses.CategoricalCrossentropy()

    # Categorical accuracy is the only tracked metric.
    metrics = [tf.keras.metrics.CategoricalAccuracy()]

### Train

In [None]:
# Number of entries along the first axis of the first diagram array, i.e. the number
# of samples that the train/test split below is drawn from.
num_pts = len(D[0])

In [None]:
# Fraction of samples held out for testing, and number of training epochs.
test_size, epochs = 0.3, 100

In [None]:
# Shuffle the sample indices, then cut the permutation once: the first
# int((1 - test_size) * num_pts) indices form the training set, the rest the test set.
random_perm = np.random.permutation(num_pts)
train, test = np.split(random_perm, [int((1 - test_size) * num_pts)])

In [None]:
# Train and evaluate through the tutorial helper. The first returned value is discarded;
# `tr` and `te` are indexed at position 1 in the next cell to read the accuracies.
_, tr, te = evaluate_model(
    L, F, D,
    train, test,
    model, optimizer, loss, metrics,
    num_epochs=epochs,
    verbose=0,
    plots=True,
)

In [None]:
# Report the value at index 1 of the train and test results from evaluate_model.
print(f"Train accuracy = {tr[1]!s}, test accuracy = {te[1]!s}")

In [None]:
# Representations computed by the trained model for every sample, as a NumPy array.
vectors = model.compute_representations(D).numpy()

# Classifier inputs: representations and the extra features F, side by side.
train_vect = np.hstack([vectors[train], F[train]])
test_vect = np.hstack([vectors[test], F[test]])

# Integer class labels: index of the largest entry in each row of L.
train_labs = np.argmax(L[train], axis=1)
test_labs = np.argmax(L[test], axis=1)

In [None]:
# One-step pipeline whose "Estimator" step is swapped out by the grid search.
clf = Pipeline(steps=[("Estimator", SVC())])

# Candidate estimators: a random forest, an SVC over two kernels and three C values,
# and AdaBoost (forest and AdaBoost with their default settings).
clf_prms = [
    {"Estimator": [RandomForestClassifier()]},
    {
        "Estimator": [SVC()],
        "Estimator__kernel": ["linear", "rbf"],
        "Estimator__C": [0.1, 1, 10],
    },
    {"Estimator": [AdaBoostClassifier()]},
]

# 5-fold cross-validated search over all candidates.
classifier = GridSearchCV(estimator=clf, param_grid=clf_prms, cv=5)

In [None]:
# Run the grid search on the training split, then score it on both splits
# (these `tr` / `te` replace the values returned earlier by evaluate_model).
classifier.fit(train_vect, train_labs)
tr, te = (
    classifier.score(train_vect, train_labs),
    classifier.score(test_vect, test_labs),
)

In [None]:
# Report the grid-searched classifier's scores on the train and test splits.
print(f"Train accuracy = {tr!s}, test accuracy = {te!s}")