In [None]:
import gudhi                 as gd
import pandas                as pd
import matplotlib.pyplot     as plt
import numpy                 as np
import gudhi.representations as gdrep
from sklearn.preprocessing   import MinMaxScaler
from sklearn.pipeline        import Pipeline
from sklearn.svm             import SVC
from sklearn.neighbors       import KNeighborsClassifier
from sklearn.ensemble        import RandomForestClassifier

# Global matplotlib styling applied to every figure drawn after this cell.
plt.rcParams.update({
    'font.size': 16,
    'font.family': 'serif',
})

# Load the keystroke CSV with a three-level row index
# (subject, sessionIndex, rep), so a single subject or session can be
# selected with `.loc`.
strong_password_data_frame = pd.read_csv(
    'data/DSL-StrongPasswordData.csv',
    dtype={'subject': 'string'},  # declare type of 'subject' column
    index_col=['subject', 'sessionIndex', 'rep'],
)

In [None]:
def subjects_in_range(start, stop):
    """Build the subject labels for an inclusive range of subject numbers.

    Subject numbers 6, 9, 14, 23 and 45 are always left out of the result
    (presumably because they do not occur in the data set -- confirm against
    the CSV).

    :param start: integer between 2 and 57, inclusive
    :param stop: integer between 2 and 57, inclusive. Should be greater than or
                 equal to start.
    :returns: list of labels of the form ``s`` followed by the subject number
              zero-padded to three digits, from s{start} to s{stop}
    """
    excluded_ids = (6, 9, 14, 23, 45)

    subject_labels = []
    for subject_id in range(start, stop + 1):
        if subject_id in excluded_ids:
            continue
        subject_labels.append(f's{subject_id:03}')
    return subject_labels

# One DataFrame per subject label, in the order returned by
# subjects_in_range(2, 57). Selecting on the first index level leaves each
# frame indexed by the remaining levels (sessionIndex, rep).
people = [strong_password_data_frame.loc[subject] for subject in subjects_in_range(2,57)]

In [None]:
# Inspect positional rows 200-399 of the first subject's frame.
people[0].iloc[200:400]

In [None]:
# Accumulator for the training diagrams: one entry per subject, each entry
# being that subject's list of per-session diagrams (filled by the loop in
# the following cell).
train_diagrams_for_person = []

In [None]:
# Build the training set: for every subject, one dimension-1 persistence
# diagram per session 1-4, plus a parallel list of integer class labels.
#
# Both accumulators are (re)created here so that re-running this cell starts
# from scratch. Resetting only `labels` would let a second run append another
# copy of every diagram while the label list stays the same length, leaving
# data and labels out of step.
train_diagrams_for_person = []
labels = []
label_idx = 0  # integer class id of the subject currently being processed

for person, name in zip(people, subjects_in_range(2, 57)):
    diagrams = []
    for idx in range(1, 5):
        # One label per diagram, appended in the same subject-major order in
        # which the diagrams are stored.
        labels.append(label_idx)
        points = person.loc[idx]  # rows belonging to session `idx`
        simplicial_complex = gd.RipsComplex(points=points.to_numpy(),
                                            max_edge_length=2.0)
        simplex_tree = simplicial_complex.create_simplex_tree(max_dimension=3)
        # persistence() must run before intervals can be queried per
        # dimension; its return value is what gets plotted.
        diagram = simplex_tree.persistence()
        gd.plot_persistence_diagram(diagram)
        plt.show()
        # Only the dimension-1 intervals are kept as the feature for this
        # session.
        diagrams.append(simplex_tree.persistence_intervals_in_dimension(1))

    train_diagrams_for_person.append(diagrams)
    label_idx = label_idx + 1
    print(f'Training diagrams for {name} complete.')

In [None]:
# Accumulator for the held-out diagrams: one entry per subject, each entry
# being that subject's list of per-session diagrams (filled by the loop in
# the following cell).
test_diagrams_for_person = []

In [None]:
# Build the held-out set: for every subject, one dimension-1 persistence
# diagram per session 5-8.
#
# The accumulator is (re)created here so that re-running this cell starts
# from scratch instead of appending a second copy of every subject's diagrams.
test_diagrams_for_person = []

for person, name in zip(people, subjects_in_range(2, 57)):
    diagrams = []
    for idx in range(5, 9):
        points = person.loc[idx]  # rows belonging to session `idx`
        simplicial_complex = gd.RipsComplex(points=points.to_numpy(),
                                            max_edge_length=2.0)
        simplex_tree = simplicial_complex.create_simplex_tree(max_dimension=3)
        # persistence() must run before intervals can be queried per
        # dimension; the returned list itself is not needed in this cell.
        simplex_tree.persistence()
        diagrams.append(simplex_tree.persistence_intervals_in_dimension(1))

    test_diagrams_for_person.append(diagrams)
    print(f'Test diagrams for {name} complete.')

In [None]:
# Three-stage classifier working directly on persistence diagrams:
#   1. "Separator" - DiagramSelector configured with point_type="finite",
#   2. "TDA"       - BottleneckDistance with epsilon 0.001, turning diagrams
#                    into distances to the fitted diagrams,
#   3. "Estimator" - 4-nearest-neighbour vote that consumes those distances
#                    directly (metric='precomputed').
model = Pipeline(steps=[
    (
        "Separator",
        gdrep.DiagramSelector(use=True, limit=np.inf, point_type="finite"),
    ),
    (
        "TDA",
        gdrep.BottleneckDistance(epsilon=0.001),
    ),
    (
        "Estimator",
        KNeighborsClassifier(n_neighbors=4, metric='precomputed'),
    ),
])

In [None]:
# Flatten the subject -> sessions nesting into one flat list of diagrams,
# keeping the subject-major order in which `labels` was built.
#
# A plain list is used instead of np.array(...).flatten(): each diagram is its
# own array and the diagrams generally differ in row count, so the nested list
# is ragged. Recent NumPy refuses to build an array from it, and if every
# diagram happened to share a shape, flatten() would collapse the diagrams
# into individual scalars.
training_data = [diagram
                 for diagrams in train_diagrams_for_person
                 for diagram in diagrams]

In [None]:
# Fit the pipeline on the flattened training diagrams and their class labels;
# the value returned by fit() is bound back to `model`.
model = model.fit(training_data, labels)

In [None]:
# Score the fitted pipeline on the first subject's four held-out diagrams
# only; every expected label is 0, the class id given to the first subject.
first_subject_diagrams = test_diagrams_for_person[0]
first_subject_labels = [0, 0, 0, 0]
print(model.score(first_subject_diagrams, first_subject_labels))

In [None]:
# Spot check: bottleneck distance between the second and fourth training
# diagrams of the first subject.
gd.bottleneck_distance(train_diagrams_for_person[0][1], train_diagrams_for_person[0][3])

In [None]:
# Scratch check that range() excludes its stop value: evaluates to [2, 3].
list(range(2,4))