In [19]:
import numpy as np
import h5py

from gtda.homology import VietorisRipsPersistence
from gtda.diagrams import PairwiseDistance
from gtda.time_series import SingleTakensEmbedding

from itertools import chain
from random import choices

In [2]:
# Delay embedding with fixed (not searched) parameters: delay 8, 3 dimensions.
embedder = SingleTakensEmbedding(parameters_type='fixed', time_delay=8, dimension=3)

# Persistence transformer restricted to homology dimensions 0 and 1.
ripser = VietorisRipsPersistence(
    homology_dimensions=[0, 1],
)

In [11]:
# Load the training and test splits fully into memory as NumPy arrays.
# The files are opened read-only on purpose: h5py releases before 3.0 used
# append mode when no mode was given, which could create or modify the file
# even though this cell only reads from it.
with h5py.File('../files/train.h5', mode='r') as f:
    chunks_train = np.array(f["chunks"])
    names_train = np.array(f["filenames"])
    classes_train = np.array(f["classes"])

with h5py.File('../files/test.h5', mode='r') as f:
    chunks_test = np.array(f["chunks"])
    names_test = np.array(f["filenames"])
    classes_test = np.array(f["classes"])

In [8]:
# Embed the first 500 samples of row 0 of every chunk, training chunks first
# and test chunks after them, so test entries sit at the end of the list.
tss = [
    embedder.fit_transform(chunk[0, :500])
    for chunk in (*chunks_train, *chunks_test)
]
# File names in the same train-then-test order as `tss`.
name = [*names_train, *names_test]

In [9]:
# Fit `ripser` on the embedded series and transform them in one call.
# `tss` holds one embedding per chunk (training chunks first, then test), and
# later cells index the result along axis 0 in that same order.
# NOTE(review): presumably this is the expensive step of the notebook; consider
# caching the result (it is written to an .h5 file further down).
diags = ripser.fit_transform(tss)

In [12]:
# Persist all diagrams together with their file names and class labels.
# 'w' truncates any existing file at this path.
with h5py.File('../files/diags-all-8.h5', 'w') as out:
    diagram_ds = out.create_dataset("diagrams", data=diags)
    # Store the preprocessing settings next to the data they produced.
    diagram_ds.attrs['takens-time-delay'] = 8
    diagram_ds.attrs['chunk-size'] = 500

    out.create_dataset("filenames", data=name)
    # Labels are stored train-first, matching the order of the diagrams.
    out.create_dataset(
        "classes",
        data=np.append(classes_train, classes_test),
    )

In [15]:
# Flat label vector for every diagram: training labels followed by test labels.
classes = np.concatenate((np.ravel(classes_train), np.ravel(classes_test)))

In [22]:
# Pick k reference diagrams per class from the training split.
#
# Changes from the earlier version of this cell:
# * sampling is done WITHOUT replacement, so a class can no longer end up with
#   the same reference diagram several times (duplicate reference columns);
# * a seeded generator makes the selection reproducible across kernel restarts;
# * class labels are taken from the training labels, the same array the
#   indices are drawn from, so a label that occurs only in the test split
#   cannot produce an empty candidate list.
#
# Raises ValueError if some class has fewer than k training samples.
rng = np.random.default_rng(0)
unique_classes = np.unique(classes_train)
k = 5

chosen = []
for c in unique_classes:
    candidates = [i for i, c2 in enumerate(classes_train) if c == c2]
    chosen.extend(rng.choice(candidates, size=k, replace=False))
chosen = np.array(chosen)

In [48]:
# Draw k training indices per class, never reusing a reference index.
#
# * `excluded` is a set, so the membership test is O(1) instead of scanning the
#   `chosen` array once per training sample.
# * A seeded generator makes the draw reproducible.
# * Indices are drawn without replacement whenever the class has at least k
#   candidates; only smaller classes fall back to sampling with replacement so
#   that every class still contributes exactly k rows.
rng = np.random.default_rng(1)
excluded = set(np.asarray(chosen).tolist())
k = 100

chose_to_train = []
for c in unique_classes:
    candidates = [i for i, c2 in enumerate(classes_train)
                  if c == c2 and i not in excluded]
    picked = rng.choice(candidates, size=k, replace=len(candidates) < k)
    chose_to_train.extend(picked)
chose_to_train = np.array(chose_to_train)

In [45]:
# One PairwiseDistance transformer per metric, all created with order=None.
dw, dbn, dbc, dls = (
    PairwiseDistance(metric=metric_name, order=None)
    for metric_name in ('wasserstein', 'bottleneck', 'betti', 'landscape')
)

In [52]:
# Trailing 80 diagrams are used as the test set.
# NOTE(review): presumably 80 == len(chunks_test), since test chunks are
# appended last when the diagrams are built -- confirm.
test_diags = diags[-80:, ...]
# Sampled training diagrams and the per-class reference diagrams.
train_diags = diags[chose_to_train, ...]
chosen_diags = diags[chosen, ...]

In [67]:
# Everything except the trailing 80 diagrams, with the reference rows removed.
train_pool = diags[:-80]
full_train_diags = np.delete(train_pool, chosen, 0)

In [68]:
# Indices (into `diags`) of every non-test diagram that is not a reference.
# np.setdiff1d does this in one vectorised pass; the previous comprehension
# scanned the whole `chosen` array for each candidate index. The result is the
# same sorted index array.
full_train_indexes = np.setdiff1d(np.arange(diags.shape[0] - 80), chosen)
full_train_indexes

array([   0,    1,    2, ..., 2625, 2626, 2627])

In [69]:
# For every metric, write the distances between each remaining training
# diagram and the reference diagrams, one dataset per metric name.
with h5py.File('../files/metrics-train-all.h5', 'w') as out:
    out.create_dataset("reference-index", data=chosen)
    out.create_dataset("train-index", data=full_train_indexes)

    for dist in (dw, dbn, dbc, dls):
        # Fit on the references, then measure the training diagrams against them.
        dist.fit(chosen_diags)
        distances = dist.transform(full_train_diags)
        metric_name = dist.get_params()['metric']
        out.create_dataset(metric_name, data=distances)

In [53]:
# Same layout as the full-train file, but only for the sampled training rows.
with h5py.File('../files/metrics-train.h5', 'w') as out:
    out.create_dataset("reference-index", data=chosen)
    out.create_dataset("train-index", data=chose_to_train)

    for pairwise in (dw, dbn, dbc, dls):
        # References define the columns; sampled training diagrams the rows.
        pairwise.fit(chosen_diags)
        train_matrix = pairwise.transform(train_diags)
        out.create_dataset(pairwise.get_params()['metric'], data=train_matrix)

In [46]:
# Distances from the held-out (last 80) diagrams to the reference diagrams.
with h5py.File('../files/metrics-test.h5', 'w') as out:
    n_diags = diags.shape[0]
    out.create_dataset("reference-index", data=chosen)
    # Positions of the test diagrams inside `diags`.
    out.create_dataset("test-index", data=np.arange(n_diags - 80, n_diags))

    for estimator in (dw, dbn, dbc, dls):
        estimator.fit(chosen_diags)
        test_matrix = estimator.transform(test_diags)
        out.create_dataset(estimator.get_params()['metric'], data=test_matrix)

In [54]:
# Sanity check: list every dataset in the sampled-train metrics file with its
# shape. The file is opened read-only explicitly, and the loop variables are
# named so they do not overwrite the notebook-level sample count `k`.
with h5py.File('../files/metrics-train.h5', mode='r') as f:
    for key, dataset in f.items():
        print(key, dataset.shape)

betti (400, 20, 2)
bottleneck (400, 20, 2)
landscape (400, 20, 2)
reference-index (20,)
train-index (400,)
wasserstein (400, 20, 2)
