In [18]:
from gtda import diagrams, plotting
import h5py 
import numpy as np

In [2]:
# Load the "diagrams" dataset from each ../files/diags-all<suffix>.h5 file.
# `diags[k]` corresponds to `suffixes[k]`.
diags = []
suffixes = ["-4", "-8", "-14"]

for suf in suffixes:
    # Open explicitly read-only: these are input files, and the default mode
    # of h5py.File is not the same across h5py releases.
    with h5py.File(f"../files/diags-all{suf}.h5", mode="r") as f:
        # np.array(...) copies the dataset into memory, so the data stays
        # usable after the file is closed.
        diags.append(np.array(f["diagrams"]))

In [4]:
# Vectorisers applied to every diagram set below; both use the library's
# default parameters.
BC, PL = diagrams.BettiCurve(), diagrams.PersistenceLandscape()

In [5]:
# For each diagram set, fit and transform with both vectorisers.
# betti[k] / perld[k] hold the Betti curves / landscapes of diags[k].
# After the loop each transformer is left fitted on the last entry of `diags`.
betti, perld = [], []
for diagram_set in diags:
    betti.append(BC.fit_transform(diagram_set))
    perld.append(PL.fit_transform(diagram_set))

In [6]:
# Add 1 to slot 0 of the second axis of every Betti-curve array, in place.
# Presumably slot 0 is homology dimension 0 and the offset accounts for the
# one component that never dies -- TODO confirm.
# The update mutates `betti` itself, so re-running this cell adds 1 again.
for curve_set in betti:
    dim0_view = curve_set[:, 0, :]
    np.add(dim0_view, 1, out=dim0_view)

In [9]:
# Read the "classes" dataset (one label per sample) from the "-4" file only.
# NOTE(review): the labels are presumably identical in the "-8" and "-14"
# files, since they are used for all three diagram sets below -- confirm.
# Opened explicitly read-only; the path has no placeholders, so it is a plain
# string rather than an f-string.
with h5py.File("../files/diags-all-4.h5", mode="r") as f:
    classes = np.array(f["classes"])

array([b'Sound_Drum', b'Sound_Guitar', b'Sound_Guitar', ...,
       b'Sound_Violin', b'Sound_Piano', b'Sound_Drum'], dtype='|S12')

In [15]:
# Number of trailing samples left out of the class averages.
# Presumably these form a held-out / test split -- TODO confirm.
N_EXCLUDED_TAIL = 80

# Use an explicit stop index instead of `[:-N]` so that N_EXCLUDED_TAIL = 0
# keeps every sample (`[:-0]` would select nothing).
n_averaged = max(len(classes) - N_EXCLUDED_TAIL, 0)
averaged_classes = classes[:n_averaged]

unique_classes = np.unique(classes)
# average_betti[c][k] / average_lands[c][k]: mean curve of class
# unique_classes[c] for diagram set k, taken over the first n_averaged samples.
average_betti = []
average_lands = []

for cl in unique_classes:
    # Boolean mask selecting the averaged samples that belong to this class.
    indices = averaged_classes == cl
    average_betti.append([np.mean(cv[:n_averaged][indices], axis=0) for cv in betti])
    average_lands.append([np.mean(cv[:n_averaged][indices], axis=0) for cv in perld])

In [16]:
# Transpose the class-major lists into diagram-set-major order and stack the
# per-class averages: entry k has the classes (in `unique_classes` order)
# along its first axis.
avr_betti_per_diags = list(map(np.stack, zip(*average_betti)))
avr_lands_per_diags = list(map(np.stack, zip(*average_lands)))

In [17]:
# Write all curves and their per-class averages to ../files/curves.h5.
# mode='w' creates the file, replacing any existing one.
with h5py.File('../files/curves.h5', mode='w') as out_file:
    for idx, suffix in enumerate(suffixes):
        # Dataset name -> array for this diagram set, in write order.
        datasets = {
            f"betti{suffix}": betti[idx],
            f"landscape{suffix}": perld[idx],
            f"avr-betti{suffix}": avr_betti_per_diags[idx],
            f"avr-landscape{suffix}": avr_lands_per_diags[idx],
        }
        for dataset_name, values in datasets.items():
            out_file.create_dataset(dataset_name, data=values)
    # Order of the classes along the first axis of the "avr-*" datasets.
    out_file.create_dataset("class order", data=unique_classes)

In [52]:
# Plot one averaged landscape from the first diagram set (suffixes[0]).
# `sample=3` presumably selects index 3 along the first (class) axis, i.e. the
# class unique_classes[3] -- confirm against the gtda plot documentation.
# NOTE(review): PL was last fitted on the final entry of `diags`, so the
# filtration axis drawn here presumably comes from that fit rather than from
# diags[0] -- confirm, and refit PL on diags[0] if the axis matters.
PL.plot(avr_lands_per_diags[0], sample=3)

In [40]:
def lp_distance(curve1, curve2, p=2):
    """Row-wise L^p distance between two equally shaped 2-D arrays.

    Parameters
    ----------
    curve1, curve2 : array_like
        Arrays of matching (or broadcastable) shape; the distance is
        accumulated along axis 1.
    p : int or float, default 2
        Order of the norm. ``np.inf`` gives the maximum absolute difference.

    Returns
    -------
    numpy.ndarray
        One distance per row: ``(sum_k |curve1 - curve2|**p) ** (1/p)``.
    """
    # Take the absolute value before raising to the power: without it, odd p
    # lets differences of opposite sign cancel (p=1) or yields a negative sum
    # whose fractional power is NaN (p=3). For even p the result is unchanged.
    abs_diff = np.abs(np.asarray(curve1) - np.asarray(curve2))
    if np.isinf(p):
        # Limit case of the p-norm: largest absolute difference in each row.
        return np.max(abs_diff, axis=1)
    return np.power(np.sum(np.power(abs_diff, p), axis=1), 1 / p)

In [59]:
def distances_to_class_averages(curves, class_averages):
    """Distance from every sample curve to every class-average curve.

    Parameters
    ----------
    curves : numpy.ndarray
        Per-sample curves; the first axis indexes samples.
    class_averages : numpy.ndarray
        Per-class average curves; the first axis indexes classes and the
        second axis is the one `lp_distance` returns one value per entry for
        (assumed to be the homology dimension -- TODO confirm).

    Returns
    -------
    numpy.ndarray
        Float array of shape (n_samples, n_classes * n_dims). Row i holds, for
        each class in turn, the per-dimension distances of sample i to that
        class average.
    """
    n_samples = curves.shape[0]
    # Derive the layout from the data instead of hard-coding 4 classes and
    # 2 dimensions, so a different class set does not silently break.
    n_classes, n_dims = class_averages.shape[:2]
    # np.empty is the supported way to allocate an uninitialised array; every
    # entry is filled in the loop below.
    distances = np.empty((n_samples, n_classes, n_dims))
    for i, curve in enumerate(curves):
        for j, average in enumerate(class_averages):
            distances[i, j] = lp_distance(curve, average)
    return distances.reshape(n_samples, n_classes * n_dims)


# One feature matrix per diagram set, for Betti curves and for landscapes.
metrics_betti = [
    distances_to_class_averages(curves, avr_cv)
    for avr_cv, curves in zip(avr_betti_per_diags, betti)
]

metrics_lands = [
    distances_to_class_averages(curves, avr_cv)
    for avr_cv, curves in zip(avr_lands_per_diags, perld)
]

In [61]:
# Write the distance-to-class-average features to ../files/avr-metrics.h5,
# one "betti<suffix>" and one "landscape<suffix>" dataset per diagram set.
# mode='w' creates the file, replacing any existing one.
with h5py.File('../files/avr-metrics.h5', mode='w') as out_file:
    for idx, suffix in enumerate(suffixes):
        datasets = {
            f"betti{suffix}": metrics_betti[idx],
            f"landscape{suffix}": metrics_lands[idx],
        }
        for dataset_name, values in datasets.items():
            out_file.create_dataset(dataset_name, data=values)