In [18]:
import numpy as np
from gtda.diagrams import PersistenceEntropy, ComplexPolynomial
from sklearn.model_selection import ShuffleSplit
from tqdm import tqdm
from sklearn.linear_model import LinearRegression

from notebooks.utils import get_files
from notebooks.utils.experiments import get_labeled_data, run_experiment

In [27]:
def nonstat_features_for_dim(pd, dim):
	"""Compute the three feature groups for one homology dimension of a diagram.

	Parameters
	----------
	pd : ndarray
		2-D array indexed as ``pd[:, 2]``; rows are presumably
		(birth, death, homology dimension) triples in giotto-tda's diagram
		layout -- TODO confirm against the caller. Note the name is a
		persistence diagram here, not the pandas alias.
	dim : int
		Value of the third column selecting which rows are used.

	Returns
	-------
	list of ndarray
		``[entropy, poly, pooling]``: the PersistenceEntropy output, the
		ComplexPolynomial output, and the 10 largest ``death - birth`` values
		in descending order, right-padded with zeros to length 10.

	Raises
	------
	AssertionError
		If ``dim == 0`` and the last selected row does not have an infinite
		death value.
	"""
	# Boolean indexing copies the selected rows, so the in-place edit below
	# never touches the caller's array.
	births, deaths, dims = pd[pd[:, 2] == dim].T

	if dim == 0:
		# The last dimension-0 row is required to be the infinite bar; its
		# death is replaced by the finite value 1 so it can be used in
		# arithmetic (presumably the top of the filtration range -- confirm).
		assert deaths[-1] == np.inf
		deaths[-1] = 1

	diagram = np.c_[births, deaths, dims]
	# Each transformer expects a collection of diagrams; wrap the single
	# diagram in a list and take the only row of the result.
	entropy = PersistenceEntropy().fit_transform([diagram])[0]
	poly = ComplexPolynomial().fit_transform([diagram])[0]

	# Reversed slice of the ascending sort == up to 10 largest lifetimes,
	# largest first; pad when fewer than 10 rows were selected.
	top_lifetimes = np.sort(deaths - births)[:-11:-1]
	n_missing = max(0, 10 - len(births))
	pooling = np.pad(top_lifetimes, (0, n_missing))

	return [entropy, poly, pooling]

# Number of values in each feature group returned by nonstat_features_for_dim,
# keyed by group name and listed in the order the groups are concatenated.
# 'pooling' is fixed at 10 by the padding in that function; the 'entropy' and
# 'poly' sizes presumably reflect the transformers' default settings.
nonstat_features = dict(
	entropy=1,
	poly=20,
	pooling=10,
)

def nonstat_features_all_dims(pd):
	"""Stack the flattened feature vectors for homology dimensions 0 and 1.

	For each dimension, the feature groups returned by
	``nonstat_features_for_dim`` are concatenated into one 1-D vector; the two
	vectors are then stacked as rows.

	Parameters
	----------
	pd : ndarray
		Diagram array forwarded unchanged to ``nonstat_features_for_dim``.

	Returns
	-------
	ndarray
		2-D array with one row per dimension, row 0 for dimension 0 and
		row 1 for dimension 1.
	"""
	per_dim_rows = []
	for dim in (0, 1):
		feature_groups = nonstat_features_for_dim(pd, dim)
		per_dim_rows.append(np.concatenate(feature_groups))
	return np.vstack(per_dim_rows)

In [28]:
# Take the first item yielded by get_files for the 'Random' sampler (the
# leading 1 is presumably the task id, as in get_labeled_data below) and build
# its labeled feature set using nonstat_features_all_dims as the extractor.
# NOTE(review): a later cell unpacks get_files(1) (no sampler argument) as
# pairs via zip(*...); confirm that passing sampler= makes each yielded item a
# bare file collection rather than a (sampler, files) pair.
files = next(get_files(1, sampler='Random'))
X, y = get_labeled_data(1, files, nonstat_features_all_dims)

In [37]:
def get_results(task, files):
	"""Run the linear-regression experiment on each feature group and on all features.

	Parameters
	----------
	task
		Task identifier forwarded to ``get_labeled_data``.
	files
		File collection forwarded to ``get_labeled_data``.

	Returns
	-------
	Whatever ``run_experiment`` returns for the four masks below.
	"""
	features, labels = get_labeled_data(task, files, nonstat_features_all_dims)

	# Index ranges line up with the group sizes in nonstat_features
	# (1 + 20 + 10); the last mask selects every feature.
	entropy_mask = slice(1)
	poly_mask = slice(1, 21)
	pooling_mask = slice(21, None)
	all_mask = slice(0, None)
	feature_masks = [entropy_mask, poly_mask, pooling_mask, all_mask]

	# NOTE(review): 300 is passed positionally; presumably the number of
	# repetitions/splits -- confirm against run_experiment's signature.
	return run_experiment(features, labels, feature_masks, 300, LinearRegression)

# Transpose the items yielded by get_files(1) -- apparently (sampler, files)
# pairs -- into one tuple of samplers and one tuple of file collections.
samplers, files_task = zip(*get_files(1))
# One get_results call per sampler's files, gathered into a single array
# in the same order as `samplers`.
results = np.array([
	get_results(1, sampler_files)
	for sampler_files in files_task
])

100%|██████████| 4/4 [00:02<00:00,  1.60it/s]
100%|██████████| 4/4 [00:01<00:00,  2.03it/s]
100%|██████████| 4/4 [00:02<00:00,  1.42it/s]
100%|██████████| 4/4 [00:01<00:00,  2.75it/s]
100%|██████████| 4/4 [00:02<00:00,  1.59it/s]
100%|██████████| 4/4 [00:01<00:00,  2.87it/s]
100%|██████████| 4/4 [00:01<00:00,  3.07it/s]
100%|██████████| 4/4 [00:01<00:00,  2.14it/s]
