# Importing

In [None]:
# Data wrangling
import numpy as np
import pandas as pd  # Not a requirement of giotto-tda, but is compatible with the gtda.mapper module
import os
import time
import itertools
import open3d as o3d

# Data viz
from gtda.plotting import plot_point_cloud
from gtda.plotting import plot_diagram

# TDA magic
from gtda.homology import VietorisRipsPersistence
from gtda.mapper import (
    CubicalCover,
    make_mapper_pipeline,
    Projection,
    plot_static_mapper_graph,
    plot_interactive_mapper_graph,
    MapperInteractivePlotter
)

# ML tools
from sklearn import datasets
from sklearn.cluster import DBSCAN
from sklearn.decomposition import PCA

from src.feature_vectors import create_feature_vector

# Prepare point clouds

In [None]:
def get_ply_files(folder):
    """Return the paths of all ``.ply`` files directly inside *folder*.

    Args:
        folder: Directory to scan (sub-directories are not searched).

    Returns:
        A list of ``os.path.join(folder, name)`` paths, sorted by file name
        so the result does not depend on the arbitrary order in which
        ``os.listdir`` reports directory entries.
    """
    # Compare the real extension: splitting on '.' and taking the last piece
    # would also accept a file that is literally named "ply".
    names = sorted(
        name for name in os.listdir(folder)
        if os.path.splitext(name)[1] == '.ply'
    )
    return [os.path.join(folder, name) for name in names]

# One folder per object class; the position in this list is the class label
# (0 = tables, 1 = chairs, 2 = octopus, 3 = spiders).
# TODO: change the labelling for binary classification
class_folders = [
    'data/tablesPly',
    'data/chairsPly',
    'data/octopusPly',
    'data/spidersPly',
]
files_per_class = [get_ply_files(folder) for folder in class_folders]
ply_files = [file for files in files_per_class for file in files]

# Derive the labels from the number of .ply files actually collected for each
# class. Counting os.listdir() entries instead would shift every later label
# as soon as a folder contains anything that is not a .ply file.
labels = np.repeat(
    np.arange(len(class_folders), dtype=float),
    [len(files) for files in files_per_class],
)

# Read every file and keep only its point coordinates as a NumPy array.
pcd = [np.asarray(o3d.io.read_point_cloud(file).points) for file in ply_files]

## Persistence and pipeline

In [None]:
# Homology dimensions to compute: H0 (connected components), H1 (loops)
# and H2 (voids).
homology_dimensions = [0, 1, 2]

# Vietoris-Rips persistence on Euclidean distances, run with 6 jobs.
# Edge collapsing is switched on to speed up the H2 computation.
persistence = VietorisRipsPersistence(
    homology_dimensions=homology_dimensions,
    metric="euclidean",
    collapse_edges=True,
    n_jobs=6,
)

# --- Mapper building blocks -------------------------------------------------
# Number of jobs handed to the Mapper pipeline itself.
n_jobs = 1

# Filter function: PCA with two components.
# Alternative that was tried: Projection(columns=[0,1,2])
filter_func = PCA(n_components=2)

# Cover: cubical cover with 4 intervals and an overlap fraction of 0.08.
# Alternative that was tried (OneDimensionalCover is not imported above):
#   OneDimensionalCover(kind='uniform', n_intervals=10, overlap_frac=0.1)
cover = CubicalCover(overlap_frac=0.08, n_intervals=4)

# Clustering step: DBSCAN with the Chebyshev metric.
clusterer = DBSCAN(metric="chebyshev", eps=10)

pipe = make_mapper_pipeline(
    filter_func=filter_func,
    cover=cover,
    clusterer=clusterer,
    n_jobs=n_jobs,
    verbose=False,
)

# Feature vector creation

In [None]:
# create_feature_vector returns a pair for every point cloud; the two parts
# are collected in parallel lists, so index k in both lists refers to pcd[k].
entropy_feature_vectors = []
feature_vectors = []
total = len(pcd)

# perf_counter is a monotonic clock meant for measuring elapsed intervals.
start = time.perf_counter()
print('\r', "0%", end="")
for done, pc in enumerate(pcd, start=1):
    e_fv, fv = create_feature_vector(pc, pipe, persistence)

    entropy_feature_vectors.append(e_fv)
    feature_vectors.append(fv)

    # Report after the work is done so the indicator ends at 100%.
    print('\r', f"{done * 100 // total}%", end="")
end = time.perf_counter()

# Finish the in-place progress line before printing the summary.
print()
print("Time to create feature vectors:", end - start, "s")

## With homologies

In [None]:
# Number of entries in one feature vector, read off the first sample.
num_features = len(feature_vectors[0])

# For each of the three homology entries, build the candidate datasets:
# the homology entry alone, then the homology entry extended with every
# subset of one, two or three of the other features.
for homology_idx in range(3):
    # Baseline dataset: only the selected homology entry of each sample.
    final_fvs = [sample[homology_idx] for sample in entropy_feature_vectors]

    # TODO add train and test

    for number_of_additional_features in range(1, 4):
        # Every possible feature subset of the current size.
        for combination in itertools.combinations(
            range(num_features), number_of_additional_features
        ):
            print(combination)

            # Homology entry first, followed by the selected extra features
            # of the same sample.
            final_fvs = [
                list(sample[homology_idx])
                + [feature_vectors[row][col] for col in combination]
                for row, sample in enumerate(entropy_feature_vectors)
            ]

            # TODO add train and test

            print(final_fvs)


## Without homologies

In [None]:
# Same subset enumeration as the homology variant, but using only the plain
# feature vectors. num_features is defined in the "With homologies" cell.
for number_of_additional_features in range(1, 4):
    # Every possible feature subset of the current size.
    for combination in itertools.combinations(
        range(num_features), number_of_additional_features
    ):
        print(combination)

        # Keep only the selected features of each sample.
        final_fvs = [
            [sample[col] for col in combination] for sample in feature_vectors
        ]

        # TODO add train and test

        print(final_fvs)
