# Test the distance preservation properties of FJLT

In [None]:
from tqdm import tqdm
from random import sample
import sys
sys.path.append('../../../utils')
from TurboFJLT import *

In [None]:
# HDF5 data file handed to the snapshot reader below
data_path = "../../../data/fine_airfoil_cascade.h5"

In [None]:
class FJLT:
    """Parameters and generated matrices of a Fast Johnson-Lindenstrauss Transform.

    On construction the embedding dimension and sparsity are derived from the
    inputs and handed to ``fjlt_Matrices``; the three objects it returns are
    stored as ``P``, ``s`` and ``D`` (their exact meaning is defined by the
    ``TurboFJLT`` helper module).

    Args:
        vec_dim: Dimension of the state vectors to embed. Must be positive.
        snapshots_dim: Number of snapshots the embedding is sized for. Must be
            at least 2, since ``log(1) == 0`` would give an empty embedding.
        max_distortion: Target maximum distortion. Must be positive.

    Raises:
        ValueError: If any argument is outside the ranges above.
    """

    def __init__(self, vec_dim, snapshots_dim, max_distortion):
        # Validate up front: a zero distortion would otherwise surface as an
        # opaque OverflowError from int(inf) further down.
        if vec_dim <= 0:
            raise ValueError("vec_dim must be positive, got {}".format(vec_dim))
        if snapshots_dim < 2:
            raise ValueError(
                "snapshots_dim must be at least 2, got {}".format(snapshots_dim))
        if max_distortion <= 0:
            raise ValueError(
                "max_distortion must be positive, got {}".format(max_distortion))

        self.vec_dim = vec_dim
        self.snapshots_dim = snapshots_dim
        self.max_distortion = max_distortion

        log_n = np.log(self.snapshots_dim)
        # embedding_dim = 8 * floor(ln(n) / eps^2)
        self.embedding_dim = 8*int(log_n/self.max_distortion/self.max_distortion)
        # sparsity = ln(n)^2 / vec_dim
        self.sparsity = log_n*log_n/self.vec_dim
        self.__gen_matrices()

    def __str__(self):
        """Return a multi-line summary of the transform parameters and setup time."""
        return (
            "State vector dimensions:        {vec_dim}\n"
            "Number of embedded snapshots:   {snapshots_dim}\n"
            "Maximum distortion:             {max_distortion}\n"
            "Embedding dimension:            {embedding_dim}\n"
            "Sparsity:                       {sparsity:.5e}\n"
            "Generating matrices took:       {perf_counter_matrices:.5f} seconds"
        ).format(**vars(self))

    def __gen_matrices(self):
        """Generate the transform matrices and record how long it took."""
        # perf_counter is monotonic and high-resolution, unlike the wall clock.
        t0 = time.perf_counter()
        self.P, self.s, self.D = fjlt_Matrices(self.vec_dim,
                                               self.snapshots_dim,
                                               self.embedding_dim,
                                               self.sparsity)
        self.perf_counter_matrices = time.perf_counter() - t0

In [None]:
# Class to read in data for turbomachinery problem
class TurboHDF5Reader:
    """Chunked reader for snapshots stored in an HDF5 file.

    The constructor opens ``file`` once to read the temporal and spatial
    parameters. Snapshots are then served one at a time through
    ``reset_chunked_loading`` / ``load_next``; the file is re-opened once per
    chunk so that only ``chunk_dim`` snapshots are held in memory at a time.

    Args:
        file: Path to the HDF5 file.
    """

    def __init__(self, file):
        self.file = file
        self.__load_parameters()

    def __str__(self):
        """Return a multi-line summary of the parameters read from the file."""
        return ("Extracting cascade data from: {file}\n"
                "TEMPORAL\n"
                "Number of snapshots:          {num_snapshots}\n"
                "Timestep:                     {timestep:.5e}\n"
                "SPATIAL\n"
                "Number of passages:           {num_passages}\n"
                "Number of regions:            {num_regions}\n"
                "Region shapes:                {regions_shape}\n"
                "State vector dimension:       {state_dim}"
               ).format(**self.__dict__)

    def __load_parameters(self):
        """Read snapshot count, timestep and region layout from the file."""
        # The number of degrees of freedom is 4 in this case
        dofs = 4
        with h5.File(self.file, 'r') as f:
            self.keys = list(f.keys())

            # Temporal parameters
            # NOTE(review): two root-level entries are excluded from the
            # snapshot count -- confirm which ones against the file layout.
            self.num_snapshots = len(self.keys)-2
            self.timestep = f[self.keys[1]].attrs['t'][0]-f[self.keys[0]].attrs['t'][0]

            # Spatial parameters, taken from the first root-level entry
            first_field = f["/{}/field".format(self.keys[0])]
            self.num_regions = len(first_field)
            self.num_passages = self.num_regions-2
            self.regions_shape = first_field.attrs["param"]
            # NOTE(review): num_passages enters this product twice; confirm
            # the result matches the length of a loaded snapshot.
            self.state_dim = ((
                self.regions_shape[0] +
                self.regions_shape[2])*(self.regions_shape[3]-1)*self.num_passages +
                self.regions_shape[1]*self.regions_shape[3]
                )*self.num_passages*dofs

    def __load_snapshot(self, h5_file, snapshot_id):
        """Return one snapshot as a 1-D array of all its regions, flattened and concatenated."""
        key = self.keys[snapshot_id]
        regions = [
            h5_file["/{}/field/{}".format(key, region)][()].flatten()
            for region in range(self.num_regions)
        ]
        return np.hstack(regions)

    def __load_snapshot_chunk(self, snap_chunk_list):
        """Open the file and load every snapshot whose index is in ``snap_chunk_list``."""
        with h5.File(self.file, 'r') as f:
            return [self.__load_snapshot(f, snap_ind) for snap_ind in snap_chunk_list]

    def __setup_chunking(self, snapshot_list, chunk_dim):
        """Split ``snapshot_list`` into consecutive chunks of at most ``chunk_dim`` indices."""
        # Stepping by chunk_dim also yields the shorter trailing chunk, if any.
        self.snapshot_chunks_inds = [
            snapshot_list[start:start+chunk_dim]
            for start in range(0, len(snapshot_list), chunk_dim)
        ]
        # Extra params for data
        self.chunk_dim = chunk_dim
        self.num_chunks = len(self.snapshot_chunks_inds)

    def reset_chunked_loading(self, snapshot_list, chunks_dim):
        """Drop any cached chunk and queue ``snapshot_list`` for ``load_next``.

        Args:
            snapshot_list: Snapshot indices to serve, in order. Indices may
                repeat. An empty list is allowed and queues nothing.
            chunks_dim: Maximum number of snapshots held in memory at once;
                clamped to the length of ``snapshot_list``.

        Raises:
            ValueError: If ``chunks_dim`` is not positive.
            IndexError: If any index is outside ``[0, num_snapshots)``.
        """
        self.q_chunk = None
        if chunks_dim < 1:
            raise ValueError("chunks_dim must be positive, got {}".format(chunks_dim))
        indices = np.asarray(snapshot_list)
        # Explicit raise instead of assert so the check survives `python -O`.
        if np.any(indices < 0) or np.any(indices >= self.num_snapshots):
            raise IndexError("Index out of range in snapshot list")
        # Keep the chunk size >= 1 so an empty list cannot cause a division by zero.
        chunk_dim = max(1, min(chunks_dim, len(snapshot_list)))
        self.__setup_chunking(snapshot_list, chunk_dim)
        self.__num_queued = len(snapshot_list)
        self.__current_index = -1

    def load_next(self):
        """Return the next queued snapshot, reading a new chunk when needed.

        Raises:
            IndexError: If every queued snapshot has already been returned.
        """
        next_index = self.__current_index + 1
        if next_index >= self.__num_queued:
            raise IndexError("No more snapshots queued; call reset_chunked_loading")
        self.__current_index = next_index

        chunk_index, in_chunk_index = divmod(next_index, self.chunk_dim)

        # Load only when we move to a new chunk
        if in_chunk_index == 0:
            # Release the previous chunk first so two chunks never coexist.
            self.q_chunk = None
            self.q_chunk = self.__load_snapshot_chunk(self.snapshot_chunks_inds[chunk_index])
        return self.q_chunk[in_chunk_index]

In [None]:
# Read the file parameters once and print a summary of them.
# `reader` is used as a module-level global by fjlt_distortion_statistics below.
reader = TurboHDF5Reader(data_path)
print(reader)

## Calculate the FJLT distortion for pairs of vectors

In [None]:
def fjlt_distortion_statistics(num_snapshot_linkage, num_samples,
                               memory_chunk_size=50, max_distortion=0.01):
    """Measure how well the FJLT preserves distances between random snapshot pairs.

    Builds an ``FJLT`` sized for ``num_snapshot_linkage`` snapshots, draws
    random index pairs ``(i, j)`` with ``i < j`` from the module-level
    ``reader``, and compares the Euclidean distance between the two snapshots
    before and after the transform.

    Args:
        num_snapshot_linkage: Number of snapshots the FJLT is sized for.
        num_samples: Number of random pairs to draw. Duplicate draws are
            dropped, so fewer pairs than this may actually be evaluated.
        memory_chunk_size: Number of snapshots the reader loads per chunk.
        max_distortion: Target maximum distortion passed to ``FJLT``.

    Returns:
        Three lists with one entry per evaluated pair:
        the relative distance error in percent, the distance in the original
        space, and the smaller of the two original vector norms.
    """
    fjlt = FJLT(reader.state_dim, num_snapshot_linkage, max_distortion)
    print(fjlt)

    # Generate non-repeating pairs of indices (the set discards duplicates)
    pairs = set()
    for _ in range(num_samples):
        i = np.random.randint(0, reader.num_snapshots-1)
        j = np.random.randint(i+1, reader.num_snapshots)
        pairs.add((i, j))

    distortion = []
    dist_xy = []
    smallest_vec_len = []
    if not pairs:
        # Nothing to evaluate; avoid handing the reader an empty work list.
        return distortion, dist_xy, smallest_vec_len

    # Flatten to [i0, j0, i1, j1, ...] so the reader serves pairs back to back
    snapshot_list = [index for pair in pairs for index in pair]
    reader.reset_chunked_loading(snapshot_list, memory_chunk_size)

    # Compute pairwise distortion; the progress bar counts pairs
    for _ in tqdm(range(len(pairs))):
        q_0 = reader.load_next()
        q_1 = reader.load_next()
        b_0 = applyFJLT(q_0, fjlt.P, fjlt.s, fjlt.D)
        b_1 = applyFJLT(q_1, fjlt.P, fjlt.s, fjlt.D)

        xy = np.linalg.norm(q_0-q_1)
        XY = np.linalg.norm(b_0-b_1)
        dist_xy.append(xy)
        # Relative error in percent; inf/nan if the two snapshots are identical
        distortion.append(abs(xy-XY)/xy*100)
        smallest_vec_len.append(min(np.linalg.norm(q_0), np.linalg.norm(q_1)))

    # Free a few 100 MBs of memory (drops the reader's cached chunk)
    reader.reset_chunked_loading(snapshot_list, memory_chunk_size)

    return distortion, dist_xy, smallest_vec_len

In [None]:
# Run the distortion study for each FJLT sizing and store the metrics on disk
num_linking_snapshots = [2, 4, 6]
for n_sp in num_linking_snapshots:
    distortion, dist_xy, smallest_vec_len = fjlt_distortion_statistics(n_sp, num_samples=60000)
    out_path = "../data/distortion_metrics_{}_linking_snapshots.h5".format(n_sp)
    metrics = {
        "/distortion": distortion,
        "/dist_xy": dist_xy,
        "/vec_len": smallest_vec_len,
    }
    # One output file per sizing, with one dataset per metric
    with h5.File(out_path, 'w') as f:
        for dataset_name, values in metrics.items():
            f.create_dataset(dataset_name, data=values)