# TEST : readFiles

Here we execute cells similar to those in 1_readFiles, but check whether the results change when cells are executed twice, etc.

Comparison is mostly done using 
> assert arr0 == arr0_2test
If more calculations are involved, and small numerical differences can therefore occur, we compare the maximal difference instead.

The names of the values computed the second time always carry the suffix
> _2test

After each comparison the _2test is deleted from the namespace.

At the end the namespace is deleted via
> %reset -f
to clear up space.

In [101]:
%load_ext autoreload
%autoreload 2

# change current working directory to parent folder, to execute the script as if we executed it there
%pwd
%cd ..

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
/Users/janfelixsenge


In [102]:
import numpy as np
import pandas as pd
import time

import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.patches as patches  # for plotSubregions
import seaborn as sns

from pathlib import Path

# import path variables
from src.config import data_path_raw_simulation, data_path_interim
# import the read and check method files
from src.preprocess.read_simulation_files import (
    read_heightmaps_sequences, check_if_regulargrid,
    interpolate_grid, interpolate_regular_grid)

# Read and interpolate

Read numerical Simulation csv-files and interpolate them on a regular grid for different approaches.

## Read

Read the csv files in the dir_read_path and create lists of z-values, 
x-values and y-values as well as a dataframe grabbing the coverage 
class, sequence number as well as number of impacts from the filenames.

Since the coverage classes are not representative of the actual coverage values they will be replaced later on.

In [103]:
# Read all csv files of the numerical simulation twice with identical
# arguments; both passes have to return the same values.
z, x, y, df_info = read_heightmaps_sequences(data_path_raw_simulation,
                                             z_name='U3',
                                             x_name='x',
                                             y_name='y')

z_2test, x_2test, y_2test, df_info_2test = read_heightmaps_sequences(
    data_path_raw_simulation,
    z_name='U3',
    x_name='x',
    y_name='y')

# np.array_equal returns a plain False when the shapes differ, whereas
# `(a == b).all()` would broadcast or raise before the assert is evaluated.
assert np.array_equal(z, z_2test)
assert np.array_equal(x, x_2test)
assert np.array_equal(y, y_2test)
assert (df_info == df_info_2test).all().all()

# the comparison is done: remove the second-pass values from the namespace
del z_2test, x_2test, y_2test, df_info_2test

# keep a copy of the coverage values contained in the filenames of the
# heightmaps in a separate column to have them for later
df_info['coverage_old'] = df_info['coverage'].values


# Interpolate to get regular grid

Compare the regular grid interpolations:

In [105]:
# Use an interpolation size so that we are close to the grid values of the
# original 74x74 grids. Using np.linspace in interpolate_regular_grid,
# the interpolation size giving the same grid values for an evenly spaced
# regular grid is:
interpolation_size = 1 + 73 * 14
znew, xnew, ynew = interpolate_regular_grid(z, x, y,
                                            interpolation_size,
                                            bigger_grid=False)

# second pass with identical arguments
znew_2test, xnew_2test, ynew_2test = interpolate_regular_grid(
    z, x, y,
    interpolation_size,
    bigger_grid=False)

# np.array_equal returns a plain False when the shapes differ, whereas
# `(a == b).all()` would broadcast or raise before the assert is evaluated.
assert np.array_equal(znew, znew_2test)
assert np.array_equal(xnew, xnew_2test)
assert np.array_equal(ynew, ynew_2test)

# the comparison is done: remove the second-pass values from the namespace
del znew_2test, xnew_2test, ynew_2test

# Estimate the coverage values

In [107]:
# Load the stored surface data and pull out the arrays we need. The context
# manager closes the .npz archive again as soon as the arrays have been read,
# so no open file handle stays behind in the notebook.
with np.load(data_path_interim / 'surface_numSimulation.npz') as data:
    znew = data['values']
    xnew = data['x_grid']
    ynew = data['y_grid']

# information table stored alongside the surface data
df_info = pd.read_csv(data_path_interim / 'surface_numSimulation_information.csv')

In [108]:
from src.preprocess.approximate_coverage import get_local_minima

# radii of the larger circle and of a single indent circle
radius_circ = 0.2 / 2
radius_indent = 0.055 / 2

# important to guarantee that certain points can be a little bit closer
# than the complete circle
eps = 0.005

# Evaluate the very same call two times in a row: the first result is the
# reference, the second one (suffix _2test) is compared against it later.
minima_arr, minima_arr_2test = (
    get_local_minima(z=znew,
                     grid=xnew,
                     radius_indent=radius_indent,
                     eps=eps,
                     df_info=df_info)
    for _ in range(2)
)


Get rid of the np.nan values inside the arrays by unstacking them and sorting them (to account for different order in the method). 

unstack --> list of numpy arrays of different size (ni,2).

Then compare them by comparing each of the list elements.

In [109]:
def _sorted_valid_minima(minima):
    """Return a list with one sorted integer array per entry of *minima*.

    For every ``minima[i]`` the rows whose first column is NaN are dropped
    and the remaining rows are cast to ``int``. The rows are then ordered
    lexicographically (first column, ties broken by the following columns),
    so the result does not depend on the order in which the rows were
    stored. Sorting by the first column alone would leave rows with an
    equal first value in their input order.
    """
    result = []
    for entry in minima:
        rows = entry[~np.isnan(entry[:, 0]), :].astype('int')
        # np.lexsort uses the LAST key as primary key, hence the reversal
        result.append(rows[np.lexsort(rows.T[::-1])])
    return result


unstack = _sorted_valid_minima(minima_arr)
unstack_2test = _sorted_valid_minima(minima_arr_2test)

# check if they are the same; np.array_equal also copes with arrays that
# differ in shape (it simply returns False)
assert len(unstack) == len(unstack_2test)
assert all(np.array_equal(first, second)
           for first, second in zip(unstack, unstack_2test))

# the comparison is done: remove the second-pass values from the namespace
del minima_arr_2test, unstack_2test

Now check the masks and the coverage values:

In [111]:
from src.preprocess.approximate_coverage import approximate_coverage

# first pass: reference values
coverage_list, mask_list = approximate_coverage(minima_arr=minima_arr,
                                                xgrid=xnew,
                                                ygrid=ynew,
                                                radius_circ=radius_circ,
                                                radius_indent=radius_indent,
                                                df_info=df_info)

# second pass with identical arguments
coverage_list_2test, mask_list_2test = approximate_coverage(
    minima_arr=minima_arr,
    xgrid=xnew,
    ygrid=ynew,
    radius_circ=radius_circ,
    radius_indent=radius_indent,
    df_info=df_info)

# check: np.array_equal converts both lists to arrays and compares all
# entries; differing shapes yield False instead of broadcasting or raising
assert np.array_equal(coverage_list, coverage_list_2test)
assert np.array_equal(mask_list, mask_list_2test)

# the comparison is done: remove the second-pass values from the namespace
del coverage_list_2test, mask_list_2test

## Reset complete namespace

So that the jupyter notebook doesn't clutter anything, we delete its namespace in the end.

In [112]:
# %reset -f