# Too Many or Too Few
### Sampling Bounds for Topological Descriptors
This [IPython](http://ipython.org/) notebook is electronic supplementary material for the article.


## Preprocessing
#### The following code will download the MPEG7 and EMNIST data and will create and store the data in the appropriate folders. 
You can find the MPEG7 data at: http://www.dabi.temple.edu/~shape/MPEG7
    
You can find the EMNIST data at: https://www.nist.gov/itl/products-and-services/emnist-dataset

In [None]:
# Star-import the public names of the project's get_data module. Neither
# preprocess_data nor dir_list is defined in this notebook, so both are
# expected to come from this import.
from get_data import *

# Run the preprocessing step over dir_list. According to the notebook text,
# this downloads the MPEG7 and EMNIST data and stores it in the appropriate
# folders; the actual behavior lives in get_data and is not visible here.
preprocess_data(dir_list)

#### The following code will create the graphs for RANDPTS, MPEG7, and EMNIST for the .005 boundary.

In [None]:
# Generate the graphs using eps = .005, writing to "graphs_005_approx".
from generate_graphs import *
import generate_graphs as _generate_graphs

eps = .005
graphs_dir = "graphs_005_approx"

# `from generate_graphs import *` only copies names into this notebook, so the
# two assignments above rebind notebook-local names. A function looks up its
# globals in the module that defines it, so main() would never see them.
# Mirror the settings onto the module itself.
# NOTE(review): assumes main() is defined in generate_graphs and reads the
# module-level names `eps` and `graphs_dir` at call time -- confirm.
_generate_graphs.eps = eps
_generate_graphs.graphs_dir = graphs_dir

main()

#### The following code will create the graphs for RANDPTS, MPEG7, and EMNIST for the .001 boundary.

In [None]:
# Generate the graphs using eps = .001, writing to "graphs_001_approx".
from generate_graphs import *
import generate_graphs as _generate_graphs

eps = .001
graphs_dir = "graphs_001_approx"

# `from generate_graphs import *` only copies names into this notebook, so the
# two assignments above rebind notebook-local names. A function looks up its
# globals in the module that defines it, so main() would never see them.
# Mirror the settings onto the module itself.
# NOTE(review): assumes main() is defined in generate_graphs and reads the
# module-level names `eps` and `graphs_dir` at call time -- confirm.
_generate_graphs.eps = eps
_generate_graphs.graphs_dir = graphs_dir

main()

## Running experiments
Once the graphs are created we can choose which experiment we would like to run. 
Note that you can change graphs_dir and out_graphs_dir to select which boundary you would like to use.
You can also choose to run this experiment for all datasets or just a particular dataset by specifying data_type.

### Smallest Stratum Experiment

In [None]:
from main import *
import main as _main_module

# Modify for the approximation type used for emnist and mpeg7;
# choices include graphs_001_approx and graphs_005_approx.
graphs_dir = "graphs_005_approx"
# Same as above, but specifies where to write results.
out_graphs_dir = "output_005_approx"

# `from main import *` copies names into this notebook; rebinding them here
# does not change the globals that functions defined in main.py look up.
# Mirror the settings onto the module so get_exp_graphs()/exp() can see them.
# NOTE(review): assumes those functions read the module-level names
# `graphs_dir` / `out_graphs_dir` of main at call time -- confirm.
_main_module.graphs_dir = graphs_dir
_main_module.out_graphs_dir = out_graphs_dir

# Main entry point for setting up and executing experiments.
# `time`, `random`, and `np` are expected to arrive via the star import above.
if __name__ == "__main__":
    start = time.time()
    # Set for random experiments only
    random.seed(423652346)
    np.random.seed(423652346)

    #### exp type is:
    #       1 for stratification experiment (distribution_exp)
    #       2 for random sample experiment (sample_exp)
    #       3 for smallest angle experiment (smallest_angle_exp)
    #       4 for a uniform random sample experiment (uniform_sample_exp)
    #       5 for all four exps
    exp_type = 3
    #### data is:
    #       1 for random
    #       2 for MPEG7 (classes from PHT paper - Turner et al.)
    #       3 for EMNIST
    #       4 for all three
    #       5 for test
    data_type = 2

    exp_list = get_exp_graphs(data_type)

    # Run the chosen experiment on each graph, reporting progress as we go.
    total = len(exp_list)
    for counter, e in enumerate(exp_list, start=1):
        print(f"Graph {counter} of {total}")
        exp(e["G"], e["output_file"], exp_type)

    print(f"Execution time: {time.time() - start}(s)")

### Stratification Experiment

In [None]:
from main import *
import main as _main_module

# Modify for the approximation type used for emnist and mpeg7;
# choices include graphs_001_approx and graphs_005_approx.
graphs_dir = "graphs_005_approx"
# Same as above, but specifies where to write results.
out_graphs_dir = "output_005_approx"

# `from main import *` copies names into this notebook; rebinding them here
# does not change the globals that functions defined in main.py look up.
# Mirror the settings onto the module so get_exp_graphs()/exp() can see them.
# NOTE(review): assumes those functions read the module-level names
# `graphs_dir` / `out_graphs_dir` of main at call time -- confirm.
_main_module.graphs_dir = graphs_dir
_main_module.out_graphs_dir = out_graphs_dir

# Main entry point for setting up and executing experiments.
# `time`, `random`, and `np` are expected to arrive via the star import above.
if __name__ == "__main__":
    start = time.time()
    # Set for random experiments only
    random.seed(423652346)
    np.random.seed(423652346)

    #### exp type is:
    #       1 for stratification experiment (distribution_exp)
    #       2 for random sample experiment (sample_exp)
    #       3 for smallest angle experiment (smallest_angle_exp)
    #       4 for a uniform random sample experiment (uniform_sample_exp)
    #       5 for all four exps
    exp_type = 1
    #### data is:
    #       1 for random
    #       2 for MPEG7 (classes from PHT paper - Turner et al.)
    #       3 for EMNIST
    #       4 for all three
    #       5 for test
    data_type = 2

    exp_list = get_exp_graphs(data_type)

    # Run the chosen experiment on each graph, reporting progress as we go.
    total = len(exp_list)
    for counter, e in enumerate(exp_list, start=1):
        print(f"Graph {counter} of {total}")
        exp(e["G"], e["output_file"], exp_type)

    print(f"Execution time: {time.time() - start}(s)")

### Random Sample Experiment

In [None]:
from main import *
import main as _main_module

# Modify for the approximation type used for emnist and mpeg7;
# choices include graphs_001_approx and graphs_005_approx.
graphs_dir = "graphs_005_approx"
# Same as above, but specifies where to write results.
out_graphs_dir = "output_005_approx"

# `from main import *` copies names into this notebook; rebinding them here
# does not change the globals that functions defined in main.py look up.
# Mirror the settings onto the module so get_exp_graphs()/exp() can see them.
# NOTE(review): assumes those functions read the module-level names
# `graphs_dir` / `out_graphs_dir` of main at call time -- confirm.
_main_module.graphs_dir = graphs_dir
_main_module.out_graphs_dir = out_graphs_dir

# Main entry point for setting up and executing experiments.
# `time`, `random`, and `np` are expected to arrive via the star import above.
if __name__ == "__main__":
    start = time.time()
    # Set for random experiments only
    random.seed(423652346)
    np.random.seed(423652346)

    #### exp type is:
    #       1 for stratification experiment (distribution_exp)
    #       2 for random sample experiment (sample_exp)
    #       3 for smallest angle experiment (smallest_angle_exp)
    #       4 for a uniform random sample experiment (uniform_sample_exp)
    #       5 for all four exps
    exp_type = 2
    #### data is:
    #       1 for random
    #       2 for MPEG7 (classes from PHT paper - Turner et al.)
    #       3 for EMNIST
    #       4 for all three
    #       5 for test
    data_type = 2

    exp_list = get_exp_graphs(data_type)

    # Run the chosen experiment on each graph, reporting progress as we go.
    total = len(exp_list)
    for counter, e in enumerate(exp_list, start=1):
        print(f"Graph {counter} of {total}")
        exp(e["G"], e["output_file"], exp_type)

    print(f"Execution time: {time.time() - start}(s)")

### Uniform Random Sample Experiment

In [None]:
from main import *
import main as _main_module

# Modify for the approximation type used for emnist and mpeg7;
# choices include graphs_001_approx and graphs_005_approx.
graphs_dir = "graphs_005_approx"
# Same as above, but specifies where to write results.
out_graphs_dir = "output_005_approx"

# `from main import *` copies names into this notebook; rebinding them here
# does not change the globals that functions defined in main.py look up.
# Mirror the settings onto the module so get_exp_graphs()/exp() can see them.
# NOTE(review): assumes those functions read the module-level names
# `graphs_dir` / `out_graphs_dir` of main at call time -- confirm.
_main_module.graphs_dir = graphs_dir
_main_module.out_graphs_dir = out_graphs_dir

# Main entry point for setting up and executing experiments.
# `time`, `random`, and `np` are expected to arrive via the star import above.
if __name__ == "__main__":
    start = time.time()
    # Set for random experiments only
    random.seed(423652346)
    np.random.seed(423652346)

    #### exp type is:
    #       1 for stratification experiment (distribution_exp)
    #       2 for random sample experiment (sample_exp)
    #       3 for smallest angle experiment (smallest_angle_exp)
    #       4 for a uniform random sample experiment (uniform_sample_exp)
    #       5 for all four exps
    exp_type = 4
    #### data is:
    #       1 for random
    #       2 for MPEG7 (classes from PHT paper - Turner et al.)
    #       3 for EMNIST
    #       4 for all three
    #       5 for test
    data_type = 2

    exp_list = get_exp_graphs(data_type)

    # Run the chosen experiment on each graph, reporting progress as we go.
    total = len(exp_list)
    for counter, e in enumerate(exp_list, start=1):
        print(f"Graph {counter} of {total}")
        exp(e["G"], e["output_file"], exp_type)

    print(f"Execution time: {time.time() - start}(s)")

## Figures