In [1]:
import numpy as np
import os

In [2]:
train_test_folder = os.path.abspath('./cosz')
index_files = [os.path.join(train_test_folder, "Xy_indx{}_sel5_doms.npz".format(i+1)) for i in range(100)]

dir_xy = os.path.join("/", "data", "km3net", "Xy_multi_data_files")
xy_filelist = [(os.path.join(dir_xy, "Xy_numu_{}_multi_data.npz".format(i+1)), 
                os.path.join(dir_xy, "Xy_nue_{}_multi_data.npz".format(i+1))) for i in range(100)]
metadata_keylist = ["E", "dirx", "diry", "dirz", "posx","posy","posz", "dist"]

In [3]:
os.path.exists(index_files[0]), os.path.exists(xy_filelist[0][0])

(True, True)

In [4]:
from generators import get_n_iterations, metadata_generator

In [5]:
from export_train_test import INDEX_TEST_KEY, INDEX_TRAINING_KEY, INDEX_VALIDATION_KEY

In [12]:
from tqdm import tqdm
import pandas as pd

In [24]:
metadata = None
it_steps, n_events = get_n_iterations(index_files, batch_size=64, target_key=INDEX_VALIDATION_KEY)
print(it_steps, n_events)

metadata_gen  = metadata_generator(index_files, xy_filelist, metadata_keylist)
for i in tqdm(range(it_steps)):
    metadata_batch = next(metadata_gen)
    if metadata is None:
        metadata = metadata_batch
    else:
        metadata = pd.concat((metadata, metadata_batch))

  9%|▊         | 56/648 [00:00<00:01, 559.82it/s]

(648, 41451)


100%|██████████| 648/648 [00:01<00:00, 537.16it/s]


In [25]:
metadata.shape

(41472, 10)

In [36]:
def stratify_on_energies(E):
    logE = np.log10(E)
    minE, maxE = np.min(logE), np.max(logE)
    BIN_EDGES = sorted([minE, 1.0, 2.8, 3.5, 4.0, 7.0, maxE+0.1])
    hist, _ = np.histogram(logE, bins=np.asarray(BIN_EDGES))
    no_split_indx = (np.where(hist < 2)[0]) + 1  #
    BIN_EDGES = sorted([edge for idx, edge in enumerate(BIN_EDGES) if idx not in no_split_indx])
    assert np.all(np.histogram(logE, bins=np.asarray(BIN_EDGES))[0] > 1)
    return np.digitize(logE, np.asarray(BIN_EDGES))

In [27]:
test_strat = stratify_on_energies(metadata['E'].as_matrix())

In [28]:
np.unique(test_strat)

array([1, 2, 3, 4, 5, 6])

In [29]:
from export_train_test import export_train_validation_test

In [38]:
def process_cosz(dirz):
    print('DIRZ: ', dirz)
    print(dirz.shape)
    y = np.copy(dirz)
    y[y > 0] = 1
    y[y <= 0] = 0
    print('y: ', y)
    print(y.shape)
    return y
    
export_train_validation_test(i, xy_filelist[6][0], xy_filelist[6][1], out_dir=output_dir, target_key='dirz',
                             stratify_key='dirz', fstratify=process_cosz)
print('Export Complete {}'.format(i + 1))

('DIRZ: ', array([ 0.842471,  0.721094, -0.919277, ..., -0.018315, -0.248913,
       -0.248913]))
(2548,)
('y: ', array([ 1.,  1.,  0., ...,  0.,  0.,  0.]))
(2548,)
('DIRZ: ', array([ 0.72128 ,  0.480404, -0.012188, ..., -0.567179,  0.755481,
        0.210344]))
(2038,)
('y: ', array([ 1.,  1.,  0., ...,  0.,  1.,  1.]))
(2038,)
Export Complete 100


In [35]:
export_train_validation_test(i, xy_filelist[6][0], xy_filelist[6][1], out_dir=output_dir, target_key='dirx',
                             stratify_key='E', fstratify=stratify_on_energies, ftarget=lambda E: np.log10(E))
print('Export Complete {}'.format(i + 1))

(array([   3,  318,  676,  459, 1037,   55]), array([-0.37345229,  1.        ,  2.8       ,  3.5       ,  4.        ,
        7.        ,  8.00893945]))
(array([  2, 254, 541, 367, 830,  44]), array([-0.37345229,  1.        ,  2.8       ,  3.5       ,  4.        ,
        7.        ,  8.00893945]))
Export Complete 7


In [37]:
output_dir = os.path.join('.', 'test_energy_strat')
if not os.path.exists(output_dir):
    os.makedirs(output_dir)
    
for i in range(7, 100):
    export_train_validation_test(i, xy_filelist[i][0], xy_filelist[i][1], out_dir=output_dir, target_key='E',
                                 stratify_key='E', fstratify=stratify_on_energies)
    print('Export Complete {}'.format(i + 1))

Export Complete 8
Export Complete 9
Export Complete 10
Export Complete 11
Export Complete 12
Export Complete 13
Export Complete 14
Export Complete 15
Export Complete 16
Export Complete 17
Export Complete 18
Export Complete 19
Export Complete 20
Export Complete 21
Export Complete 22
Export Complete 23
Export Complete 24
Export Complete 25
Export Complete 26
Export Complete 27
Export Complete 28
Export Complete 29
Export Complete 30
Export Complete 31
Export Complete 32
Export Complete 33
Export Complete 34
Export Complete 35
Export Complete 36
Export Complete 37
Export Complete 38
Export Complete 39
Export Complete 40
Export Complete 41
Export Complete 42
Export Complete 43
Export Complete 44
Export Complete 45
Export Complete 46
Export Complete 47
Export Complete 48
Export Complete 49
Export Complete 50
Export Complete 51
Export Complete 52
Export Complete 53
Export Complete 54
Export Complete 55
Export Complete 56
Export Complete 57
Export Complete 58
Export Complete 59
Export Complet