In [1]:
import os
import sys
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
from tabulate import tabulate as tab

import generate_synthetic_data as gsd


# Read Data and Generate Datasets

In [2]:
# Read the recordings for IDs 1 through 10, then pool them into one dataset.
# NOTE(review): judging by the names, the last two values returned by the merge
# are the neutron-only and gamma-only subsets; confirm in generate_synthetic_data.
data, labels = gsd.read_data_txt(IDs=list(range(1, 11)))
(
    data_merged,
    labels_merged,
    neutrons_merged,
    gammas_merged,
) = gsd.merge_cases_together(data, labels)

case    total    gammas    neutrons    ratio (g/n)    Amax total    Amax gammas    Amax neutrons
------  -------  --------  ----------  -------------  ------------  -------------  ---------------
case1   623142   511540    111602      4.6            1.6763        1.6763         0.7248
case2   452712   353630    99082       3.6            1.6847        1.6847         0.7123
case3   368758   276193    92565       3.0            1.6799        1.6799         1.3376
case4   318295   229068    89227       2.6            1.7077        1.7077         0.8486
case5   286039   199074    86965       2.3            1.6881        1.6881         0.4909
case6   265001   178813    86188       2.1            1.6737        1.6737         1.0577
case7   247812   163450    84362       1.9            1.6787        1.6787         0.6179
case8   239239   154336    84903       1.8            1.6719        1.6719         0.8129
case9   230631   146351    84280       1.7            1.6673        1.6673         0

### Choose binning for templates

In [27]:
# Template binning: 11 equally spaced edges between 0.05 and 0.5 → 10 bins.
bin_edges = np.linspace(start=0.05, stop=0.5, num=11)

# Noise level passed as `sigma_noise` to the dataset generator.
# Alternative settings kept for reference: 0.0, 0.001, 0.01.
noise = 0.05


### Training Dataset

In [28]:
# Generate the training set from the merged data: 40000 samples requested for
# each of the three classes (neutrons, gammas, pile-up), 120000 rows in total.
# The function name is spelled exactly as exported by generate_synthetic_data.
X_train, Y_train, time_shifts_pileup_train = gsd.genereate_synthetic_data(
    data=data_merged,
    labels=labels_merged,
    bin_edges=bin_edges,
    statistics_n_g_pu=[40000, 40000, 40000],  # neutrons, gammas, pile-up
    voltage_range=[0.05, 0.5],  # same end points as bin_edges
    sigma_noise=noise,
)

                    count    percentage of total [%]
------------------  -------  -------------------------
Ntot                3213985
count_minThreshold  2385711  74.2
count_maxThreshold  0        0.0
count_switch        0        0.0
count_beforePulses  12081    0.4
count_afterPulses   14005    0.4
---                 ---      ---
Nsel                802186   25.0

neutrons clean (tight selection)
 sample shape (107014, 296)
 peak amplitude (min, max) -0.0023 1.3376
 average peak amplitude 0.005321013539400496
 counts per bin: [56952 26196 10944  6248  3722  1438   907   456    87    21]

gammas clean (tight selection)
 sample shape (695172, 296)
 peak amplitude (min, max) -0.0029 1.7077
 average peak amplitude 0.004025050554764526
 counts per bin: [502274  51528  35105  22845  15709  12289  10027   7492   6378   6119]
NEUTRONS: 40000
Clamped fraction: 0.0495
GAMMAS: 40000
Clamped fraction: 0.0512
PILEUP: 40000

X shape (120000, 296)
sanity check, Y shape (120000,)
time_shifts shape 

In [None]:
# Currently disabled: uncomment to write the training arrays to an .npz archive.
# NOTE(review): as written this saves into the current working directory, while
# the loading cell further down reads
# "../synthetic_data/synthetic_training_allCases_120k_noise_{noise}.npz".
# Move the file there (or adjust this path) before relying on that cell.
# np.savez(
#     f"synthetic_training_allCases_120k_noise_{noise}.npz",
#     X=X_train,
#     y=Y_train,
#     meta=time_shifts_pileup_train   # any third array (labels, dt, class, etc.)
# )

### Test Dataset

In [30]:
# Generate the test set from the same merged data: 160000 samples requested for
# each of the three classes (neutrons, gammas, pile-up), 480000 rows in total.
# The function name is spelled exactly as exported by generate_synthetic_data.
X_test, Y_test, time_shifts_pileup_test = gsd.genereate_synthetic_data(
    data=data_merged,
    labels=labels_merged,
    bin_edges=bin_edges,
    statistics_n_g_pu=[160000, 160000, 160000],  # neutrons, gammas, pile-up
    voltage_range=[0.05, 0.5],  # same end points as bin_edges
    sigma_noise=noise,
)

                    count    percentage of total [%]
------------------  -------  -------------------------
Ntot                3213985
count_minThreshold  2385711  74.2
count_maxThreshold  0        0.0
count_switch        0        0.0
count_beforePulses  12081    0.4
count_afterPulses   14005    0.4
---                 ---      ---
Nsel                802186   25.0

neutrons clean (tight selection)
 sample shape (107014, 296)
 peak amplitude (min, max) -0.0023 1.3376
 average peak amplitude 0.005321013539400496
 counts per bin: [56952 26196 10944  6248  3722  1438   907   456    87    21]

gammas clean (tight selection)
 sample shape (695172, 296)
 peak amplitude (min, max) -0.0029 1.7077
 average peak amplitude 0.004025050554764526
 counts per bin: [502274  51528  35105  22845  15709  12289  10027   7492   6378   6119]
NEUTRONS: 160000
Clamped fraction: 0.0501625
GAMMAS: 160000
Clamped fraction: 0.04900625
PILEUP: 160000

X shape (480000, 296)
sanity check, Y shape (480000,)
time_shi

In [None]:
# Currently disabled: uncomment to write the test arrays to an .npz archive.
# NOTE(review): as written this saves into the current working directory, while
# the loading cell further down reads
# "../synthetic_data/synthetic_test_allCases_480k_noise_{noise}.npz".
# Move the file there (or adjust this path) before relying on that cell.
# np.savez(
#     f"synthetic_test_allCases_480k_noise_{noise}.npz",
#     X=X_test,
#     y=Y_test,
#     meta=time_shifts_pileup_test   # any third array (labels, dt, class, etc.)
# )

# Read Synthetic Datasets 

In [32]:
# Reload the stored training archive and compare it with the arrays generated
# earlier in this notebook.
data = np.load(f"../synthetic_data/synthetic_training_allCases_120k_noise_{noise}.npz")

X = data["X"]
Y = data["y"]
dt = data["meta"]

# Sanity check: each line prints True only if both shape and every element match.
# np.array_equal replaces np.unique(a == b) because element-wise `==` can
# broadcast arrays of different shapes and so hide a mismatch, and it avoids
# running `unique` over a full-size boolean array.
# NOTE(review): no random seed is set in the visible cells, so these checks are
# expected to pass only when the archive was written from these very arrays;
# confirm.
print(np.array_equal(X, X_train))
print(np.array_equal(Y, Y_train))
print(np.array_equal(dt, time_shifts_pileup_train))

[ True]
[ True]
[ True]


In [33]:
# Reload the stored test archive and compare it with the arrays generated
# earlier in this notebook. This rebinds data, X, Y and dt to the test set.
data = np.load(f"../synthetic_data/synthetic_test_allCases_480k_noise_{noise}.npz")

X = data["X"]
Y = data["y"]
dt = data["meta"]

# Sanity check: each line prints True only if both shape and every element match.
# np.array_equal replaces np.unique(a == b) because element-wise `==` can
# broadcast arrays of different shapes and so hide a mismatch, and it avoids
# running `unique` over a full-size boolean array.
# NOTE(review): no random seed is set in the visible cells, so these checks are
# expected to pass only when the archive was written from these very arrays;
# confirm.
print(np.array_equal(X, X_test))
print(np.array_equal(Y, Y_test))
print(np.array_equal(dt, time_shifts_pileup_test))

[ True]
[ True]
[ True]
