In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# utilities:
def load_spectrum(filename, number_channels=1016):
  """Load a tab-separated spectrum file into NumPy arrays.

  Every non-empty line must hold either ``energy<TAB>counts`` or
  ``energy<TAB>counts<TAB>error``; all lines must use the same layout.

  Parameters
  ----------
  filename : str
    Path of the spectrum file. If the path contains ``'Exp'``, the counts
    are divided by the acquisition time, which is taken from the first
    ``_``-separated token of the path that contains ``'sec'`` (the integer
    written in front of ``sec``).
  number_channels : int, optional
    Required number of non-empty data lines (default 1016).

  Returns
  -------
  tuple of numpy.ndarray
    ``(energy, counts, percentage_error)``. ``percentage_error`` is the
    third column when the file has one, otherwise an all-NaN array with the
    same shape as ``energy``.

  Raises
  ------
  ValueError
    If the number of data lines is not ``number_channels``, if the lines do
    not all have exactly 2 or exactly 3 columns, or if the path contains
    ``'Exp'`` but no acquisition time can be read from it.
  """
  # Context manager so the handle is closed even when parsing fails.
  with open(filename, 'r') as f:
    raw_lines = f.read().split('\n')

  # Empty lines and empty tab-separated fields are skipped.
  data = [[float(field) for field in line.split('\t') if len(field) > 0]
    for line in raw_lines if len(line) > 0]

  # Raise instead of calling exit(), so a bad file surfaces as an ordinary
  # error at the call site rather than stopping the interpreter/kernel.
  if len(data) != number_channels:
    raise ValueError(f'ERROR: Total number of channels is not {number_channels}.')

  data_unique = set(len(row) for row in data)
  if data_unique != {2} and data_unique != {3}:
    raise ValueError('ERROR: At some point or all, spectrum data format does not match with either "Energy\tcounts" or "Energy\tcounts\tError".')

  energy = np.array([row[0] for row in data])
  counts = np.array([row[1] for row in data])

  if 'Exp' in filename:
    # i.e., if spectrum is NaI generated, normalize w.r.t. acquisition time:
    time_tokens = [token for token in filename.split('_') if 'sec' in token]
    if not time_tokens:
      raise ValueError(f'ERROR: no acquisition time ("<N>sec") found in "{filename}".')
    acquisition_time = int(time_tokens[0].split('sec')[0])
    counts = counts/acquisition_time

  if data_unique == {3}:
    # The error is the THIRD column (index 2); index 1 is the counts column.
    percentage_error = np.array([row[2] for row in data])
  else:
    percentage_error = np.full(energy.shape, np.nan)

  return energy, counts, percentage_error

In [3]:
# initialization:
# Map each spectrum label to its integer class index. The index is simply the
# label's position in this sequence, with the background label first.
class_labels = {
  label: index
  for index, label in enumerate(('bkg', 'Ba133', 'Cs137', 'Co60'))
}
num_classes = len(class_labels)

In [4]:
def _find_base_spectrum(rn_tag, base_dir='base_spectrums'):
  """Return the name of the first file listed in `base_dir` that contains `rn_tag`."""
  matches = [item for item in os.listdir(base_dir) if rn_tag in item]
  if not matches:
    raise FileNotFoundError(f'no file containing "{rn_tag}" found in "{base_dir}"')
  return matches[0]


# Generate synthetic spectra: for each radionuclide, mix its base spectrum
# into the background at random ratios, scale to random gross counts, add
# Poisson noise, renormalize, and write the result as CSV.
# NOTE: no random seed is set in this cell, so every run produces different data.
for combination in [['D6', 'train']]:
  print('\n', combination)
  directory, datafilename = combination[0], combination[1]

  # Create the output directory up front; otherwise a missing directory is
  # only discovered at write time, after all spectra have been generated.
  os.makedirs(directory, exist_ok=True)

  # background spectrum:
  rn_tag = 'bkg_Exp_base'
  filename = _find_base_spectrum(rn_tag)
  spect0 = load_spectrum(f'base_spectrums/{filename}')

  # normalize w.r.t. total counts (same for every radionuclide, so done once):
  spect0_norm = [spect0[0], spect0[1]/spect0[1].sum(), spect0[2]]

  # signal_spectrum (every class except the first one in class_labels):
  for RN in [*class_labels][1:]:
    csv = []  # to collect produced spectrums
    ID = -1

    rn_tag = f'{RN}_Exp_base'
    filename = _find_base_spectrum(rn_tag)
    spect1 = load_spectrum(f'base_spectrums/{filename}')

    # normalize spect w.r.t. total counts:
    spect1_norm = [spect1[0], spect1[1]/spect1[1].sum(), spect1[2]]

    # the SNR (signal to noise mixing ratio) ~ random.uniform(low=0.05, high=1.1)
    SNR_list = list(np.random.uniform(low=0.05, high=1.1, size=64))

    # the superimposed spectrum:
    # energy-wise addition:
    for SNR in SNR_list:
      counts = spect0_norm[1] + SNR*spect1_norm[1]
      spect_norm = [spect0[0], counts/counts.sum(), spect0[2]]

      # the gross count variation:
      gross_counts_list = list(np.random.uniform(low=500, high=10000, size=64))

      for gross_counts in gross_counts_list:
        good_spect = [spect_norm[0], spect_norm[1]*gross_counts, spect_norm[2]]
        # One Poisson draw per channel, with the expected counts as the rate;
        # a single vectorized call replaces the per-channel Python loop.
        counts = np.random.poisson(lam=good_spect[1])
        bad_spect_norm = [spect_norm[0], counts/counts.sum(), spect_norm[2]]

        ID += 1
        csv.append([ID] + [f'{item}' for item in bad_spect_norm[1]] + [SNR] + [gross_counts])

    # header row; one feature column per channel of the loaded spectrum
    csv = [['ID'] + [f'feature_{item}' for item in range(len(spect0[0]))] + ['SNR'] + ['gross_counts']] + csv

    # full table: ID, features, SNR, gross_counts
    with open(f'{directory}/{datafilename}{directory}class{class_labels[RN]}_detailed.csv', 'w') as f:
      f.write('\n'.join([','.join([str(item2) for item2 in item]) for item in csv]))

    # same table without its last column (gross_counts); the SNR column is kept
    with open(f'{directory}/{datafilename}{directory}class{class_labels[RN]}.csv', 'w') as f:
      f.write('\n'.join([','.join([str(item2) for item2 in item[:-1]]) for item in csv]))



 ['D6', 'train']
