In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' 
import tensorflow as tf
from tensorflow.keras import backend as K
import numpy as np
from tqdm import tqdm
import pandas as pn

In [2]:
# Name of the sub-directory of ./raw_data/ to process: 'test', 'training' or 'validation'
set_name = 'test'

# Every entry found in the raw-data directory of the selected set
lst = os.listdir(f'./raw_data/{set_name}/')

# Split the directory listing into one list per file-name prefix,
# keeping the order in which os.listdir returned the names.
tb_lst = [fname for fname in lst if fname.startswith('tb_')]
ts_lst = [fname for fname in lst if fname.startswith('ts_')]
xh_lst = [fname for fname in lst if fname.startswith('xh_')]
delta_lst = [fname for fname in lst if fname.startswith('delta_')]

Sorting the input files by random seed so that the four file lists are in matching order

In [3]:
def _seed_of(fname, prefix_len):
    """Return the part of ``fname`` between its prefix and the first '_T'.

    ``prefix_len`` is the number of leading characters to skip (3 for
    'tb_', 'ts_', 'xh_'; 6 for 'delta_').
    NOTE(review): ``str.find`` returns -1 when '_T' is absent, in which case
    the slice simply drops the last character of the name.
    """
    return fname[prefix_len:fname.find('_T')]

# One seed string per file, in the same order as the corresponding file list
tb_seeds = [_seed_of(fname, 3) for fname in tb_lst]
ts_seeds = [_seed_of(fname, 3) for fname in ts_lst]
xh_seeds = [_seed_of(fname, 3) for fname in xh_lst]
delta_seeds = [_seed_of(fname, 6) for fname in delta_lst]

In [4]:
def _sort_by_seed(names, seeds):
    """Order ``names`` by their ``seeds`` and return both in that order.

    Returns a tuple ``(names_sorted, seeds_sorted)`` where ``names_sorted`` is a
    numpy array and ``seeds_sorted`` is a list. The seeds are strings, so
    ``np.argsort`` orders them lexicographically.
    """
    order = np.argsort(seeds)
    return np.array(names)[order], [seeds[k] for k in order]

# The seeds are reordered together with the file names. Previously only the
# names were rebound, so running this cell a second time applied the same
# permutation to the already-sorted names and scrambled them.
tb_lst, tb_seeds = _sort_by_seed(tb_lst, tb_seeds)
ts_lst, ts_seeds = _sort_by_seed(ts_lst, ts_seeds)
xh_lst, xh_seeds = _sort_by_seed(xh_lst, xh_seeds)
delta_lst, delta_seeds = _sort_by_seed(delta_lst, delta_seeds)

preprocessing of the datasets

In [5]:
def noramlize_and_debias(y_data,ymean=None,ystd=None):
    """Shift ``y_data`` by ``ymean`` and divide it by ``ystd``.

    Parameters
    ----------
    y_data : numpy.ndarray
        Data to transform. The caller's array is not modified.
    ymean : float, optional
        Value subtracted from the data. Defaults to the mean over all
        elements of ``y_data``.
    ystd : float, optional
        Value the shifted data is divided by. Defaults to the standard
        deviation over all elements of ``y_data``.

    Returns
    -------
    numpy.ndarray
        ``(y_data - ymean) / ystd``.

    The applied shift and scale are printed.
    """
    if ymean is None:
        ymean = np.mean(y_data)
    if ystd is None:
        ystd  = y_data.reshape(len(y_data),-1).std()

    # The subtraction allocates a new array, so the input stays untouched.
    shifted = y_data - ymean
    if np.issubdtype(shifted.dtype, np.inexact):
        # In-place division keeps the floating dtype of the shifted data.
        shifted /= ystd
    else:
        # Integer data with an integer mean is still an integer array here;
        # in-place true division would raise a casting error, so divide
        # out of place and let numpy promote to float.
        shifted = shifted / ystd
    print('mean value is shifted by:',ymean)
    print('data is rescaled with stddev:',ystd)

    return shifted

In [6]:
def load_and_cut(set_name,i,cut_lim = 0.995):
    """Load one raw array and cap its largest values at a quantile.

    Parameters
    ----------
    set_name : str
        Sub-directory of ``./raw_data/`` to read from.
    i : str
        File name inside that directory; the file is read with ``np.load``.
    cut_lim : float, optional
        Quantile (between 0 and 1) used as the upper limit. Every value above
        this quantile of the loaded array is replaced by the quantile value.
        Defaults to 0.995.

    Returns
    -------
    The loaded array with its upper tail capped.
    """
    j = np.load(f'./raw_data/{set_name}/{i}')
    # Honour cut_lim (it used to be ignored in favour of a hard-coded 0.995)
    # and compute the threshold only once.
    upper = np.quantile(j, cut_lim)
    j[j > upper] = upper
    return j

In [7]:
def _load_raw(fname):
    """Load one file from the raw-data directory of the current ``set_name``."""
    return np.load(f'./raw_data/{set_name}/{fname}')

# One stacked array per quantity, files taken in the order of the sorted lists
tb = np.array([_load_raw(fname) for fname in tb_lst])
# ts is the only quantity read through load_and_cut, which applies the
# upper value cut described in section 3.3
ts = np.array([load_and_cut(set_name, fname) for fname in ts_lst])
xh = np.array([_load_raw(fname) for fname in xh_lst])
delta = np.array([_load_raw(fname) for fname in delta_lst])

In [8]:
# The test set is passed through with ymean=0 and ystd=1; the validation and
# training sets are shifted and rescaled with the values of the training set.
tb_mean, tb_std = (0., 1.) if set_name=='test' else (-138.4901, 13.6841755)
tb = noramlize_and_debias(tb, ymean=tb_mean, ystd=tb_std)
# Insert a new length-1 axis after the first four axes
tb = tb[:,:,:,:,np.newaxis]

mean: 0.0
stddev: 1.0


In [9]:
# The test set is passed through with ymean=0 and ystd=1; the validation and
# training sets are shifted and rescaled with the values of the training set.
ts_mean, ts_std = (0., 1.) if set_name=='test' else (8.6781, 1.4251205)
ts = noramlize_and_debias(ts, ymean=ts_mean, ystd=ts_std)
# Insert a new length-1 axis after the first four axes
ts = ts[:,:,:,:,np.newaxis]

mean: 0.0
stddev: 1.0


In [10]:
# The test set is passed through with ymean=0 and ystd=1; the validation and
# training sets are shifted and rescaled with the values of the training set.
xh_mean, xh_std = (0., 1.) if set_name=='test' else (0.99348676, 0.0198706)
xh = noramlize_and_debias(xh, ymean=xh_mean, ystd=xh_std)
# Insert a new length-1 axis after the first four axes
xh = xh[:,:,:,:,np.newaxis]

mean: 0.0
stddev: 1.0


In [11]:
# The test set is passed through with ymean=0 and ystd=1; the validation and
# training sets are shifted and rescaled with the values of the training set.
delta_mean, delta_std = (0., 1.) if set_name=='test' else (4.8816316e-11, 0.10748814)
delta = noramlize_and_debias(delta, ymean=delta_mean, ystd=delta_std)
# Insert a new length-1 axis after the first four axes
delta = delta[:,:,:,:,np.newaxis]

mean: 0.0
stddev: 1.0


Writing each quantity of the set to a single file for convenient use

In [12]:
# np.save does not create missing directories; make sure the output folder
# exists so a fresh checkout does not fail after all the processing is done.
os.makedirs('./processed_data', exist_ok=True)

# One file per quantity, tagged with the name of the processed set
np.save(f'./processed_data/tb_{set_name}.npy',tb)
np.save(f'./processed_data/ts_{set_name}.npy',ts)
np.save(f'./processed_data/xh_{set_name}.npy',xh)
np.save(f'./processed_data/delta_{set_name}.npy',delta)