## Combine generated datasets for training and testing

In this notebook, we take the generated datasets for triangle singularity (2 loops) and poles (3 configurations) and combine them to create an input-output tuple readable by Chainer. We do this for both the training and testing datasets. We export them as ```chainer_train.pkl``` and ```chainer_test.pkl```. 

Note that the training set is shuffled before export. Specify the number of shuffle passes in ```NShuffle```.

In [None]:
import numpy as np
import pickle
import os
import sklearn
from sklearn.utils import shuffle
import chainer
from chainer.dataset import convert

# How many times the combined training set is passed through the shuffler
NShuffle = 2500

# Folder holding the generated datasets; the combined files are written
# back into that same folder, so `out` points at the same location
directory = out = 'Datasets'

Combine all the training datasets (2 triangle sets and 3 pole sets).

In [None]:
# File-name prefixes in concatenation order (2 triangle sets, then 3 pole sets)
train_prefixes = ('T00A', 'T00B', 'P01', 'P02', 'P03')

# Get all inputs. Each file is opened in a `with` block so its handle is closed
# as soon as the data has been unpickled (the bare open() calls leaked handles).
# NOTE: pickle.load can execute arbitrary code -- only load files you generated yourself.
train_inputs = []
for prefix in train_prefixes:
    with open(os.path.join(out, prefix + 'inputs_train.pkl'), 'rb') as fh:
        train_inputs.append(pickle.load(fh))
# Keep the individual per-dataset names available for inspection
TSAin, TSBin, P01in, P02in, P03in = train_inputs
# Combine all the input training
inputtraining = np.concatenate(train_inputs, axis=0)
inputtraining = inputtraining.astype(np.float32)

# Get all outputs (same prefixes, same order, so rows stay aligned with the inputs)
train_outputs = []
for prefix in train_prefixes:
    with open(os.path.join(out, prefix + 'outputs_train.pkl'), 'rb') as fh:
        train_outputs.append(pickle.load(fh))
TSAout, TSBout, P01out, P02out, P03out = train_outputs
# Combine all the output training; cast through float32 and then to int,
# exactly as before, so the stored labels are integers
outputtraining = np.concatenate(train_outputs, axis=0)
outputtraining = outputtraining.astype(np.float32)
outputtraining = outputtraining.astype(int)

print('size of training set:', len(inputtraining))
print('number of input nodes:', len(inputtraining[0]))

# Shuffle the combined training inputs and outputs together so each input row
# keeps its matching output row.
# NOTE(review): one call already produces a random permutation; the extra
# NShuffle - 1 passes only add run time. Kept to honour the NShuffle setting.
for _ in range(NShuffle):
    inputtraining, outputtraining = shuffle(inputtraining, outputtraining)

# Create input-output tuples that can be read by Chainer
chainer_train = chainer.datasets.TupleDataset(inputtraining, outputtraining)

# Export the training set; the `with` block guarantees the file is flushed and
# closed before 'Done exporting!' is reported
with open(os.path.join(out, 'chainer_train.pkl'), 'wb') as fh:
    pickle.dump(chainer_train, fh, protocol=4)
print('Done exporting!')

Combine all the testing datasets (2 triangle sets and 3 pole sets).

In [None]:
# File-name prefixes in concatenation order (2 triangle sets, then 3 pole sets)
test_prefixes = ('T00A', 'T00B', 'P01', 'P02', 'P03')

# Get all inputs. Each file is opened in a `with` block so its handle is closed
# as soon as the data has been unpickled (the bare open() calls leaked handles).
# NOTE: pickle.load can execute arbitrary code -- only load files you generated yourself.
test_inputs = []
for prefix in test_prefixes:
    with open(os.path.join(out, prefix + 'inputs_test.pkl'), 'rb') as fh:
        test_inputs.append(pickle.load(fh))
# Keep the individual per-dataset names available for inspection
TSAin, TSBin, P01in, P02in, P03in = test_inputs
# Combine all the input testing
inputtest = np.concatenate(test_inputs, axis=0)
inputtest = inputtest.astype(np.float32)

# Get all outputs (same prefixes, same order, so rows stay aligned with the inputs)
test_outputs = []
for prefix in test_prefixes:
    with open(os.path.join(out, prefix + 'outputs_test.pkl'), 'rb') as fh:
        test_outputs.append(pickle.load(fh))
TSAout, TSBout, P01out, P02out, P03out = test_outputs
# Combine all the output testing; cast through float32 and then to int,
# exactly as before, so the stored labels are integers
outputtest = np.concatenate(test_outputs, axis=0)
outputtest = outputtest.astype(np.float32)
outputtest = outputtest.astype(int)

print('size of testing set:', len(inputtest))
print('number of input nodes:', len(inputtest[0]))

# No need to shuffle the combined testing inputs and outputs

# Create input-output tuples that can be read by Chainer
chainer_test = chainer.datasets.TupleDataset(inputtest, outputtest)

# Export the testing set; the `with` block guarantees the file is flushed and
# closed before 'Done exporting!' is reported
with open(os.path.join(out, 'chainer_test.pkl'), 'wb') as fh:
    pickle.dump(chainer_test, fh, protocol=4)
print('Done exporting!')