In [1]:
import matplotlib.pyplot as plt
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="3"

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import FunctionTransformer, LabelBinarizer
from sklearn.model_selection import StratifiedKFold
import tensorflow as tf
import pickle
import numpy as np
import pandas as pd
from random import choice
from glob import glob

from annsa.template_sampling import *

In [2]:
import tensorflow.contrib.eager as tfe

In [3]:
tf.enable_eager_execution()

#### Import model, training function 

In [5]:
from annsa.model_classes import (dnn_model_features,
                                 DNN,
                                 dae_model_features,
                                 DAE,
                                 save_model,
                                 train_earlystop)

from annsa.load_pretrained_network import load_pretrained_dae_into_dnn

## Dataset Construction

#### Load dataset

In [6]:
background_dataset = pd.read_csv('../../source-interdiction/training_testing_data/background_template_dataset.csv')
source_dataset = pd.read_csv('../../source-interdiction/training_testing_data/shielded_templates_200kev_dataset.csv')

#### Dataset details

In [7]:
print('sourcedist: ' + str(sorted(set(source_dataset['sourcedist']))))
print('sourceheight: ' + str(sorted(set(source_dataset['sourceheight']))))
print('alum shieldingdensity: ' + str(sorted(set(source_dataset[source_dataset['shielding']=='alum']['shieldingdensity']))))
print('iron shieldingdensity: ' + str(sorted(set(source_dataset[source_dataset['shielding']=='iron']['shieldingdensity']))))
print('lead shieldingdensity: ' + str(sorted(set(source_dataset[source_dataset['shielding']=='lead']['shieldingdensity']))))
print('fwhm: ' + str(sorted(set(source_dataset['fwhm']))))

sourcedist: [50.0, 112.5, 175.0, 237.5, 300.0]
sourceheight: [50.0, 75.0, 100.0, 125.0, 150.0]
alum shieldingdensity: [1.82, 4.18, 7.49, 13.16]
iron shieldingdensity: [1.53, 3.5, 6.28, 11.02]
lead shieldingdensity: [0.22, 0.51, 0.92, 1.61]
fwhm: [6.0, 6.5, 7.0, 7.5, 8.0, 8.5, 9.0]


#### Customize dataset

In [8]:
source_dataset = source_dataset[(source_dataset['fwhm']==7.0) | 
                                (source_dataset['fwhm']==7.5) |
                                (source_dataset['fwhm']==8.0)]

source_dataset = source_dataset[(source_dataset['sourcedist']==50.5) | 
                                (source_dataset['sourcedist']==175.0) | 
                                (source_dataset['sourcedist']==300.0)]

source_dataset = source_dataset[(source_dataset['sourceheight']==50.0) |
                                (source_dataset['sourceheight']==100.0) |
                                (source_dataset['sourceheight']==150.0)]

# remove 80% shielding
source_dataset = source_dataset[source_dataset['shieldingdensity']!=13.16]
source_dataset = source_dataset[source_dataset['shieldingdensity']!=11.02]
source_dataset = source_dataset[source_dataset['shieldingdensity']!=1.61]

#### Remove empty spectra

In [9]:
zero_count_indicies = np.argwhere(np.sum(source_dataset.values[:,6:],axis=1) == 0).flatten()

print('indicies dropped: ' +str(zero_count_indicies))

source_dataset.drop(source_dataset.index[zero_count_indicies], inplace=True)

indicies dropped: [552 553 554 555 556 557 564 565 566 582 583 584 585 586 587 594 595 596
 612 613 614 615 616 617 624 625 626 642 643 644 645 646 647 654 655 656
 672 673 674 675 676 677 684 685 686 702 703 704 705 706 707 714 715 716]


#### Add empty spectra for background 

In [10]:
blank_spectra = []
for fwhm in set(source_dataset['fwhm']):
    num_examples = source_dataset[(source_dataset['fwhm']==fwhm) &
                                  (source_dataset['isotope']==source_dataset['isotope'].iloc()[0])].shape[0]
    for k in range(num_examples):
        blank_spectra_tmp = [0]*1200
        blank_spectra_tmp[5] = fwhm
        blank_spectra_tmp[0] = 'background'
        blank_spectra_tmp[3] = 'background'
        blank_spectra.append(blank_spectra_tmp)

source_dataset = source_dataset.append(pd.DataFrame(blank_spectra,
                                                    columns=source_dataset.columns))

#### Create dataset from spectra

In [11]:
spectra_dataset = source_dataset.values[:,5:].astype('float64')
all_keys = source_dataset['isotope'].values

## Define Training Parameters

#### Define online data augmentation

In [12]:
def integration_time():
    return np.random.uniform(np.log10(10),np.log10(3600))

def background_cps():
    return np.random.poisson(200)

def signal_to_background():
    return np.random.uniform(0.1,2)

def calibration():
    return [np.random.uniform(0,10),
            np.random.uniform(2500/3000,3500/3000),
            0]

online_data_augmentation = online_data_augmentation_vanilla(background_dataset,
                                background_cps,
                                integration_time,
                                signal_to_background,
                                calibration,)

#### Create temporary testing dataset based on training dataset 

In [13]:
examples_per_isotope=10

testing_spectra = []
testing_keys = []

for key in np.unique(all_keys):
    for _ in range(examples_per_isotope):
        testing_spectra_tmp = source_dataset[source_dataset['isotope']==key].sample().values[:,5:].astype('float64')
        testing_spectra_tmp = online_data_augmentation(testing_spectra_tmp).numpy()[0]
        testing_keys.append(key)
        testing_spectra.append(testing_spectra_tmp)

testing_spectra = np.array(testing_spectra)

## Train network

In [14]:
model_id='DSCAE-CNN_onlinedataaugfull_update'

In [16]:
all_dae_models = all_dae_models = [x.split('checkpoint_')[1][:-7] for x in
                                   glob('../hyperparameter_search/hyperparameter-search-results/BSDAE_full_tmp_checkpoint_[0-9]*index')]

In [17]:
all_dae_models

['128_64',
 '1024_1024_128',
 '1024_1024_32',
 '64',
 '1024_1024_64',
 '256_128',
 '512_512_128',
 '256_64',
 '256',
 '128']

In [18]:
mlb=LabelBinarizer()

training_errors = []
total_networks = 10
for network_id in range(total_networks):

    # reset model on each iteration
    all_keys_binarized = mlb.fit_transform(all_keys.reshape([all_keys.shape[0],1]))
    testing_keys_binarized = mlb.transform(testing_keys)
    for model_id in all_dae_models:

        model, model_features = load_pretrained_dae_into_dnn(dae_features_filename = '../hyperparameter_search/hyperparameter-search-results/BSCAE_full_fullnetwork_'+model_id,
                                                    dae_weights_filename = '../hyperparameter_search/hyperparameter-search-results/BSCAE_full_fullnetwork_checkpoint_'+model_id+'_',
                                                    dnn_dense_nodes = [128],)
        optimizer = tf.train.AdamOptimizer(model_features.learining_rate)

        _, f1_error = model.fit_batch(
            (spectra_dataset, all_keys_binarized),
            (testing_spectra, testing_keys_binarized),
            optimizer=optimizer,
            num_epochs=1500,
            verbose=1,
            obj_cost=model.cross_entropy,
            earlystop_cost_fn=model.f1_error,
            earlystop_patience=1000,
            data_augmentation=online_data_augmentation,
            augment_testing_data=False,
            print_errors=True,
            record_train_errors=False)

        # training_errors.append(f1_error['test'])
        #np.save('./final-models/final_test_errors_'+model_id, training_errors)
        # model.save_weights('./final-models/'+model_id+'_checkpoint_'+str(network_id))


FileNotFoundError: [Errno 2] No such file or directory: '../hyperparameter_search/hyperparameter-search-results/BSCAE_full_fullnetwork_128_64'