In [None]:
import os
import itertools

import tensorflow as tf
import numpy as np
import pandas as pd
from pandas import DataFrame as DF

import mcbn.data.dataset_loaders as dl
from mcbn.data.dataset import Dataset

from mcbn.models.model_bn import ModelBN
from mcbn.models.model_do import ModelDO

from mcbn.utils.metrics import rmse, pll, crps, pll_maximum, crps_minimum
from mcbn.utils.helper import get_setup
from mcbn.utils.helper import get_grid_search_results
from mcbn.utils.helper import get_tau_results
from mcbn.utils.helper import dump_yaml
from mcbn.utils.helper import get_new_dir_in_parent_path
from mcbn.utils.helper import make_path_if_missing
from mcbn.utils.helper import get_train_and_evaluation_models
from mcbn.utils.helper import get_logger

from mcbn.environment.constants import TEST_EVAL_PATH

In [None]:
# Base seeds for the first test run (numpy controls batch order, TF the model)
RANDOM_SEED_NP_FIRST_RUN = 1
RANDOM_SEED_TF_FIRST_RUN = 1

logger = get_logger()
logger.info("STEP 6: Test set evaluation")

# s: experiment setup / configuration
s = get_setup()

# g: grid search results, indexed as g[dataset_name][base_model_name]
g = get_grid_search_results()

# t: tau optimization results, indexed as t[dataset_name]
t = get_tau_results()

In [None]:
def _append_result_row(results, model_name, epoch, pll_value, rmse_value, crps_value):
    """Append one evaluation row to the column-oriented ``results`` dict."""
    results['model'].append(model_name)
    results['epoch'].append(epoch)
    results['PLL'].append(pll_value)
    results['RMSE'].append(rmse_value)
    results['CRPS'].append(crps_value)


def evaluate_dataset(c, X_train, y_train, X_test, y_test, tf_seed):
    """Train one model on the training split and score it on the test split.

    A ``ModelBN`` (for 'BN' / 'MCBN') or ``ModelDO`` (for 'DO' / 'MCDO') is
    built in a fresh TensorFlow graph pinned to the CPU and trained batch by
    batch until ``dataset.curr_epoch`` exceeds ``c['n_epochs']``. Whenever a
    new epoch is detected and that epoch is a multiple of
    ``s['test_eval_interval']`` or equals ``c['n_epochs']``, PLL, CRPS and RMSE
    are computed on the test set. MC models additionally get a
    '<name> const' row that uses the constant tau ``c['taus']['<name> const']``.

    Args:
        c: Run configuration dict. Keys read here: 'base_model_name', 'k',
            'taus', 'in_dim', 'out_dim', 'lambda', 'batch_size', 'n_epochs'
            and, for DO-based models, 'dropout'.
        X_train, y_train, X_test, y_test: Data passed straight to ``Dataset``.
        tf_seed: Graph-level TensorFlow random seed.

    Returns:
        A tuple ``(results_df, final_predictions_df, optimum_predictions_df)``.
        The last two are ``None`` for non-MC models, and also for MC models
        when no evaluation was recorded at epoch ``c['n_epochs']``.

    Raises:
        ValueError: If ``c['base_model_name']`` is not one of
            'BN', 'MCBN', 'DO', 'MCDO'.
    """
    base_model_name = c['base_model_name']
    is_bn = base_model_name in ['BN', 'MCBN']
    is_do = base_model_name in ['DO', 'MCDO']
    if not (is_bn or is_do):
        # Fail early with a clear message instead of an unbound `model` later
        raise ValueError("Unknown base model name: {!r}".format(base_model_name))
    is_mc = base_model_name in ['MCBN', 'MCDO']

    dataset = Dataset(X_train,
                      y_train,
                      X_test,
                      y_test,
                      s['discard_leftovers'],
                      normalize_X=s['normalize_X'],
                      normalize_y=s['normalize_y'])

    # Column-oriented results, one list entry per evaluated (model, epoch)
    results = {k: [] for k in ['model', 'epoch', 'PLL', 'CRPS', 'RMSE']}

    # Only filled in for MC models, at the final-epoch evaluation
    final_predictions = None
    optimum_predictions = None

    # Build the model in its own graph (named `graph` so it cannot be confused
    # with the module-level grid search results `g`)
    with tf.Graph().as_default() as graph:

        with graph.device('/cpu:0'):

            # Set random generator seed for reproducible models
            tf.set_random_seed(tf_seed)

            # Note: Tau must be set to base model (MCBN or MCDO) tau for get_mc_moments
            # to be able to find var (overridden for const in metrics calc)
            if is_bn:
                model = ModelBN(s['n_hidden'],
                                K=c['k'],
                                nonlinearity=s['nonlinearity'],
                                bn=True,
                                do=False,
                                tau=c['taus'][base_model_name],
                                dataset=dataset,
                                in_dim=c['in_dim'],
                                out_dim=c['out_dim'])
            else:
                keep_prob = 1 - c['dropout']
                model = ModelDO(s['n_hidden'],
                                K=c['k'],
                                nonlinearity=s['nonlinearity'],
                                bn=False,
                                do=True,
                                tau=c['taus'][base_model_name],
                                dataset=dataset,
                                in_dim=c['in_dim'],
                                out_dim=c['out_dim'],
                                first_layer_do=True)

            model.initialize(l2_lambda=c['lambda'], learning_rate=s['learning_rate'])

        # Start session (regular session is default session in with statement)
        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=False,
                log_device_placement=False,
                inter_op_parallelism_threads=1,
                intra_op_parallelism_threads=1)) as sess:

            sess.run(tf.global_variables_initializer())

            # Train until the epoch counter passes n_epochs, evaluating on the way
            last_epoch = 1
            while dataset.curr_epoch <= c['n_epochs']:

                epoch = dataset.curr_epoch
                new_epoch = last_epoch < epoch
                last_epoch = epoch
                is_final_epoch = epoch == c['n_epochs']

                # Each eval_interval:th epoch (and at the final epoch), save all metrics
                if new_epoch and (epoch % s['test_eval_interval'] == 0 or is_final_epoch):

                    if is_mc:
                        const_name = base_model_name + ' const'
                        const_tau = c['taus'][const_name]

                        # Third argument differs per family: batch size for MCBN,
                        # keep probability for MCDO
                        mc_arg = c['batch_size'] if is_bn else keep_prob
                        samples = model.get_mc_samples(s['mc_samples'], dataset.X_test, mc_arg)
                        mean, var = model.get_mc_moments(samples)

                        # MC estimate with the sampled predictive variance
                        _append_result_row(
                            results, base_model_name, epoch,
                            pll(samples, dataset.y_test, s['mc_samples'], c['taus'][base_model_name]),
                            rmse(mean, dataset.y_test),
                            crps(dataset.y_test, mean, var))

                        # Same mean prediction, constant variance 1 / const_tau
                        _append_result_row(
                            results, const_name, epoch,
                            pll(np.array([mean]), dataset.y_test, 1, const_tau),
                            rmse(mean, dataset.y_test),
                            crps(dataset.y_test, mean, const_tau**(-1)))

                        # At final epoch, save prediction mean, var and true y
                        if is_final_epoch:
                            final_predictions = {
                                'yHat': mean.ravel(),
                                base_model_name + ' var': var.ravel(),
                                const_name + ' var': [const_tau**(-1)] * len(mean),
                                'y': dataset.y_test.ravel()
                            }
                            optimum_predictions = {
                                base_model_name + ' PLL_opt': [pll_maximum(mean, dataset.y_test)],
                                base_model_name + ' CRPS_opt': [crps_minimum(mean, dataset.y_test)]
                            }

                    else:
                        if is_bn:
                            model.update_layer_statistics(dataset.X_train)
                            predictions = model.predict(dataset.X_test)
                        else:
                            # Second argument 1 is the keep probability slot used
                            # in training calls -- presumably disables dropout
                            predictions = model.predict(dataset.X_test, 1)

                        _append_result_row(
                            results, base_model_name, epoch,
                            pll(np.array([predictions]), dataset.y_test, 1, model.tau),
                            rmse(predictions, dataset.y_test),
                            crps(dataset.y_test, predictions, model.tau**(-1)))

                batch = dataset.next_batch(c['batch_size'])
                if is_bn:
                    model.run_train_step(batch)
                else:
                    model.run_train_step(batch, keep_prob)

    # Prediction frames exist only for MC models that reached a final evaluation
    if is_mc and final_predictions is not None:
        return DF(results), DF(final_predictions), DF(optimum_predictions)
    return DF(results), None, None

In [None]:
def save_dataset_results(results_df, final_predictions_df, bm_opt_df, eval_path, dataset_name, base_model_name):
    """Write one dataset's evaluation output as CSV files.

    Files are written to ``<eval_path>/<dataset_name>/``:

    * ``<model>.csv`` -- one file per distinct value in ``results_df['model']``,
      with the index reset.
    * ``<base_model_name> final_predictions.csv`` -- only if
      ``final_predictions_df`` is not ``None``.
    * ``<base_model_name> optimum_predictions.csv`` -- only if ``bm_opt_df``
      is not ``None``.

    Args:
        results_df: DataFrame with a 'model' column to group by.
        final_predictions_df: DataFrame of final predictions, or ``None``.
        bm_opt_df: DataFrame of optimum predictions, or ``None``.
        eval_path: Parent evaluation directory.
        dataset_name: Name of the dataset; used as the sub-directory name.
        base_model_name: Prefix for the two prediction file names.
    """
    # Get dataset dir
    dataset_path = os.path.join(eval_path, dataset_name)
    make_path_if_missing(dataset_path)

    # Group by model and save
    for model_name, model_df in results_df.groupby('model'):
        model_csv = os.path.join(dataset_path, model_name + '.csv')
        model_df.reset_index(drop=True).to_csv(model_csv)

    # The prediction frames are None when no MC model produced them;
    # skip those files instead of failing on None.to_csv
    if final_predictions_df is not None:
        final_predictions_df.to_csv(
            os.path.join(dataset_path, base_model_name + ' final_predictions.csv'))

    if bm_opt_df is not None:
        bm_opt_df.to_csv(
            os.path.join(dataset_path, base_model_name + ' optimum_predictions.csv'))

In [None]:
# Create parent evaluation dir
eval_path = get_new_dir_in_parent_path(TEST_EVAL_PATH)

# Save used setup, grid search and tau results
dump_yaml(s, eval_path, 'eval_setup.yml')
dump_yaml(g, eval_path, 'eval_grid_search_results.yml')
dump_yaml(t, eval_path, 'tau_results.yml')

all_results = None

# Iterate over datasets to be evaluated
for dataset_name in t.keys():

    logger.info("Dataset: " + dataset_name)

    # Load dataset into memory
    X_train, y_train, X_test, y_test = dl.load_uci_data_test(dataset_name)
    feature_indices, target_indices = dl.load_uci_info(dataset_name)

    # Iterate over base optimization models (BN or DO)
    train_and_evaluation_models = get_train_and_evaluation_models(s['models'])

    # .items() works on both Python 2 and 3 (iteritems() is Python 2 only)
    for bn_or_do_model, evaluation_models in train_and_evaluation_models.items():

        # Collect per-run frames and concatenate once; DataFrame.append in a
        # loop is quadratic and no longer exists in pandas >= 2.0
        results_frames = []
        final_pred_frames = []
        opt_frames = []

        for base_model_name in evaluation_models:

            # Run multiple times based on n_testruns
            for run_count in range(s['n_testruns']):

                # Set random generator seed for reproducible batch order
                # Common for all base models for a certain dataset
                np_seed = RANDOM_SEED_NP_FIRST_RUN + run_count
                tf_seed = RANDOM_SEED_TF_FIRST_RUN + run_count

                np.random.seed(np_seed)

                # Get grid search parameters
                opt_dict = g[dataset_name][base_model_name]

                logger.info("Model: {}, run: {} of {}".format(base_model_name, run_count+1, s['n_testruns']))

                # Get dataset configuration
                c = {'base_model_name': base_model_name,
                     'in_dim': len(feature_indices),
                     'out_dim': len(target_indices),
                     'k': s['k_specific'].get(dataset_name) or s['k'],
                     'lambda': opt_dict['lambda'],
                     'batch_size': opt_dict['batch_size'],
                     'dropout': opt_dict.get('dropout'), # Can be None
                     'taus': t[dataset_name],
                     'n_epochs': opt_dict['cv_epoch']
                    }

                df, df_fp, df_opt = evaluate_dataset(c, X_train, y_train, X_test, y_test, tf_seed)

                df['run_count'] = run_count+1
                logger.info(df)
                results_frames.append(df)

                # Prediction frames are only returned for some models
                if df_fp is not None:
                    df_fp['run_count'] = run_count+1
                    final_pred_frames.append(df_fp)

                if df_opt is not None:
                    df_opt['run_count'] = run_count+1
                    logger.info(df_opt)
                    opt_frames.append(df_opt)

        # Nothing was evaluated for this model group: nothing to save
        if not results_frames:
            logger.info("No results for {} models on {}".format(bn_or_do_model, dataset_name))
            continue

        bm_results_df = pd.concat(results_frames, ignore_index=True)
        bm_final_pred_df = pd.concat(final_pred_frames, ignore_index=True) if final_pred_frames else None
        bm_opt_df = pd.concat(opt_frames, ignore_index=True) if opt_frames else None

        save_dataset_results(bm_results_df, bm_final_pred_df, bm_opt_df, eval_path, dataset_name, 'MC'+bn_or_do_model)

        bm_results_df['dataset'] = dataset_name
        if all_results is None:
            all_results = bm_results_df
        else:
            all_results = pd.concat([all_results, bm_results_df], ignore_index=True)

    # Save all results so far (rewritten after every dataset)
    if all_results is not None:
        all_results.to_csv(os.path.join(eval_path, 'results.csv'))

logger.info("DONE STEP 6")