# Deep MicroBiome

Aug. 14. 2019
@ Youngwon (youngwon08@gmail.com)

In [1]:
import os
import json
import numpy as np
import pandas as pd
import copy
import logging
import sys

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
os.environ['CUDA_VISIBLE_DEVICES']='0'

import configuration
from loss_and_metric import metric_test, metric_texa_test
import readers
import build_network
import configuration
from utils import file_path_fold, plot_history

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [3]:
import keras.backend as k

config = k.tf.ConfigProto()
config.gpu_options.allow_growth=True
#config.gpu_options.per_process_gpu_memory_fraction = 0.4
k.set_session(k.tf.Session(config=config))

In [4]:
logging.basicConfig(format = '[%(name)-8s|%(levelname)s|%(filename)s:%(lineno)s] %(message)s',
                    level=logging.DEBUG)
log = logging.getLogger()

## Pick Models

In [5]:
# %%bash
# ls

In [6]:
save = False
kfold=1000
network_model_keys = ['optimizer','lr','decay']
architecture_keys = ['weight_decay', 'weight_l1_penalty', #'weight_l2_penalty',
                     'tree_thrd', 'weight_initial',
                     'batch_normalization','drop_out']
network_training_keys = ['batch_size','epochs']

In [7]:
#######################################################################

filenames = 'simulation_s0.Rmd'
models = [
          'simulation_s0_v0/simulation_s0_deep',
          'simulation_s0_v0/simulation_s0_deep_l1',
          'simulation_s0_v0/simulation_s0_deepbiome',
          'simulation_s0_v0/simulation_s0_deepbiome_l1',
#           'simulation_s0_v0/simulation_s0_deepbiome_wo_noise',
# #           'simulation_s0_v0/simulation_s0_deepbiome_wo_noise_trueinit',
#           'simulation_s0_v0/simulation_s0_deepbiome_noise_decay',
         ]

models_aka = [
          'DNN',
          'DNN+l1',
          'DeepBiome',
          'DeepBiome+l1',
#           'deepbiome_wo_noise',
# #           'deepbiome_wo_noise_trueinit',
#           'deepbiome_noise_decay',
         ]
num_classes = 0

########################################################################

# filenames = 'simulation_s1.Rmd'
# models = [
#           'simulation_s1_v0/simulation_s1_deep',
#           'simulation_s1_v0/simulation_s1_deep_l1',
#           'simulation_s1_v0/simulation_s1_deepbiome',
#           'simulation_s1_v0/simulation_s1_deepbiome_l1',
# #           'simulation_s1_v0/simulation_s1_deepbiome_wo_noise',
# #           'simulation_s1_v0/simulation_s1_deepbiome_wo_noise_trueinit',
# #           'simulation_s1_v0/simulation_s1_deepbiome_noise_decay',
#          ]

# models_aka = [
#           'DNN',
#           'DNN+l1',
#           'DeepBiome',
#           'DeepBiome+l1',
# #           'deepbiome_wo_noise',
# #           'deepbiome_wo_noise_trueinit',
# #           'deepbiome_noise_decay',
#          ]
# num_classes = 0

#######################################################################

# filenames = 'simulation_s2.Rmd'
# models = [
#           'simulation_s2_v0/simulation_s2_deep',
#           'simulation_s2_v0/simulation_s2_deep_l1',
#           'simulation_s2_v0/simulation_s2_deepbiome',
#           'simulation_s2_v0/simulation_s2_deepbiome_l1',
# #           'simulation_s2_v0/simulation_s2_deepbiome_wo_noise',
# #           'simulation_s2_v0/simulation_s2_deepbiome_wo_noise_trueinit',
# #           'simulation_s2_v0/simulation_s2_deepbiome_noise_decay',
#          ]

# models_aka = [
#           'DNN',
#           'DNN+l1',
#           'DeepBiome',
#           'DeepBiome+l1',
# #           'deepbiome_wo_noise',
# #           'deepbiome_wo_noise_trueinit',
# #           'deepbiome_noise_decay',
#          ]
# num_classes = 1

# ########################################################################

# filenames = 'simulation_s2_v1.Rmd'
# models = [
#           'simulation_s2_v1/simulation_s2_deep',
#           'simulation_s2_v1/simulation_s2_deep_l1',
#           'simulation_s2_v1/simulation_s2_deepbiome',
#           'simulation_s2_v1/simulation_s2_deepbiome_l1',
#           'simulation_s2_v1/simulation_s2_deepbiome_wo_noise',
#           'simulation_s2_v1/simulation_s2_deepbiome_wo_noise_trueinit',
#           'simulation_s2_v1/simulation_s2_deepbiome_noise_decay',
#          ]

# models_aka = [
#           'DNN',
#           'DNN+l1',
#           'DeepBiome',
#           'DeepBiome+l1',
#           'deepbiome_wo_noise',
#           'deepbiome_wo_noise_trueinit',
#           'deepbiome_noise_decay',
#          ]
# num_classes = 1

# #######################################################################

# filenames = 'simulation_s3.Rmd'
# models = [
#           'simulation_s3_v0/simulation_s3_deep',
#           'simulation_s3_v0/simulation_s3_deep_l1',
#           'simulation_s3_v0/simulation_s3_deepbiome',
#           'simulation_s3_v0/simulation_s3_deepbiome_l1',
# #           'simulation_s3_v0/simulation_s3_deepbiome_wo_noise',
# #           'simulation_s3_v0/simulation_s3_deepbiome_wo_noise_trueinit',
# #           'simulation_s3_v0/simulation_s3_deepbiome_noise_decay',
#          ]

# models_aka = [
#           'DNN',
#           'DNN+l1',
#           'DeepBiome',
#           'DeepBiome+l1',
# #           'deepbiome_wo_noise',
# #           'deepbiome_wo_noise_trueinit',
# #           'deepbiome_noise_decay',
#          ]
# num_classes = 3

# # ########################################################################

# filenames = 'simulation_s4.Rmd'
# models = [
#           'simulation_s4_v0/simulation_s4_deep',
#           'simulation_s4_v0/simulation_s4_deep_l1',
#           'simulation_s4_v0/simulation_s4_deepbiome',
#           'simulation_s4_v0/simulation_s4_deepbiome_l1',
# #           'simulation_s4_v0/simulation_s4_deepbiome_wo_noise',
# #           'simulation_s4_v0/simulation_s4_deepbiome_wo_noise_trueinit',
# #           'simulation_s4_v0/simulation_s4_deepbiome_noise_decay',
#          ]

# models_aka = [
#           'DNN',
#           'DNN+l1',
#           'DeepBiome',
#           'DeepBiome+l1',
# #           'deepbiome_wo_noise',
# #           'deepbiome_wo_noise_trueinit',
# #           'deepbiome_noise_decay',
#          ]
# num_classes = 0

########################################################################

In [8]:
model_network_info = {}
model_path_info = {}
for model_path in models:
    config_data = configuration.Configurator('%s/config/path_info.cfg' % model_path, log, verbose=False)
    config_data.set_config_map(config_data.get_section_map())
    config_network = configuration.Configurator('%s/config/network_info.cfg' % model_path, log, verbose=False)
    config_network.set_config_map(config_network.get_section_map())

    model_path_info[model_path] = config_data.get_config_map()
    model_network_info[model_path] = config_network.get_config_map()

In [9]:
if num_classes == 0: y_names = ['loss','correlation_coefficient']
elif num_classes==1: y_names = ['loss','binary_accuracy','sensitivity','specificity','gmeasure', 'auc']
else: y_names=['loss','categorical_accuracy','precision','recall','f1', 'auc']

if num_classes == 0: measure_index = np.array([0,1])
elif num_classes==1: measure_index = np.array([2,3,4,1,5])
else: measure_index = np.array([1,2,3,4,5])

## History

#### per Epochs

In [10]:
# fig = plot_history(models, models_aka=models_aka, history_types = ['validation','train'], 
#          y_names = y_names, x_name='epochs', mode='summary')
# if save: fig.savefig('%s/%s.png' % (fig_dir, 'hist'), pad_inches=0.01)
# fig.show()

#### per Iterations

In [11]:
# fig = plot_history(models, models_aka=models_aka,
#              history_types = ['validation','train'], 
#              y_names = ['binary_accuracy','sensitivity','specificity','gmeasure'], x_name='iterations', niters=niters)
# if save: fig.savefig('%s/%s.png' % (fig_dir, 'hist'), pad_inches=0.01)
# fig.show()

## Accuracy

In [12]:
results = []
# log.info('%20s & %s' % ('model', '& '.join(['%s ' % name for name in np.array(y_names)[[measure_index]]])))
print('%20s & %s \\\\\ \hline' % ('model', '& '.join(['%s ' % name for name in np.array(y_names)[[measure_index]]])))
# for model, aka in zip(models, models_aka):
#     evaluation = np.load('%s/eval.npy' % model)
#     log.info('%20s: %s' % (aka, ''.join(['%10.4f (%10.4f)'%(mean, std) for mean, std in zip(np.mean(evaluation, axis=0),np.std(evaluation, axis=0))])))
#     results.append(np.vstack([np.mean(evaluation, axis=0),np.std(evaluation, axis=0)]).transpose())
for model, aka in zip(models, models_aka):
    train_evaluation = np.load('%s/train_eval.npy' % model)[:,measure_index]
    train_res = '&'.join(['%10.3f & %10.3f'%(mean, std) for mean, std in zip(np.mean(train_evaluation, axis=0),np.std(train_evaluation, axis=0))])
    test_evaluation = np.load('%s/test_eval.npy' % model)[:,measure_index]
    test_res = '&'.join(['%10.3f & %10.3f'%(mean, std) for mean, std in zip(np.mean(test_evaluation, axis=0),np.std(test_evaluation, axis=0))])
#     log.info('%s & %s & %s \\\\' % (aka, train_res, test_res))
    print('%s & %s & %s \\\\' % (aka, test_res, train_res))
#     results.append(np.vstack([np.mean(evaluation, axis=0),np.std(evaluation, axis=0)]).transpose())

               model & loss & correlation_coefficient  \\\ \hline
DNN &      2.570 &      6.577&     0.911 &      0.054 &      1.603 &      5.867&     0.943 &      0.041 \\
DNN+l1 &      2.719 &      7.555&     0.911 &      0.048 &      1.794 &      7.875&     0.941 &      0.059 \\
DeepBiome &      2.618 &      5.509&     0.896 &      0.096 &      1.941 &      5.018&     0.919 &      0.083 \\
DeepBiome+l1 &      2.907 &      6.967&     0.887 &      0.120 &      2.188 &      6.570&     0.914 &      0.097 \\


# Choose Model

In [13]:
num=1
model_path = models[num]
model_aka = models_aka[num]

config_data = configuration.Configurator('%s/config/path_info.cfg' % model_path, log, verbose=False)
config_data.set_config_map(config_data.get_section_map())
config_network = configuration.Configurator('%s/config/network_info.cfg' % model_path, log, verbose=False)
config_network.set_config_map(config_network.get_section_map())

path_info = config_data.get_config_map()
network_info = config_network.get_config_map()

path_info['data_info']['data_path'] = '/'.join(path_info['data_info']['data_path'].split('/')[2:])
path_info['data_info']['tree_info_path'] = '/'.join(path_info['data_info']['tree_info_path'].split('/')[2:])
try: path_info['data_info']['count_list_path'] = '/'.join(path_info['data_info']['count_list_path'].split('/')[2:])
except: pass
try: path_info['data_info']['count_path'] = '/'.join(path_info['data_info']['count_path'].split('/')[2:])
except: pass
path_info['data_info']['idx_path'] = '/'.join(path_info['data_info']['idx_path'].split('/')[2:])
try: path_info['data_info']['disease_weight_path'] = '/'.join(path_info['data_info']['disease_weight_path'].split('/')[2:])
except: pass

log.info('%22s : %s' % ('model', model_path))
log.info('%22s : %s' % ('model_aka', model_aka))
for k in architecture_keys:
    log.info('%22s : %s' % (k, network_info['architecture_info'].get(k, None)))
for k in network_model_keys:
    log.info('%22s : %s' % (k, network_info['model_info'].get(k, None)))
for k in network_training_keys:
    log.info('%22s : %s' % (k, network_info['training_info'].get(k, None)))

[root    |INFO|<ipython-input-13-3ecb0a3edfce>:23]                  model : simulation_s0_v0/simulation_s0_deep_l1
[root    |INFO|<ipython-input-13-3ecb0a3edfce>:24]              model_aka : DNN+l1
[root    |INFO|<ipython-input-13-3ecb0a3edfce>:26]           weight_decay : None
[root    |INFO|<ipython-input-13-3ecb0a3edfce>:26]      weight_l1_penalty : 0.01
[root    |INFO|<ipython-input-13-3ecb0a3edfce>:26]              tree_thrd : None
[root    |INFO|<ipython-input-13-3ecb0a3edfce>:26]         weight_initial : glorot_uniform
[root    |INFO|<ipython-input-13-3ecb0a3edfce>:26]    batch_normalization : False
[root    |INFO|<ipython-input-13-3ecb0a3edfce>:26]               drop_out : 0
[root    |INFO|<ipython-input-13-3ecb0a3edfce>:28]              optimizer : adam
[root    |INFO|<ipython-input-13-3ecb0a3edfce>:28]                     lr : 0.01
[root    |INFO|<ipython-input-13-3ecb0a3edfce>:28]                  decay : 0.0001
[root    |INFO|<ipython-input-13-3ecb0a3edfce>:30]             

In [15]:
evaluation = np.load('%s/test_eval.npy' % model_path)
log.info('\t %s'%' '.join(['%s' % name for name in y_names]))

_ = [log.info('%d fold : %s' % (i,line)) for i, line in enumerate(evaluation)]
log.info('Mean   : %s' % np.mean(evaluation, axis=0))
log.info('Std   : %s' % np.std(evaluation, axis=0))

# _ = [print('%d fold & %s \\tabularnewline' % (i, ' & '.join(['%.3f'% v for v in line]))) for i, line in enumerate(evaluation)]
# print('Mean & %s \\tabularnewline' % (' & '.join(['%.3f'% v for v in np.mean(evaluation, axis=0)])))
# print('Sd & %s \\tabularnewline' % (' & '.join(['%.3f'% v for v in np.std(evaluation, axis=0)])))

[root    |INFO|<ipython-input-15-65b4b58a34e1>:2] 	 loss correlation_coefficient
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 0 fold : [4.22195339 0.90251946]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 1 fold : [1.62369931 0.93354899]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 2 fold : [1.56435883 0.91182041]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 3 fold : [1.11601484 0.93383145]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 4 fold : [4.17477274 0.87744462]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 5 fold : [1.9615711  0.89653951]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 6 fold : [1.2990464  0.94176751]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 7 fold : [0.56929946 0.97591466]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 8 fold : [1.26305914 0.95151985]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 9 fold : [4.77997541 0.87628257]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 10 fold : [3.20676303 0.83381879]
[root

[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 97 fold : [2.81707454 0.89760411]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 98 fold : [4.36709118 0.83923447]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 99 fold : [1.83036709 0.93397719]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 100 fold : [1.77165854 0.93769383]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 101 fold : [2.38209057 0.89933461]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 102 fold : [3.27013731 0.88043362]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 103 fold : [1.86713481 0.92052042]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 104 fold : [2.7715435  0.91559714]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 105 fold : [1.32605374 0.95738298]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 106 fold : [1.02593768 0.93289268]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 107 fold : [7.96587276 0.72029418]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 108 fold : [1.6200

[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 194 fold : [2.3589716  0.85550088]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 195 fold : [0.28358391 0.98061663]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 196 fold : [2.61960244 0.93352884]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 197 fold : [1.19724607 0.92717135]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 198 fold : [1.23037505 0.91698623]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 199 fold : [1.66341114 0.94070023]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 200 fold : [1.2776351  0.93102539]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 201 fold : [2.54070401 0.85023874]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 202 fold : [1.95512044 0.91119796]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 203 fold : [2.26407695 0.93702042]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 204 fold : [1.96168423 0.91325253]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 205 fold : [3.8

[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 291 fold : [2.9546237  0.88917965]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 292 fold : [0.96796674 0.97419065]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 293 fold : [3.50899553 0.91258109]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 294 fold : [1.39153993 0.92658591]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 295 fold : [1.74014151 0.93472189]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 296 fold : [2.74501228 0.90391272]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 297 fold : [1.25621402 0.90647006]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 298 fold : [2.84056759 0.94211644]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 299 fold : [4.40662766 0.84750777]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 300 fold : [0.23659945 0.98864436]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 301 fold : [1.51724291 0.91867042]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 302 fold : [4.8

[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 388 fold : [2.92487812 0.85996419]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 389 fold : [0.83477873 0.96876073]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 390 fold : [3.71132398 0.91354316]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 391 fold : [4.67941952 0.8798095 ]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 392 fold : [1.3671875  0.93717134]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 393 fold : [0.80352676 0.97072035]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 394 fold : [4.40766668 0.86382484]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 395 fold : [2.31695652 0.92366678]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 396 fold : [1.71889806 0.93172842]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 397 fold : [0.93320519 0.92874318]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 398 fold : [1.38452244 0.950647  ]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 399 fold : [1.9

[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 485 fold : [2.19770169 0.90229148]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 486 fold : [2.60845304 0.89169282]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 487 fold : [2.23313737 0.90421265]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 488 fold : [1.54892516 0.9114036 ]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 489 fold : [1.01086771 0.9313134 ]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 490 fold : [1.16557157 0.94781685]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 491 fold : [1.89876103 0.94699645]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 492 fold : [0.568196   0.98249501]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 493 fold : [2.14553189 0.91180992]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 494 fold : [2.01757932 0.94363642]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 495 fold : [1.80799997 0.94028848]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 496 fold : [2.1

[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 582 fold : [1.83303058 0.90533251]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 583 fold : [7.53250885 0.84385109]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 584 fold : [3.54253507 0.89075774]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 585 fold : [1.18063354 0.89969164]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 586 fold : [0.76362324 0.94208211]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 587 fold : [2.95684862 0.88225091]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 588 fold : [2.37021327 0.89298612]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 589 fold : [0.99401635 0.93213809]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 590 fold : [1.46050739 0.92152858]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 591 fold : [2.88179183 0.87941569]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 592 fold : [1.59978664 0.94315982]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 593 fold : [2.5

[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 679 fold : [1.11932039 0.90199   ]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 680 fold : [0.46490347 0.97847182]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 681 fold : [2.38099408 0.90791309]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 682 fold : [1.96600425 0.91866404]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 683 fold : [2.99825144 0.92515528]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 684 fold : [0.44545501 0.98355448]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 685 fold : [1.2056675  0.93440187]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 686 fold : [1.63716304 0.93222755]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 687 fold : [3.57824397 0.90631491]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 688 fold : [1.13962603 0.88225752]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 689 fold : [3.47893023 0.84827954]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 690 fold : [2.0

[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 776 fold : [2.41515112 0.89283407]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 777 fold : [2.43843222 0.92498511]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 778 fold : [3.4621563  0.90248334]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 779 fold : [2.41860747 0.9189871 ]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 780 fold : [1.15237081 0.89979196]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 781 fold : [1.74724674 0.93318218]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 782 fold : [1.98358774 0.92167735]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 783 fold : [1.10839283 0.90999466]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 784 fold : [2.7910285  0.90656579]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 785 fold : [1.21818101 0.92604333]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 786 fold : [1.93326664 0.9193247 ]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 787 fold : [2.6

[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 873 fold : [0.57025635 0.96067953]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 874 fold : [0.89302772 0.94000494]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 875 fold : [3.27604651 0.89326644]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 876 fold : [3.0959568  0.86430246]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 877 fold : [1.60456884 0.94410741]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 878 fold : [1.7790091  0.93791634]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 879 fold : [0.58725142 0.90785867]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 880 fold : [4.42086983 0.79943359]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 881 fold : [1.88651741 0.94122434]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 882 fold : [5.44020748 0.90194291]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 883 fold : [1.05516768 0.94399297]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 884 fold : [0.9

[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 970 fold : [2.20695233 0.91613722]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 971 fold : [2.87404752 0.90044737]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 972 fold : [2.16078901 0.92765087]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 973 fold : [0.83125246 0.95056063]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 974 fold : [1.2794199  0.94013298]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 975 fold : [0.52062231 0.93192947]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 976 fold : [3.49050164 0.89495206]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 977 fold : [1.73061597 0.9122867 ]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 978 fold : [2.03982568 0.88312483]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 979 fold : [1.41093898 0.95093387]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 980 fold : [2.74439573 0.8936761 ]
[root    |INFO|<ipython-input-15-65b4b58a34e1>:4] 981 fold : [2.0

## Weight estimation of DeepBiom

We identify the largest weight estimatio of neurons in two hidden layers; by doing this, we can identify the strongest phylogenetic connections. We compute the True Positive Rate (``TPR``, sensitivity), True Negative Rate (``TNR``, specificity), and their geometric mean (i.e., ``g-Measure``). The false discovery rate (FDR) would be ``FDR = 1-TPR`` in our case.

In [16]:
def texa_selection_accuracy(tree_weight_list, true_tree_weight_list):
    accuracy_list = []
    for i in range(len(true_tree_weight_list)):
        tree_tw = true_tree_weight_list[i].astype(np.int32)
#         tree_tw = (true_tree_weight_list[i]>0).astype(np.int32)
        tree_w = np.zeros_like(tree_tw, dtype=np.int32)
        tree_w_abs = np.abs(tree_weight_list[i])
        for row, maxcol in enumerate(np.argmax(tree_w_abs, axis=1)):
            tree_w[row,maxcol] = tree_w_abs[row,maxcol]
#         tree_w = (tree_w > 1e-2).astype(np.int32)
        tree_w = (tree_w > 0).astype(np.int32)
        num_selected_texa = np.sum(np.sum(tree_w, axis=1)>0)
        sensitivity, specificity, gmeasure, accuracy = metric_texa_test(tree_tw.flatten(), tree_w.flatten())
        accuracy_list.append([num_selected_texa, sensitivity, specificity, gmeasure, accuracy])
    return accuracy_list

def texa_selection_accuracy_2(tree_weight_list, true_tree_weight_list):
    accuracy_list = []
    for i in range(len(true_tree_weight_list)):
        tree_tw = true_tree_weight_list[i].astype(np.int32)
#         tree_tw = (true_tree_weight_list[i]>0).astype(np.int32)
        tree_w = np.zeros_like(tree_tw, dtype=np.int32)
        tree_w_abs = np.abs(tree_weight_list[i])
        for row in range(tree_w_abs.shape[0]):
#             tree_w[row,:] = (tree_w_abs[row,:]> 0).astype(np.int32)
            tree_w[row,:] = (tree_w_abs[row,:]> 1e-2).astype(np.int32)
        num_selected_texa = np.sum(np.sum(tree_w, axis=1)>0)
        sensitivity, specificity, gmeasure, accuracy = metric_texa_test(tree_tw.flatten(), tree_w.flatten())
        accuracy_list.append([num_selected_texa, sensitivity, specificity, gmeasure, accuracy])
    return accuracy_list

### Accuracy

In [17]:
model_weight_path = './%s/%s' % (model_path, path_info['model_info']['weight'])

network_class = getattr(build_network, network_info['model_info']['network_class'].strip()) 
# network = network_class(network_info, path_info['data_info'], log, fold=0, num_classes=max(1,num_classes))
network = network_class(network_info, path_info['data_info'], log, fold=0, num_classes=num_classes)
network.model_compile()

[root    |INFO|build_network.py:507] ------------------------------------------------------------------------------------------
[root    |INFO|build_network.py:508] Read phylogenetic tree information from data/genus48/genus48_dic.csv
[root    |INFO|build_network.py:512] Phylogenetic tree level list: ['Genus', 'Family', 'Order', 'Class', 'Phylum']
[root    |INFO|build_network.py:513] ------------------------------------------------------------------------------------------
[root    |INFO|build_network.py:518]      Genus: 48
[root    |INFO|build_network.py:518]     Family: 40
[root    |INFO|build_network.py:518]      Order: 23
[root    |INFO|build_network.py:518]      Class: 17
[root    |INFO|build_network.py:518]     Phylum: 9
[root    |INFO|build_network.py:521] ------------------------------------------------------------------------------------------
[root    |INFO|build_network.py:522] Phylogenetic_tree_dict info: ['Number', 'Family', 'Order', 'Phylum', 'Genus', 'Class']
[root    |IN

Instructions for updating:
Colocations handled automatically by placer.


Instructions for updating:
Colocations handled automatically by placer.
[root    |INFO|build_network.py:635] ------------------------------------------------------------------------------------------
[root    |INFO|build_network.py:56] Build Network
[root    |INFO|build_network.py:57] Optimizer = adam
[root    |INFO|build_network.py:58] Loss = mean_squared_error
[root    |INFO|build_network.py:59] Metrics = correlation_coefficient


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input (InputLayer)           (None, 48)                0         
_________________________________________________________________
l1_dense (Dense_with_new_tre (None, 40)                1960      
_________________________________________________________________
l1_activation (Activation)   (None, 40)                0         
_________________________________________________________________
l2_dense (Dense_with_new_tre (None, 23)                943       
_________________________________________________________________
l2_activation (Activation)   (None, 23)                0         
_________________________________________________________________
l3_dense (Dense_with_new_tre (None, 17)                408       
_________________________________________________________________
l3_activation (Activation)   (None, 17)                0         
__________

In [18]:
accuracy_list = []
for fold in range(kfold):
    network.fold = fold
    network.load_weights(file_path_fold(model_weight_path, fold), verbose=False)
    true_tree_weight_list = network.load_true_tree_weight_list(path_info['data_info']['data_path'])
    tree_weight_list = network.get_trained_weight()
    accuracy_list.append(np.array(texa_selection_accuracy(tree_weight_list, true_tree_weight_list)))
accuracy_list = np.array(accuracy_list)[:,:,1:]

# print('%7s, %12s, %12s, %12s, %12s, %12s, %12s' % ('Model','True (Total)','Selected','Sensitivity','Specificity','gMeasure','Accuracy'))
# print('---------------------------------------------------------------------------------------------------------------')
# values = []
# for i, (mean, std) in enumerate(zip(np.mean(accuracy_list, axis=0), np.std(accuracy_list, axis=0))):
#     tree_tw = true_tree_weight_list[i].astype(np.int32)
#     args = [network.tree_level_list[i], np.sum(np.sum(tree_tw, axis=1)>0), tree_tw.shape[0]]+ np.stack([mean, std]).T.flatten().tolist()
#     value = '%7s, %7d (%2d), %7d (%2d), %5.3f (%5.3f), %5.3f (%5.3f), %5.3f (%5.3f), %5.3f (%5.3f)' % tuple(args)
#     values.append(value.split(','))
    
print('%7s, %12s, %12s, %12s, %12s, %12s, %12s' % ('Model','True (Total)','Selected','Sensitivity','Specificity','gMeasure','Accuracy'))
print('---------------------------------------------------------------------------------------------------------------')
values = []
for i, (mean, std) in enumerate(zip(np.mean(accuracy_list, axis=0), np.std(accuracy_list, axis=0))):
    tree_tw = true_tree_weight_list[i].astype(np.int32)
    args = [network.tree_level_list[i], np.sum(np.sum(tree_tw, axis=1)>0), tree_tw.shape[0]]
    value = '%7s & %7d (%2d)' % tuple(args)
    value = '%s & %s \\\\' % (value, '&'.join(['%6.3f & %6.3f'%(m,s) for m, s in zip(mean, std)]))
    if i == 0: print('%10s & %s' % (model_aka, value))
    else: print('%10s & %s' % ('', value))
    values.append(value.split(','))
    
# if save: 
#     # filenametexa = '.'.join(["%s_select_texa_1" % filename.split('.')[0], filename.split('.')[1]])
#     colname = ['Tree','True (Total)','Selected','Sensitivity','Specificity','gMeasure','Accuracy']
#     with open('%s/%s' % (analysis_dir, filename), mode='a') as f:
#     #     f.write('---\ntitle: "%s texa selection ver.1"\noutput: html_document\n---\n\n' % filename.split('.')[0])
#         f.write('\n## Texa Selection Preformance (ver 1): %s\n\n' % model_aka)
#         f.write('| %s |\n' % ('|'.join([v for v in colname])))
#         f.write('|'+'---|'*len(colname)+'\n')
#         for value in values:
#             f.write('| %s |\n' % ('|'.join(value)))

  Model, True (Total),     Selected,  Sensitivity,  Specificity,     gMeasure,     Accuracy
---------------------------------------------------------------------------------------------------------------
    DNN+l1 &   Genus &      31 (48) &  0.002 &  0.008& 0.999 &  0.002& 0.011 &  0.043& 0.982 &  0.002 \\
           &  Family &      23 (40) &  0.001 &  0.007& 0.999 &  0.001& 0.004 &  0.030& 0.974 &  0.001 \\
           &   Order &       9 (23) &  0.000 &  0.004& 1.000 &  0.001& 0.000 &  0.011& 0.977 &  0.001 \\
           &   Class &       7 (17) &  0.000 &  0.000& 1.000 &  0.001& 0.000 &  0.000& 0.954 &  0.001 \\


In [19]:
accuracy_list = []
for fold in range(kfold):
    network.fold = fold
    network.load_weights(file_path_fold(model_weight_path, fold), verbose=False)
    true_tree_weight_list = network.load_true_tree_weight_list(path_info['data_info']['data_path'])
    tree_weight_list = network.get_trained_weight()
    accuracy_list.append(np.array(texa_selection_accuracy(tree_weight_list, true_tree_weight_list)))
accuracy_list = np.array(accuracy_list)[:,:,1:]

# print('%7s, %12s, %12s, %12s, %12s, %12s, %12s' % ('Model','True (Total)','Selected','Sensitivity','Specificity','gMeasure','Accuracy'))
# print('---------------------------------------------------------------------------------------------------------------')
# values = []
# for i, (mean, std) in enumerate(zip(np.mean(accuracy_list, axis=0), np.std(accuracy_list, axis=0))):
#     tree_tw = true_tree_weight_list[i].astype(np.int32)
#     args = [network.tree_level_list[i], np.sum(np.sum(tree_tw, axis=1)>0), tree_tw.shape[0]]+ np.stack([mean, std]).T.flatten().tolist()
#     value = '%7s, %7d (%2d), %7d (%2d), %5.3f (%5.3f), %5.3f (%5.3f), %5.3f (%5.3f), %5.3f (%5.3f)' % tuple(args)
#     values.append(value.split(','))
    
print('%7s, %12s, %12s, %12s, %12s, %12s, %12s' % ('Model','True (Total)','Selected','Sensitivity','Specificity','gMeasure','Accuracy'))
print('---------------------------------------------------------------------------------------------------------------')
values = []
for i, (mean, std) in enumerate(zip(np.mean(accuracy_list, axis=0), np.std(accuracy_list, axis=0))):
    tree_tw = true_tree_weight_list[i].astype(np.int32)
    args = [network.tree_level_list[i], np.sum(np.sum(tree_tw, axis=1)>0), tree_tw.shape[0]]
    value = '%7s & %7d (%2d)' % tuple(args)
    value = '%s & %s \\\\' % (value, '&'.join(['%6.3f & %6.3f'%(m,s) for m, s in zip(mean, std)]))
    if i == 0: print('%10s & %s' % (model_aka, value))
    else: print('%10s & %s' % ('', value))
    values.append(value.split(','))
    
# if save: 
#     # filenametexa = '.'.join(["%s_select_texa_1" % filename.split('.')[0], filename.split('.')[1]])
#     colname = ['Tree','True (Total)','Selected','Sensitivity','Specificity','gMeasure','Accuracy']
#     with open('%s/%s' % (analysis_dir, filename), mode='a') as f:
#     #     f.write('---\ntitle: "%s texa selection ver.1"\noutput: html_document\n---\n\n' % filename.split('.')[0])
#         f.write('\n## Texa Selection Preformance (ver 1): %s\n\n' % model_aka)
#         f.write('| %s |\n' % ('|'.join([v for v in colname])))
#         f.write('|'+'---|'*len(colname)+'\n')
#         for value in values:
#             f.write('| %s |\n' % ('|'.join(value)))

  Model, True (Total),     Selected,  Sensitivity,  Specificity,     gMeasure,     Accuracy
---------------------------------------------------------------------------------------------------------------
    DNN+l1 &   Genus &      31 (48) &  0.002 &  0.008& 0.999 &  0.002& 0.011 &  0.043& 0.982 &  0.002 \\
           &  Family &      23 (40) &  0.001 &  0.007& 0.999 &  0.001& 0.004 &  0.030& 0.974 &  0.001 \\
           &   Order &       9 (23) &  0.000 &  0.004& 1.000 &  0.001& 0.000 &  0.011& 0.977 &  0.001 \\
           &   Class &       7 (17) &  0.000 &  0.000& 1.000 &  0.001& 0.000 &  0.000& 0.954 &  0.001 \\
