In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
import networkx as nx
import toolz as tz

import logging
import warnings

# `IPython.display` is the documented public entry point for rich display
# objects; `IPython.core.display` is an internal module path.
from IPython.display import HTML

from eden.util import configure_logging

# `logging.getLogger()` with no name returns the root logger, so the
# configuration applied by `configure_logging` is inherited by every logger.
logger = logging.getLogger()
configure_logging(logger, verbosity=1)

# NOTE: this silences *all* Python warnings for the rest of the session
# (including deprecation warnings from the libraries used below).
warnings.filterwarnings('ignore')

# Notebook cosmetics: widen the page container to 95% and centre PNG outputs.
# This must stay the last expression of the cell, otherwise the HTML object is
# not rendered and the styles are not applied.
HTML(
    '<style>.container { width:95% !important; }</style>'
    '<style>.output_png {display: table-cell;text-align: center;vertical-align: middle;}</style>')

# Data setup

In [None]:
%%time

# Selects which experiment is built below: 'ARTIFICIAL' or 'CHEMICAL'.
EXPERIMENT_TYPE = 'ARTIFICIAL'

# Each branch imports the display helpers from the matching utils module under
# the same names, so the following cells work unchanged for either experiment.
if EXPERIMENT_TYPE == 'ARTIFICIAL':
    from utils_artificial import build_artificial_experiment
    from utils_artificial import display_ktop_graphs, draw_graphs, draw_history
    from utils_artificial import display_score_statistics
    from utils_artificial import remove_duplicates
    res = build_artificial_experiment(
        GRAPH_TYPE='degree',  # path  tree  degree  regular  dense
        n_init_instances=10,
        n_domain_instances=100,
        alphabet_size=4,
        diversity=5,
        max_score_threshold=.7)
elif EXPERIMENT_TYPE == 'CHEMICAL':
    from utils_chemoinformatics import build_chemical_experiment
    from utils_chemoinformatics import display_ktop_graphs, draw_graphs, draw_history
    from utils_chemoinformatics import display_score_statistics
    from utils_chemoinformatics import remove_duplicates
    res = build_chemical_experiment(
        assay_id='743219',  # assay_ids = ['624466','492992','463230','651741','743219','588350','492952','624249','463213','2631','651610']
        n_init_instances=50,
        n_domain_instances=300,
        max_score_threshold=.8,
        n_targets=2)
else:
    # Without this guard a typo in EXPERIMENT_TYPE skips both branches and the
    # cell dies further down with an unhelpful NameError on `res`.
    raise ValueError(
        "Unknown EXPERIMENT_TYPE %r: expected 'ARTIFICIAL' or 'CHEMICAL'"
        % (EXPERIMENT_TYPE,))

# Both builders return the same 4-tuple, unpacked here for the later cells.
init_graphs, domain_graphs, oracle_func, target_graph = res
print('Generated %d graphs' % len(domain_graphs))
display_score_statistics(domain_graphs, oracle_func)

# The target graph is appended to the initial sample for this display only;
# `init_graphs` itself is left unchanged (list concatenation builds a new list).
print('Best graphs in initial sample of %d' % len(init_graphs))
display_ktop_graphs(init_graphs + [target_graph], oracle_func, n_max=6)

# Experiments

In [None]:
%%time
from ego.optimization.optimize import optimizer_setup, optimize

# performance monitor
from utils_monitor_with_target import make_monitor
monitor = make_monitor(target_graph, oracle_func, draw_graphs, draw_history)

# Import exactly the decomposition helpers used below instead of
# `from ego.setup import *`: a wildcard import hides where each name comes
# from and could silently rebind names already defined above (e.g. `monitor`
# or `optimize`) if the module happened to export them.
from ego.setup import (
    do_decompose,
    decompose_nodes_and_edges,
    decompose_path,
    decompose_neighborhood,
    decompose_cycles,
)

# Combined decomposition built from several component decompositions.
# `decompose_neighborhood` is passed once without arguments (library default
# settings) and twice with an explicit radius.
decomposition_function = do_decompose(
    decompose_nodes_and_edges,
    decompose_path(length=2),
    decompose_neighborhood,
    decompose_neighborhood(radius=2),
    decompose_neighborhood(radius=3),
    decompose_cycles)

# The same decomposition is used for the score estimator and for both
# grammars; the separate names allow changing each one independently.
decomposition_score_estimator = decomposition_function
decomposition_fixed_grammar = decomposition_function
decomposition_adaptive_grammar = decomposition_function

# Keyword arguments for `optimizer_setup`, grouped by theme. The groups are
# merged in the order listed and forwarded unchanged as keyword arguments.

# Score estimator selection and its settings.
estimator_options = dict(
    decomposition_score_estimator=decomposition_score_estimator,
    use_UCB_estimator=False,
    use_RandomForest_estimator=True,
    use_Linear_estimator=False,
    use_EI_estimator=False,
    n_estimators=200,
    exploration_vs_exploitation=0,
)

# Edge-level neighborhood operators: on/off switch plus size settings.
edge_operator_options = dict(
    use_edge_swapping=True,
    n_neighbors_edge_swapping=None,
    n_edge_swapping=1,
    use_edge_label_swapping=False,
    n_neighbors_edge_label_swapping=None,
    n_edge_label_swapping=1,
    use_edge_label_mutation=False,
    n_neighbors_edge_mutation=100,
    n_edge_mutation=1,
    use_edge_removal=True,
    n_neighbors_edge_removal=None,
    n_edge_removal=1,
    use_edge_addition=True,
    n_neighbors_edge_addition=100,
    n_edge_addition=1,
)

# Node-level neighborhood operators: on/off switch plus size settings.
node_operator_options = dict(
    use_node_label_swapping=False,
    n_neighbors_node_label_swapping=None,
    n_node_label_swapping=1,
    use_node_label_mutation=True,
    n_neighbors_node_mutation=100,
    n_node_mutation=1,
    use_node_removal=False,
    n_neighbors_node_removal=None,
    n_node_removal=1,
    use_node_addition=True,
    n_neighbors_node_addition=100,
    n_node_addition=1,
)

# Grammar-based operators; the fixed grammar is given the domain graphs.
grammar_options = dict(
    use_fixed_grammar=True,
    n_neighbors_fixed_grammar=None,
    conservativeness_fixed_grammar=3,
    context_size_fixed_grammar=1,
    decomposition_fixed_grammar=decomposition_fixed_grammar,
    domain_graphs_fixed_grammar=domain_graphs,
    use_adaptive_grammar=True,
    n_neighbors_adaptive_grammar=None,
    conservativeness_adaptive_grammar=2,
    context_size_adaptive_grammar=1,
    part_size_adaptive_grammar=4,
    decomposition_adaptive_grammar=decomposition_adaptive_grammar,
)

setup_options = {}
for option_group in (estimator_options, edge_operator_options,
                     node_operator_options, grammar_options):
    setup_options.update(option_group)

neighborhood_estimators, score_estimator = optimizer_setup(**setup_options)

# Start from a shallow copy of the initial sample, then rebind `graphs` to
# whatever `optimize` returns.
graphs = list(init_graphs)

# Tuning knobs for the optimization run, forwarded as keyword arguments.
optimize_options = dict(
    n_iter=100,
    n_queries_to_oracle_per_iter=200,
    frac_instances_to_remove_per_iter=.5,
    sample_size_to_perturb=4,
    n_steps_driven_by_estimator=1,
    sample_size_for_grammars=40,
    neighborhood_estimators=neighborhood_estimators,
    score_estimator=score_estimator,
    monitor=monitor,
)
graphs = optimize(graphs, oracle_func, **optimize_options)

In [None]:
# Number of graphs shown for the target display; the final display of the
# synthesized set shows twice as many.
top_k = 6

# Show the target graph with the same top-k display helper used for the samples.
print('Target')
display_ktop_graphs([target_graph], oracle_func, n_max=top_k)

# History plot over the graphs returned by the optimization cell.
print('Evolution of best synthesized graph in set of size %d' % len(graphs))
draw_history(graphs, oracle_func)

# Top graphs in the synthesized set.
print('Best synthesized graphs in set of size %d' % len(graphs))
display_ktop_graphs(graphs, oracle_func, n_max=2 * top_k)

---