# Prediction Algorithms Complete

## Preliminaries

In [14]:
import os
import sys
import networkx as nx
import numpy as np

from os.path import dirname
from networkx.drawing.nx_pydot import to_pydot

# Import morpheus
note_dir = os.getcwd()
root_dir = dirname(note_dir)
src_dir = os.path.join(root_dir, "src")

sys.path.append(src_dir)

import morpheus

from morpheus.tests import (default_dataset,
                            default_m_list_for_mercs,
                            random_m_list_for_mercs)

from morpheus.graph import (model_to_graph)

from morpheus.algo import (mi_algorithm)

### Methods

In [25]:
def to_dot(g, dname='tmp', fname='test', extension='.dot', return_fname=True, ortho=False):
    """
    Convert a graph to a dot file.
    """
    
    dot = nx.drawing.nx_pydot.to_pydot(g)
    dot.set('rankdir', 'BT')
    
    if ortho:
        dot.set('splines', 'ortho')
    
    full_fname = os.path.join(dname, fname+extension)
    
    print(full_fname)
    
    with open(full_fname, "w") as f:
        print(dot.to_string, file=f)
    
    if return_fname:
        return full_fname
    else:
        return

In [27]:
to_dot(g_res)

tmp/test.dot


FileNotFoundError: [Errno 2] No such file or directory: 'tmp/test.dot'

In [28]:
with open("tmp/lala.dot", "w") as f:
        print("yah", file=f)

FileNotFoundError: [Errno 2] No such file or directory: 'tmp/lala.dot'

## Collect Basic Building Blocks

In [10]:
data, _ = default_dataset()
data = data.values
m_list = default_m_list_for_mercs(data)

g_list = [model_to_graph(m, idx) for idx, m in enumerate(m_list)]


        Learning model with desc ids:    [0, 1, 2, 3, 4, 6, 7]
                            targ ids:    [5]
        

        Learning model with desc ids:    [0, 1, 2, 3, 4, 5, 6]
                            targ ids:    [7]
        

        Learning model with desc ids:    [0, 1, 2, 3, 4, 5, 7]
                            targ ids:    [6]
        

        Learning model with desc ids:    [1, 2, 3, 4, 5, 6, 7]
                            targ ids:    [0]
        

        Learning model with desc ids:    [0, 2, 3, 4, 5, 6, 7]
                            targ ids:    [1]
        

        Learning model with desc ids:    [0, 1, 2, 4, 5, 6, 7]
                            targ ids:    [3]
        

        Learning model with desc ids:    [0, 1, 3, 4, 5, 6, 7]
                            targ ids:    [2]
        

        Learning model with desc ids:    [0, 1, 2, 3, 5, 6, 7]
                            targ ids:    [4]
        


## MI

Testing our most basic prediction algorithm.

In [13]:
q_code = np.array([0,0,0,0,0,-1,1,1])
g_res = mi_algorithm(g_list, q_code)

In [23]:
fname = to_dot(g_res, fname='mi')

FileNotFoundError: [Errno 2] No such file or directory: 'tmp/mi.dot'

## MI

In [None]:
def mi_algorithm(g_list, q_code):
    
    q_desc, q_targ, q_miss = code_to_query(q_code)
    
    def criterion(g):
        outputs = set([g.nodes()[node]['idx'] for node, out_degree in g.out_degree()
                       if out_degree == 0
                       if g.nodes()[node]['kind']=='data'])
        
        
        return len(set(q_targ).intersection(outputs)) > 0
    
    g_relevant = [g for g in g_list if criterion(g)]
    g_relevant = [copy.deepcopy(g) for g in g_relevant]
    
    
    g_relevant = [add_imputation_nodes(g, q_desc) for g in g_relevant]
    
    
    result = reduce(nx.compose, g_relevant)
    
    return result

In [None]:
g_list = Gs
q_code = np.array([0,0,0,0,0,-1,1,1])
g_res = mi_algorithm(g_list, q_code)

In [None]:
# Plot
X = to_pydot(g_res)
X.set('rankdir', 'BT')
#X.set('splines', 'ortho')

with open("tmp/mi.dot", "w") as text_file:
    print(X.to_string(), file=text_file)

!dot -T png ./tmp/mi.dot > ./tmp/mi.png  # Bash command

from IPython.display import Image, display
from IPython.core.display import HTML 

display(Image('tmp/mi.png', unconfined=True))

## MA-algorithm

In [None]:
def ma_algorithm(g_list, q_code, init_threshold=1.0, stepsize=0.1):
    
    q_desc, q_targ, q_miss = code_to_query(q_code)
    
    def criterion(g):
        inputs = set([g.nodes()[node]['idx']
                      for node, in_degree in g.in_degree()
                      if in_degree == 0
                      if g.nodes()[node]['kind']=='data'])
        
        outputs = set([g.nodes()[node]['idx']
                       for node, out_degree in g.out_degree()
                       if out_degree == 0
                       if g.nodes()[node]['kind']=='data'])
        
        yes_no = len(set(q_targ).intersection(outputs)) > 0
        
        quantifier = len(set(q_desc).intersection(inputs))/len(inputs)
        
        result = int(yes_no) * quantifier
        
        msg = """
        yes_no:       {}
        quantifier:   {}
        result:       {}
        """.format(yes_no, quantifier, result)
        print(msg)
        
        return result
    
    
    thresholds = np.clip(np.arange(init_threshold, -stepsize, -stepsize), 0, 1)
    
    for thr in thresholds:
        g_relevant = [g for g in g_list if criterion(g) > thr]
        if len(g_relevant) > 0:
            print('we have found a model at threshold: {}'.format(thr))
            break
    
    g_relevant = [copy.deepcopy(g) for g in g_relevant]
    g_relevant = [add_imputation_nodes(g, q_desc) for g in g_relevant]
    result = reduce(nx.compose, g_relevant)
    
    add_merge_nodes(result)
    
    return result

In [None]:
m_list = random_m_list_for_mercs(data)
m_list += random_m_list_for_mercs(data)

Gs = [model_to_graph(m, idx) for idx, m in enumerate(m_list)]
Gs = [fix_layout(g) for g in Gs]

In [None]:
q_code = np.array([-1,0,0,0,0,-1,1,0])
g_res = ma_algorithm(Gs, q_code, init_threshold = 1)

In [None]:
# Plot
X = to_pydot(g_res)
X.set('rankdir', 'BT')
#X.set('splines', 'ortho')

with open("tmp/test.dot", "w") as text_file:
    print(X.to_string(), file=text_file)

!dot -T png ./tmp/test.dot > ./tmp/test.png  # Bash command

from IPython.display import Image, display
from IPython.core.display import HTML 

display(Image('tmp/test.png', unconfined=True))

## MRAI

In [None]:
def mrai_algorithm(g_list, q_code, init_threshold=1.0, stepsize=0.1):
    
    q_desc, q_targ, q_miss = code_to_query(q_code)
    
    def criterion(g):        
        outputs = set([g.nodes()[node]['idx']
                       for node, out_degree in g.out_degree()
                       if out_degree == 0
                       if g.nodes()[node]['kind']=='data'])
        
        yes_no = len(set(q_targ).intersection(outputs)) > 0
        
        feature_importances_available = [g.nodes()[node]['fi']
                                         for node, in_degree in g.in_degree()
                                         if in_degree == 0
                                         if g.nodes()[node]['kind']=='data'
                                         if g.nodes()[node]['idx'] in q_desc]
        
        quantifier = np.sum(feature_importances_available)
        
        result = int(yes_no) * quantifier
        
        msg = """
        yes_no:       {}
        quantifier:   {}
        result:       {}
        """.format(yes_no, quantifier, result)
        print(msg)
        
        return result
    
    
    thresholds = np.clip(np.arange(init_threshold, -stepsize, -stepsize), 0, 1)
    
    for thr in thresholds:
        g_relevant = [g for g in g_list if criterion(g) > thr]
        if len(g_relevant) > 0:
            print('we have found a model at threshold: {}'.format(thr))
            break
    
    g_relevant = [copy.deepcopy(g) for g in g_relevant]
    g_relevant = [add_imputation_nodes(g, q_desc) for g in g_relevant]
    result = reduce(nx.compose, g_relevant)
    
    add_merge_nodes(result)
    
    return result

In [None]:
q_code = np.array([-1,0,0,0,0,-1,1,0])
g_res = mrai_algorithm(Gs, q_code, init_threshold = 1, stepsize=0.01)

In [None]:
for e in g_res.edges():
    g_res.edges()[e]['label'] = "{0:.2f}".format(g_res.edges()[e].get('fi', 0))

# Plot
X = to_pydot(g_res)
X.set('rankdir', 'BT')
#X.set('splines', 'ortho')

with open("tmp/test.dot", "w") as text_file:
    print(X.to_string(), file=text_file)

!dot -T png ./tmp/test.dot > ./tmp/test.png  # Bash command

from IPython.display import Image, display
from IPython.core.display import HTML 

display(Image('tmp/test.png', unconfined=True))

In [None]:
g_extra = Gs[15].copy()
g_extra.nodes()['d-05']['fi'] = g_extra.nodes()['d-05']['fi'] - 0.2
g_extra.nodes()['d-03']['fi'] = g_extra.nodes()['d-03']['fi'] + 0.2

Gs[15].nodes()['d-05']

In [None]:
g_extra.nodes()['f-15'].rename('x')

In [None]:
g_extra.nodes()['d-05']

In [None]:
Gs.append(g_extra)