# Selection Algorithm

In [1]:
import numpy as np

from mercs.algo.selection import _set_missing, _ensure_desc_atts, _set_nb_targets

TARG_ENCODING = 1

In [30]:
def _nb_models_and_deficit(nb_targets, potential_targets):
    
    nb_potential_targets = potential_targets.shape[0]
    
    nb_models_with_regular_nb_targets =  nb_potential_targets // nb_targets
    nb_leftover_targets = nb_potential_targets % nb_targets
    
    if nb_leftover_targets:
        nb_models = nb_models_with_regular_nb_targets + 1
        deficit = nb_targets - nb_leftover_targets
    else:
        nb_models = nb_models_with_regular_nb_targets
        deficit = 0
    
    return nb_models, deficit

def _init(nb_models, nb_attributes):
    return np.zeros((nb_models, nb_attributes), dtype=int) 

def _target_sets(potential_targets, nb_targets, nb_models, deficit):
    np.random.shuffle(potential_targets)
    choices = np.r_[potential_targets, potential_targets[:deficit]]
    
    return np.random.choice(choices, replace=False, size=(nb_models, nb_targets))

def _set_targets(m_codes, target_sets):
    
    row_idx = np.arange(m_codes.shape[0]).reshape(-1,1)
    col_idx = target_sets
    
    m_codes[row_idx, col_idx] = TARG_ENCODING
    return m_codes


def _single_iteration_random_selection(nb_attributes, nb_targets, fraction_missing, potential_targets):
    nb_models, deficit = _nb_models_and_deficit(nb_targets, potential_targets)

    # Init
    m_codes = _init(nb_models, nb_attributes)

    target_sets = _target_sets(potential_targets, nb_targets, nb_models, deficit)

    m_codes = _set_targets(m_codes, target_sets)
    m_codes = _set_missing(m_codes, fraction_missing)
    m_codes.astype(int)
    return m_codes

In [31]:
def base_selection_algorithm(metadata, nb_targets=1, nb_iterations=1, random_state=997):
    m_codes = random_selection_algorithm(metadata, nb_targets=nb_targets, nb_iterations=nb_iterations, fraction_missing=0., random_state=random_state)
    return m_codes

In [76]:
def random_selection_algorithm(metadata, nb_targets=1, nb_iterations=1, fraction_missing=0.2, random_state=997):
    
    # Init
    np.random.seed(random_state)
    nb_attributes = metadata["n_attributes"]
    nb_targets = _set_nb_targets(nb_targets, nb_attributes)

    codes = []
    for attribute_kind in {'nominal_attributes', 'numeric_attributes'}:
        potential_targets = np.array(list(metadata[attribute_kind]))
        for iterations in range(nb_iterations):
            m_codes = _single_iteration_random_selection(nb_attributes, nb_targets, fraction_missing, potential_targets)
            codes.append(m_codes)

    m_codes = np.vstack(codes)
    
    m_codes = ensure_desc_atts(m_codes)
    return m_codes

In [34]:
metadata

{'n_attributes': 10,
 'nominal_attributes': {0},
 'numeric_attributes': {7, 8, 9}}

In [35]:
metadata={'n_attributes': 10,
          'nominal_attributes': {0,1,2,3,4,5,6},
          'numeric_attributes': {7, 8, 9}}

In [75]:
m_codes = base_selection_algorithm(metadata, nb_targets=2, nb_iterations=1, random_state=3)
m_codes

array([[0., 0., 0., 0., 0., 0., 0., 1., 0., 1.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 1.],
       [0., 0., 0., 1., 0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 1., 0., 0., 0., 0., 0.],
       [1., 1., 0., 0., 0., 0., 0., 0., 0., 0.]])

In [48]:
np.where(m_codes==1)[1]

array([7, 8, 9, 0, 2, 5, 1, 4, 6, 0, 3, 4])