# Unit tests - mercs/algo/inference

## Imports

In [1]:
# Standard imports
import os
import sys
from os.path import dirname


# Custom imports
root_directory = dirname(os.getcwd())
for dname in {'src'}:
    sys.path.insert(0, os.path.join(root_directory, dname))

from mercs.algo.inference import *
from mercs.utils.utils import encode_attribute

import datasets as ds
from sklearn.preprocessing import Imputer

  from numpy.core.umath_tests import inner1d


## Actual Tests

### perform_imputation

In [2]:
train, test = ds.load_nursery()

load_example_dataset is loading fname: /cw/dtailocal/Files/git/mercs-v5/resc/data/nursery_train.csv

load_example_dataset is loading fname: /cw/dtailocal/Files/git/mercs-v5/resc/data/nursery_test.csv



In [3]:
query_code = [0,-1,-1,0,0,0,0,0,1]

In [4]:
# Column-wise (axis=0) most-frequent-value imputer, fitted on the training set.
# NOTE(review): sklearn.preprocessing.Imputer was deprecated in scikit-learn
# 0.20 and removed in 0.22 (sklearn.impute.SimpleImputer replaces it) --
# confirm what perform_imputation expects of this object before migrating.
imputator = Imputer(missing_values='NaN',
                    strategy='most_frequent',
                    axis=0)
imputator.fit(train)

Imputer(axis=0, copy=True, missing_values='NaN', strategy='most_frequent',
    verbose=0)

In [5]:
obs = perform_imputation(train, query_code, imputator)

In [6]:
obs.shape

(11680, 9)

In [7]:
train.shape

(11680, 9)

In [8]:
assert train.shape == obs.shape

In [9]:
np.array(query_code)==1

array([False, False, False, False, False, False, False, False,  True])

In [10]:
obs

array([[2., 4., 1., ..., 0., 2., 2.],
       [2., 4., 1., ..., 0., 1., 1.],
       [2., 4., 1., ..., 0., 0., 0.],
       ...,
       [0., 4., 1., ..., 1., 2., 3.],
       [0., 4., 1., ..., 1., 1., 3.],
       [0., 4., 1., ..., 1., 0., 0.]])

In [11]:
missing_attribute_encoding = encode_attribute(0, [1], [2])

In [12]:
boolean_missing = np.array(query_code)== missing_attribute_encoding
boolean_missing

array([False,  True,  True, False, False, False, False, False, False])

In [13]:
missing_count = np.sum(boolean_missing)
missing_count

2

In [14]:
# Every column flagged as missing must hold one single value across all rows.
for x in obs[:,boolean_missing].T:
    assert len(np.unique(x)) == 1

In [15]:
len(np.unique(obs[:,np.array(query_code)==-1])) <= missing_count

True

In [16]:
np.unique(obs[:,np.array(query_code)==-1])

array([1., 4.])

In [17]:
def test_perform_imputation():
    """Check the output of perform_imputation on the nursery test set.

    Verifies that the result keeps the input's shape, is a numpy array, and
    that every attribute marked as missing in the query code holds a single
    value across all rows.
    """
    # Prelims
    train, test = ds.load_nursery()
    query_code = [0, -1, -1, -1, -1, -1, 0, 0, 1]

    imputator = Imputer(missing_values='NaN',
                        strategy='most_frequent',
                        axis=0)
    imputator.fit(train)

    # Actual test
    obs = perform_imputation(test, query_code, imputator)

    assert test.shape == obs.shape
    assert isinstance(obs, np.ndarray)

    # Build the mask locally instead of reading notebook state: attribute 0 is
    # neither in [1] nor in [2], so this yields the code used for "missing".
    missing_code = encode_attribute(0, [1], [2])
    boolean_missing = np.array(query_code) == missing_code

    # Guard against a vacuous loop below.
    assert boolean_missing.any()

    for column in obs[:, boolean_missing].T:
        assert len(np.unique(column)) == 1

### init_predictions

In [18]:
obs = init_predictions(10, 5, type=np.float64)

In [19]:
obs.shape

(10, 5)

In [20]:
obs.dtype == np.float64

True

In [21]:
assert isinstance(obs[0,0], np.float64)

In [22]:
act_att_idx = [4,5,6]

In [23]:
np.max(act_att_idx)

6

In [24]:
obs.shape[1]

5

### update_X

In [25]:
# Fixtures for update_X: the target column indices, a zero-filled input
# matrix, and random replacement values (one column per target index).
act_att_idx = [1, 2]
X = np.zeros(shape=(100, 4), dtype=np.float64)
Y = np.random.rand(100, 2)

In [26]:
obs = update_X(X, Y, act_att_idx)
obs

array([[0.        , 0.74065535, 0.10389852, 0.        ],
       [0.        , 0.97966293, 0.84509483, 0.        ],
       [0.        , 0.38775502, 0.22492773, 0.        ],
       [0.        , 0.69416776, 0.12077078, 0.        ],
       [0.        , 0.03069462, 0.67194367, 0.        ],
       [0.        , 0.39686875, 0.24681048, 0.        ],
       [0.        , 0.50689292, 0.65792995, 0.        ],
       [0.        , 0.97053075, 0.09334438, 0.        ],
       [0.        , 0.0629153 , 0.22118842, 0.        ],
       [0.        , 0.2819322 , 0.72296691, 0.        ],
       [0.        , 0.95484094, 0.0665382 , 0.        ],
       [0.        , 0.0894555 , 0.13572269, 0.        ],
       [0.        , 0.64514576, 0.04806034, 0.        ],
       [0.        , 0.46015432, 0.44688079, 0.        ],
       [0.        , 0.87551357, 0.99074728, 0.        ],
       [0.        , 0.14627225, 0.9309965 , 0.        ],
       [0.        , 0.06040654, 0.26502823, 0.        ],
       [0.        , 0.03905386,

In [27]:
# Element-wise comparison of each written column against its source column
# of Y (column k of Y is expected at column act_att_idx[k] of the result).
first_target, second_target = act_att_idx
test_col_0 = obs[:, first_target] == Y[:, 0]
test_col_1 = obs[:, second_target] == Y[:, 1]

# The result must keep X's shape and reproduce both Y columns exactly.
assert obs.shape == X.shape
assert test_col_0.all()
assert test_col_1.all()


### predict_values_from_numer

In [28]:
# One (100, 1) array of random values per attribute.
nb_atts = 4
numer_res = [np.random.rand(100, 1) for _ in range(nb_atts)]

# One random integer in [1, 10) per attribute.
counts = np.random.randint(1, 10, size=(nb_atts))

obs = predict_values_from_numer(numer_res, counts)

# Expect an ndarray with one row per sample and one column per attribute.
expected_rows = numer_res[0].shape[0]
assert obs.shape[0] == expected_rows
assert obs.shape[1] == nb_atts
assert isinstance(obs, np.ndarray)

### predict_values_from_proba

In [29]:
nb_atts = 5
nb_classes = 3
nb_samples = 100

# One (nb_samples, nb_classes) array of random scores per attribute.
proba_res = [np.random.rand(nb_samples, nb_classes) for _ in range(nb_atts)]

# Per attribute, nb_classes distinct labels sampled from 0..9.
lab_res = [np.random.choice(10, size=nb_classes, replace=False)
           for _ in range(nb_atts)]

In [30]:
lab_res

[array([7, 2, 9]),
 array([9, 2, 1]),
 array([4, 7, 6]),
 array([3, 4, 2]),
 array([9, 2, 5])]

In [31]:
obs = predict_values_from_proba(proba_res, lab_res)

# One row per sample, one column per attribute.
assert obs.shape == (nb_samples, nb_atts)

# Each column must use exactly the label set of its attribute.
for att, labels in enumerate(lab_res):
    predicted_labels = np.unique(obs[:, att])
    assert np.array_equal(predicted_labels, np.unique(labels))

### merge_numer

In [32]:
nb_samples = 100
nb_targ_res = 6

# One (nb_samples, 1) array of random values per result target.
numer_res = [np.random.rand(nb_samples, 1) for _ in range(nb_targ_res)]

In [33]:
# Multi-target sklearn output: a single 2-D array, one column per target.
nb_targ_mod_a = 3
numer_mod_a = np.random.rand(nb_samples, nb_targ_mod_a)

# Single-target sklearn output: squeezed down to a 1-D array.
nb_targ_mod_b = 1
numer_mod_b = np.squeeze(np.random.rand(nb_samples, nb_targ_mod_b))

# Our own output format: a list with one (nb_samples, 1) array per target.
# Sized with nb_samples rather than a literal 100 so that it stays aligned
# with the other fixtures if the sample count is changed.
nb_targ_mod_c = 2
numer_mod_c = [np.random.rand(nb_samples, 1) for _ in range(nb_targ_mod_c)]

In [34]:
t_idx_res = 2
t_idx_mod = 0

In [35]:
# Run merge_numer once per model-output layout; each call must return a list
# with one entry per result target.
for candidate_output in (numer_mod_a, numer_mod_b, numer_mod_c):
    obs = merge_numer(numer_res, candidate_output, t_idx_res, t_idx_mod)

    assert isinstance(obs, list)
    assert len(obs) == nb_targ_res

### merge_proba

In [36]:
nb_samples = 100
nb_targ_res = 6
nb_classes = 3

# One (nb_samples, nb_classes) array of random scores per result target.
proba_res = [np.random.rand(nb_samples, nb_classes)
             for _ in range(nb_targ_res)]

# Per result target, nb_classes distinct labels sampled from 0..9.
lab_res = [np.random.choice(10, size=nb_classes, replace=False)
           for _ in range(nb_targ_res)]

In [37]:
lab_res[2]

array([7, 0, 5])

In [38]:
# list (our own output or multi-target sklearn)
mutual_targets_a = [0, 2, 4]
nb_targ_mod_a = 3
assert len(mutual_targets_a) == nb_targ_mod_a

# One (nb_samples, nb_classes_mod_a) array of random scores per model target.
nb_classes_mod_a = 2
proba_mod_a = [np.random.rand(nb_samples, nb_classes_mod_a)
               for _ in range(nb_targ_mod_a)]

# The model's labels are the leading nb_classes_mod_a labels of each shared
# result target (built directly; no placeholder list is needed first).
lab_mod_a = [lab_res[idx][:nb_classes_mod_a] for idx in mutual_targets_a]

In [39]:
lab_res

[array([2, 0, 6]),
 array([7, 3, 8]),
 array([7, 0, 5]),
 array([3, 7, 1]),
 array([3, 5, 2]),
 array([2, 1, 3])]

In [40]:
lab_mod_a

[array([2, 0]), array([7, 0]), array([3, 5])]

In [41]:
# np.ndarray (single-target sklearn)
nb_targ_mod_b = 1
mutual_targets_b = [4]
assert nb_targ_mod_b == len(mutual_targets_b)

# A single (nb_samples, nb_classes_mod_b) array of random scores.
nb_classes_mod_b = 2
proba_mod_b = np.random.rand(nb_samples, nb_classes_mod_b)

# The model's labels are the trailing nb_classes_mod_b labels of each shared
# result target.
lab_mod_b = [lab_res[target][-nb_classes_mod_b:]
             for target in mutual_targets_b]

In [42]:
lab_mod_b

[array([5, 2])]

In [48]:
proba_res[0]

array([[0.93851448, 0.64181947, 0.17731285],
       [1.81341204, 0.51798434, 0.41828327],
       [0.53609   , 1.52948709, 0.62482418],
       [0.86260705, 0.74097469, 0.93238353],
       [1.07154474, 1.3332534 , 0.449467  ],
       [1.05084136, 1.20853829, 0.77735493],
       [0.42967804, 0.46215878, 0.50575157],
       [0.59074589, 1.79009754, 0.74696919],
       [1.11813727, 0.32338991, 0.9421114 ],
       [1.94640916, 1.51038603, 0.53561217],
       [0.50839095, 1.41475447, 0.02442977],
       [0.93037037, 0.93439186, 0.71430654],
       [1.04903896, 1.320502  , 0.5644284 ],
       [0.9967985 , 0.49347106, 0.29116058],
       [0.89598674, 0.83628399, 0.60534455],
       [1.30518613, 0.38575015, 0.79700773],
       [0.72958338, 1.19893214, 0.33440094],
       [1.14736499, 1.20136355, 0.61739676],
       [1.46826376, 0.21837325, 0.36105468],
       [0.72125009, 1.18997503, 0.4643777 ],
       [0.66075215, 0.80808175, 0.93600208],
       [0.91047429, 0.70409591, 0.07936861],
       [1.

In [44]:
proba_mod_a[0]

array([[0.01098234, 0.41361379],
       [0.88676695, 0.02552948],
       [0.51421082, 0.85077256],
       [0.3472294 , 0.57519507],
       [0.99159677, 0.40536494],
       [0.37546948, 0.63445867],
       [0.38783987, 0.46032035],
       [0.05746638, 0.95459376],
       [0.40401123, 0.04961554],
       [0.9530196 , 0.6299169 ],
       [0.13160144, 0.73449018],
       [0.36117616, 0.70098249],
       [0.42485567, 0.70853248],
       [0.12608448, 0.32585432],
       [0.28133918, 0.03864884],
       [0.80066515, 0.12941358],
       [0.28759482, 0.37909461],
       [0.56587943, 0.32947324],
       [0.50071569, 0.18944879],
       [0.31227447, 0.22177257],
       [0.63236039, 0.22170186],
       [0.28694635, 0.03469799],
       [0.42774277, 0.3323028 ],
       [0.37765727, 0.79346203],
       [0.54355188, 0.90213794],
       [0.09908222, 0.55207693],
       [0.77358196, 0.14559181],
       [0.9573476 , 0.69331137],
       [0.84654035, 0.64367139],
       [0.25306192, 0.44426332],
       [0.

In [45]:
# Merge the list-style model's first target into its matching result target.
t_idx_mod = 0
t_idx_res = mutual_targets_a[t_idx_mod]
print(t_idx_res)
obs = merge_proba(
    proba_res, proba_mod_a,
    lab_res, lab_mod_a,
    t_idx_res, t_idx_mod,
)

# Every merged entry must be a (nb_samples, nb_classes) ndarray.
for merged in obs:
    assert isinstance(merged, np.ndarray)
    assert merged.shape == (nb_samples, nb_classes)

0


In [52]:
# Merge the ndarray-style model's only target into its matching result target.
t_idx_mod = 0
t_idx_res = mutual_targets_b[t_idx_mod]
print(t_idx_res)
obs = merge_proba(
    proba_res, proba_mod_b,
    lab_res, lab_mod_b,
    t_idx_res, t_idx_mod,
)

# Every merged entry must be a (nb_samples, nb_classes) ndarray.
for merged in obs:
    assert isinstance(merged, np.ndarray)
    assert merged.shape == (nb_samples, nb_classes)

4
