# Unittests - mercs/algo/inference

## Imports

In [1]:
# Standard imports
import os
import sys
from os.path import dirname


# Custom imports
root_directory = dirname(os.getcwd())
for dname in {'src'}:
    sys.path.insert(0, os.path.join(root_directory, dname))

from mercs.algo.inference import *
from mercs.utils.utils import encode_attribute

import datasets as ds
from sklearn.preprocessing import Imputer

## Actual Tests

### Perform Imputation

In [2]:
train, test = ds.load_nursery()

load_example_dataset is loading fname: /cw/dtailocal/Files/git/mercs-v5/resc/data/nursery_train.csv

load_example_dataset is loading fname: /cw/dtailocal/Files/git/mercs-v5/resc/data/nursery_test.csv



In [3]:
query_code = [0,-1,-1,0,0,0,0,0,1]

In [4]:
imputator = Imputer(missing_values='NaN',
                    strategy='most_frequent',
                    axis=0)
imputator.fit(train)

Imputer(axis=0, copy=True, missing_values='NaN', strategy='most_frequent',
    verbose=0)

In [5]:
obs = perform_imputation(train, query_code, imputator)

In [6]:
obs.shape

(11680, 9)

In [7]:
train.shape

(11680, 9)

In [8]:
assert train.shape == obs.shape

In [9]:
np.array(query_code)==1

array([False, False, False, False, False, False, False, False,  True])

In [10]:
obs

array([[2., 4., 1., ..., 0., 2., 2.],
       [2., 4., 1., ..., 0., 1., 1.],
       [2., 4., 1., ..., 0., 0., 0.],
       ...,
       [0., 4., 1., ..., 1., 2., 3.],
       [0., 4., 1., ..., 1., 1., 3.],
       [0., 4., 1., ..., 1., 0., 0.]])

In [11]:
missing_attribute_encoding = encode_attribute(0, [1], [2])

In [12]:
boolean_missing = np.array(query_code)== missing_attribute_encoding
boolean_missing

array([False,  True,  True, False, False, False, False, False, False])

In [13]:
missing_count = np.sum(boolean_missing)
missing_count

2

In [14]:
for x in obs[:,boolean_missing].T:
    assert len(np.unique(x)) == 1

In [15]:
len(np.unique(obs[:,np.array(query_code)==-1])) <= missing_count

True

In [16]:
np.unique(obs[:,np.array(query_code)==-1])

array([1., 4.])

In [17]:
def test_perform_imputation():
    # Prelims
    train, test = ds.load_nursery()
    query_code = [0,-1,-1,-1,-1,-1,0,0,1]
    
    imputator = Imputer(missing_values='NaN',
                    strategy='most_frequent',
                    axis=0)
    imputator.fit(train)
    
    # Actual test
    obs = perform_imputation(test, query_code, imputator)
    
    assert test.shape == obs.shape
    assert isinstance(obs, np.ndarray)
    
    boolean_missing = missing_attribute_encoding
    
    for row in obs[:,boolean_missing].T:
        assert len(np.unique(x)) == 1

### Init_predictions

In [18]:
obs = init_predictions(10, 5, type=np.float64)

In [19]:
obs.shape

(10, 5)

In [21]:
obs.dtype == np.float64

True

In [22]:
assert isinstance(obs[0,0], np.float64)

In [None]:
act_att_idx = [4,5,6]

In [None]:
np.max(act_att_idx)

In [None]:
obs.shape[1]

### Update_X

In [30]:
X = np.zeros((100, 4), dtype=np.float64)
Y = np.random.rand(100,2)
act_att_idx = [1,2]

In [31]:
obs = update_X(X, Y, act_att_idx)
obs

array([[0.00000000e+00, 6.05718611e-01, 7.88301472e-01, 0.00000000e+00],
       [0.00000000e+00, 3.10960914e-01, 1.49315912e-01, 0.00000000e+00],
       [0.00000000e+00, 2.18049073e-01, 3.40194977e-01, 0.00000000e+00],
       [0.00000000e+00, 7.56018419e-01, 6.64330171e-02, 0.00000000e+00],
       [0.00000000e+00, 5.30758250e-01, 9.97458888e-01, 0.00000000e+00],
       [0.00000000e+00, 2.37794678e-01, 5.80214702e-01, 0.00000000e+00],
       [0.00000000e+00, 3.12743298e-01, 7.87082174e-01, 0.00000000e+00],
       [0.00000000e+00, 8.08604461e-01, 4.60060939e-01, 0.00000000e+00],
       [0.00000000e+00, 2.52478001e-01, 2.17323879e-01, 0.00000000e+00],
       [0.00000000e+00, 4.70062894e-01, 7.28507809e-01, 0.00000000e+00],
       [0.00000000e+00, 9.75526060e-01, 2.92934149e-01, 0.00000000e+00],
       [0.00000000e+00, 6.27731269e-01, 1.36658308e-01, 0.00000000e+00],
       [0.00000000e+00, 4.31176502e-01, 9.58404287e-01, 0.00000000e+00],
       [0.00000000e+00, 6.65809647e-01, 9.55138851e

In [50]:
test_col_0 = obs[:,act_att_idx[0]] == Y.T[0]
test_col_1 = obs[:,act_att_idx[1]] == Y.T[1]

assert obs.shape == X.shape
assert test_col_0.all()
assert test_col_1.all()


In [49]:
test_col_0 = obs[:,act_att_idx[0]] == Y.T[0]
test_col_1 = obs[:,act_att_idx[1]] == Y.T[1]
test_col_1.all()

True

In [45]:
obs[:,act_att_idx[0]] == Y.T[0].all()

True

array([0.60571861, 0.31096091, 0.21804907, 0.75601842, 0.53075825,
       0.23779468, 0.3127433 , 0.80860446, 0.252478  , 0.47006289,
       0.97552606, 0.62773127, 0.4311765 , 0.66580965, 0.526801  ,
       0.91283395, 0.5935392 , 0.57307027, 0.55181844, 0.22437406,
       0.0704581 , 0.23606151, 0.3685945 , 0.30000183, 0.33976046,
       0.8986953 , 0.23606072, 0.42643783, 0.51713653, 0.55816805,
       0.18969045, 0.04158044, 0.85994451, 0.32817296, 0.75883601,
       0.55108011, 0.96854986, 0.57738196, 0.76551922, 0.17795686,
       0.19956661, 0.35990381, 0.48371261, 0.33313732, 0.33559983,
       0.43016439, 0.0309014 , 0.81519713, 0.29410193, 0.0743841 ,
       0.18069433, 0.55707533, 0.92480572, 0.32039241, 0.25558106,
       0.2301921 , 0.6072164 , 0.86803725, 0.54053836, 0.12930393,
       0.33043409, 0.77227304, 0.99651405, 0.38714134, 0.44982975,
       0.15621563, 0.88344047, 0.51087332, 0.53251536, 0.38020546,
       0.29925054, 0.64145624, 0.69557461, 0.66207471, 0.45995

In [34]:
assert 

TypeError: only integer scalar arrays can be converted to a scalar index