# Unittests - mercs/algo/inference

## Imports

In [2]:
# Standard imports
import os
import sys
from os.path import dirname


# Custom imports
root_directory = dirname(os.getcwd())
for dname in {'src'}:
    sys.path.insert(0, os.path.join(root_directory, dname))

from mercs.algo.inference import *
from mercs.utils.utils import encode_attribute

import datasets as ds
from sklearn.preprocessing import Imputer

  from numpy.core.umath_tests import inner1d


## Actual Tests

### Perform Imputation

In [3]:
train, test = ds.load_nursery()

load_example_dataset is loading fname: /cw/dtailocal/Files/git/mercs-v5/resc/data/nursery_train.csv

load_example_dataset is loading fname: /cw/dtailocal/Files/git/mercs-v5/resc/data/nursery_test.csv



In [4]:
query_code = [0,-1,-1,0,0,0,0,0,1]

In [5]:
# Fit an imputer on the training set, using the 'most_frequent' strategy
# along axis 0. The fitted object is handed to `perform_imputation` below.
# NOTE(review): `sklearn.preprocessing.Imputer` is a legacy API (deprecated in
# scikit-learn 0.20, removed in 0.22, superseded by `sklearn.impute.SimpleImputer`
# which has no `axis` argument) -- confirm the pinned scikit-learn version.
imputator = Imputer(missing_values='NaN',
                    strategy='most_frequent',
                    axis=0)
imputator.fit(train)

Imputer(axis=0, copy=True, missing_values='NaN', strategy='most_frequent',
    verbose=0)

In [6]:
obs = perform_imputation(train, query_code, imputator)

In [7]:
obs.shape

(11680, 9)

In [8]:
train.shape

(11680, 9)

In [9]:
assert train.shape == obs.shape

In [10]:
np.array(query_code)==1

array([False, False, False, False, False, False, False, False,  True])

In [11]:
obs

array([[2., 4., 1., ..., 0., 2., 2.],
       [2., 4., 1., ..., 0., 1., 1.],
       [2., 4., 1., ..., 0., 0., 0.],
       ...,
       [0., 4., 1., ..., 1., 2., 3.],
       [0., 4., 1., ..., 1., 1., 3.],
       [0., 4., 1., ..., 1., 0., 0.]])

In [12]:
missing_attribute_encoding = encode_attribute(0, [1], [2])

In [13]:
boolean_missing = np.array(query_code)== missing_attribute_encoding
boolean_missing

array([False,  True,  True, False, False, False, False, False, False])

In [14]:
missing_count = np.sum(boolean_missing)
missing_count

2

In [15]:
# Walk over the columns flagged as missing in the query code (transposing so
# that each iteration yields one column) and require that each of them holds
# exactly one distinct value after imputation.
# NOTE(review): `x` stays bound in the notebook namespace after this loop;
# later cells that mention `x` will silently pick up this last column.
for x in obs[:,boolean_missing].T:
    assert len(np.unique(x)) == 1

In [16]:
len(np.unique(obs[:,np.array(query_code)==-1])) <= missing_count

True

In [17]:
np.unique(obs[:,np.array(query_code)==-1])

array([1., 4.])

In [18]:
def test_perform_imputation():
    """Check `perform_imputation` on the nursery test set.

    A most-frequent imputer is fitted on the training split and then used to
    impute the test split under a query code in which five attributes are
    marked as missing. The test verifies that:

    * the result has the same shape as the input,
    * the result is a NumPy array,
    * every column marked as missing holds a single value after imputation.
    """
    # Prelims
    train, test = ds.load_nursery()
    query_code = [0, -1, -1, -1, -1, -1, 0, 0, 1]

    imputator = Imputer(missing_values='NaN',
                        strategy='most_frequent',
                        axis=0)
    imputator.fit(train)

    # Actual test
    obs = perform_imputation(test, query_code, imputator)

    assert test.shape == obs.shape
    assert isinstance(obs, np.ndarray)

    # Build the column mask locally instead of relying on notebook globals:
    # compare the query code against the encoding used for missing attributes.
    missing_attribute_encoding = encode_attribute(0, [1], [2])
    boolean_missing = np.array(query_code) == missing_attribute_encoding

    # Guard against a vacuous loop if no column were selected.
    assert boolean_missing.any()

    # Transpose so each iteration yields one imputed column.
    for column in obs[:, boolean_missing].T:
        assert len(np.unique(column)) == 1

### Init_predictions

In [19]:
obs = init_predictions(10, 5, type=np.float64)

In [20]:
obs.shape

(10, 5)

In [21]:
obs.dtype == np.float64

True

In [22]:
assert isinstance(obs[0,0], np.float64)

In [23]:
act_att_idx = [4,5,6]

In [24]:
np.max(act_att_idx)

6

In [25]:
obs.shape[1]

5

### Update_X

In [26]:
X = np.zeros((100, 4), dtype=np.float64)
Y = np.random.rand(100,2)
act_att_idx = [1,2]

In [27]:
obs = update_X(X, Y, act_att_idx)
obs

array([[0.        , 0.76383335, 0.4063331 , 0.        ],
       [0.        , 0.98343888, 0.61796289, 0.        ],
       [0.        , 0.17403495, 0.56241567, 0.        ],
       [0.        , 0.48444454, 0.79068345, 0.        ],
       [0.        , 0.81506049, 0.50436749, 0.        ],
       [0.        , 0.94112584, 0.49981101, 0.        ],
       [0.        , 0.28962284, 0.32620081, 0.        ],
       [0.        , 0.441841  , 0.9273531 , 0.        ],
       [0.        , 0.24372468, 0.09854359, 0.        ],
       [0.        , 0.07616711, 0.36507717, 0.        ],
       [0.        , 0.41333518, 0.81255102, 0.        ],
       [0.        , 0.70897872, 0.18670529, 0.        ],
       [0.        , 0.39747968, 0.01202911, 0.        ],
       [0.        , 0.07279852, 0.83609838, 0.        ],
       [0.        , 0.54624016, 0.76772894, 0.        ],
       [0.        , 0.87583921, 0.15556332, 0.        ],
       [0.        , 0.51756874, 0.75099103, 0.        ],
       [0.        , 0.61112284,

In [28]:
# Element-wise comparison of the two updated columns of `obs` with the
# corresponding columns of Y.
test_col_0 = np.equal(obs[:, act_att_idx[0]], Y[:, 0])
test_col_1 = np.equal(obs[:, act_att_idx[1]], Y[:, 1])

# The update must keep the shape of X and copy both Y columns verbatim.
assert X.shape == obs.shape
assert np.all(test_col_0)
assert np.all(test_col_1)


### predict_values_from_numer

In [29]:
# Fixture: one random (100, 1) array per attribute, plus a random integer
# count in [1, 10) for each attribute.
nb_atts = 4
numer_res = [np.random.rand(100, 1) for _ in range(nb_atts)]
counts = np.random.randint(1, 10, size=nb_atts)

obs = predict_values_from_numer(numer_res, counts)

# Expect one row per sample and one column per attribute, as an ndarray.
expected_rows = numer_res[0].shape[0]
assert obs.shape[0] == expected_rows
assert obs.shape[1] == nb_atts
assert isinstance(obs, np.ndarray)

### predict_values_from_proba

In [30]:
# Fixture sizes for the probabilistic prediction test.
nb_atts = 5
nb_classes = 3
nb_samples = 100

# Per attribute: a (nb_samples, nb_classes) array of random scores ...
proba_res = [np.random.rand(nb_samples, nb_classes) for _ in range(nb_atts)]

# ... and nb_classes distinct labels drawn from range(10).
lab_res = [np.random.choice(10, size=nb_classes, replace=False)
           for _ in range(nb_atts)]

In [31]:
lab_res

[array([5, 8, 9]),
 array([3, 2, 8]),
 array([4, 2, 9]),
 array([9, 6, 3]),
 array([2, 3, 1])]

In [32]:
obs = predict_values_from_proba(proba_res, lab_res)

# One row per sample, one column per attribute.
expected_shape = (nb_samples, nb_atts)
assert obs.shape == expected_shape

# For every attribute, the set of predicted values must coincide with the set
# of labels that were offered for that attribute.
for a in range(nb_atts):
    predicted = np.unique(obs[:, a])
    offered = np.unique(lab_res[a])
    assert np.array_equal(predicted, offered)

### merge_numer

In [33]:
# Result container for merge_numer: one random (nb_samples, 1) array per
# target of the overall result.
nb_samples = 100
nb_targ_res = 6
numer_res = [np.random.rand(nb_samples, 1) for _ in range(nb_targ_res)]

In [36]:
# Three model-output layouts to feed into merge_numer. All of them use
# `nb_samples` rows (defined in the previous cell) so they stay consistent
# with `numer_res` if the sample count is ever changed.

# Multi-target sklearn output: a single 2-D array, one column per target.
nb_targ_mod_a = 3
numer_mod_a = np.random.rand(nb_samples, nb_targ_mod_a)

# Single-target sklearn output: squeezed down to a 1-D array.
nb_targ_mod_b = 1
numer_mod_b = np.random.rand(nb_samples, nb_targ_mod_b)
numer_mod_b = np.squeeze(numer_mod_b)

# Our own output format: a list with one (nb_samples, 1) array per target.
nb_targ_mod_c = 2
numer_mod_c = [None]*nb_targ_mod_c
for i in range(nb_targ_mod_c):
    numer_mod_c[i] = np.random.rand(nb_samples, 1)

In [38]:
t_idx_res = 2
t_idx_mod = 0