# Unit tests - mercs/algo/inference

## Imports

In [1]:
# Standard imports
import os
import sys
from os.path import dirname


# Custom imports
root_directory = dirname(os.getcwd())
for dname in {'src'}:
    sys.path.insert(0, os.path.join(root_directory, dname))

from mercs.algo.inference import *
from mercs.utils.utils import encode_attribute

import datasets as ds
from sklearn.preprocessing import Imputer

  from numpy.core.umath_tests import inner1d


## Actual Tests

### Perform Imputation

In [2]:
train, test = ds.load_nursery()

load_example_dataset is loading fname: /cw/dtailocal/Files/git/mercs-v5/resc/data/nursery_train.csv

load_example_dataset is loading fname: /cw/dtailocal/Files/git/mercs-v5/resc/data/nursery_test.csv



In [3]:
query_code = [0,-1,-1,0,0,0,0,0,1]

In [4]:
# Most-frequent-value imputer, fitted on the training data.
imputer_options = dict(missing_values='NaN', strategy='most_frequent', axis=0)
imputator = Imputer(**imputer_options)
imputator.fit(train)

Imputer(axis=0, copy=True, missing_values='NaN', strategy='most_frequent',
    verbose=0)

In [5]:
obs = perform_imputation(train, query_code, imputator)

In [6]:
obs.shape

(11680, 9)

In [7]:
train.shape

(11680, 9)

In [8]:
assert train.shape == obs.shape

In [9]:
np.array(query_code)==1

array([False, False, False, False, False, False, False, False,  True])

In [10]:
obs

array([[2., 4., 1., ..., 0., 2., 2.],
       [2., 4., 1., ..., 0., 1., 1.],
       [2., 4., 1., ..., 0., 0., 0.],
       ...,
       [0., 4., 1., ..., 1., 2., 3.],
       [0., 4., 1., ..., 1., 1., 3.],
       [0., 4., 1., ..., 1., 0., 0.]])

In [11]:
missing_attribute_encoding = encode_attribute(0, [1], [2])

In [12]:
# Boolean mask over the attributes whose query code equals the
# "missing" encoding.
query_array = np.array(query_code)
boolean_missing = (query_array == missing_attribute_encoding)
boolean_missing

array([False,  True,  True, False, False, False, False, False, False])

In [13]:
missing_count = np.sum(boolean_missing)
missing_count

2

In [14]:
# Walk over the columns of obs selected by boolean_missing (the transpose
# turns columns into rows for iteration) and require that each one holds
# exactly one distinct value.
for x in obs[:,boolean_missing].T:
    assert len(np.unique(x)) == 1

In [15]:
len(np.unique(obs[:,np.array(query_code)==-1])) <= missing_count

True

In [16]:
np.unique(obs[:,np.array(query_code)==-1])

array([1., 4.])

In [17]:
def test_perform_imputation():
    """Check shape, type and imputed columns of ``perform_imputation``.

    Fits a most-frequent imputer on the nursery training set, runs
    ``perform_imputation`` on the test set and verifies that:

    * the result has the same shape as the input,
    * the result is a ``np.ndarray``,
    * every attribute marked as missing in ``query_code`` holds exactly one
      distinct value in the result.
    """
    # Prelims
    train, test = ds.load_nursery()
    query_code = [0, -1, -1, -1, -1, -1, 0, 0, 1]

    imputator = Imputer(missing_values='NaN',
                        strategy='most_frequent',
                        axis=0)
    imputator.fit(train)

    # Compute the "missing" marker here so the test does not depend on a
    # global left behind by another notebook cell.
    missing_attribute_encoding = encode_attribute(0, [1], [2])

    # Actual test
    obs = perform_imputation(test, query_code, imputator)

    assert test.shape == obs.shape
    assert isinstance(obs, np.ndarray)

    # Build a boolean column mask; indexing with the raw encoding value
    # would select a single column instead of the missing attributes.
    boolean_missing = np.array(query_code) == missing_attribute_encoding

    # Guard against a vacuous loop below.
    assert boolean_missing.any()

    for column in obs[:, boolean_missing].T:
        assert len(np.unique(column)) == 1

### Init_predictions

In [18]:
obs = init_predictions(10, 5, type=np.float64)

In [19]:
obs.shape

(10, 5)

In [20]:
obs.dtype == np.float64

True

In [21]:
assert isinstance(obs[0,0], np.float64)

In [22]:
act_att_idx = [4,5,6]

In [23]:
np.max(act_att_idx)

6

In [24]:
obs.shape[1]

5

### Update_X

In [25]:
# Fixture: an all-zero matrix X, random replacement values Y (one column
# per targeted attribute) and the indices of the columns to overwrite.
act_att_idx = [1, 2]
X = np.zeros(shape=(100, 4), dtype=np.float64)
Y = np.random.rand(100, len(act_att_idx))

In [26]:
obs = update_X(X, Y, act_att_idx)
obs

array([[0.        , 0.02969722, 0.81935115, 0.        ],
       [0.        , 0.0845069 , 0.73611983, 0.        ],
       [0.        , 0.59915825, 0.89757915, 0.        ],
       [0.        , 0.84219553, 0.66830062, 0.        ],
       [0.        , 0.02888601, 0.90725451, 0.        ],
       [0.        , 0.8022789 , 0.74753191, 0.        ],
       [0.        , 0.45243222, 0.48395937, 0.        ],
       [0.        , 0.90910255, 0.46809358, 0.        ],
       [0.        , 0.99083558, 0.57805572, 0.        ],
       [0.        , 0.06711018, 0.65172993, 0.        ],
       [0.        , 0.59990963, 0.95507322, 0.        ],
       [0.        , 0.57750233, 0.27903197, 0.        ],
       [0.        , 0.08497976, 0.81534759, 0.        ],
       [0.        , 0.31529603, 0.40999267, 0.        ],
       [0.        , 0.29649419, 0.66480434, 0.        ],
       [0.        , 0.13239602, 0.52344505, 0.        ],
       [0.        , 0.20412116, 0.02186363, 0.        ],
       [0.        , 0.17952995,

In [27]:
# Each targeted column of the result is compared element-wise with the
# corresponding column of Y.
test_col_0 = (obs[:, act_att_idx[0]] == Y[:, 0])
test_col_1 = (obs[:, act_att_idx[1]] == Y[:, 1])

assert obs.shape == X.shape
assert np.all(test_col_0)
assert np.all(test_col_1)


### predict_values_from_numer

In [28]:
# One (100, 1) column of random numeric results per attribute, plus a
# random positive count per attribute.
nb_atts = 4
numer_res = [np.random.rand(100, 1) for _ in range(nb_atts)]
counts = np.random.randint(1, 10, size=nb_atts)

obs = predict_values_from_numer(numer_res, counts)

# The output is checked for one row per sample, one column per attribute,
# and for being a NumPy array.
assert obs.shape[0] == len(numer_res[0])
assert obs.shape[1] == nb_atts
assert isinstance(obs, np.ndarray)

### predict_values_from_proba

In [29]:
# Fixture sizes
nb_atts, nb_classes, nb_samples = 5, 3, 100

# One (nb_samples, nb_classes) array of random scores per attribute
proba_res = [np.random.rand(nb_samples, nb_classes) for _ in range(nb_atts)]

# nb_classes distinct labels, drawn from 0..9, per attribute
lab_res = [np.random.choice(10, size=nb_classes, replace=False)
           for _ in range(nb_atts)]

In [30]:
lab_res

[array([2, 6, 1]),
 array([3, 4, 2]),
 array([1, 3, 2]),
 array([3, 1, 0]),
 array([8, 7, 2])]

In [31]:
obs = predict_values_from_proba(proba_res, lab_res)

# One row per sample and one column per attribute
assert obs.shape == (nb_samples, nb_atts)

# For every attribute, the set of predicted values must match the set of
# labels supplied for it.
for a, labels in enumerate(lab_res):
    predicted_values = np.unique(obs[:, a])
    assert np.array_equal(predicted_values, np.unique(labels))

### merge_numer

In [32]:
# Result container: one (nb_samples, 1) column of random values per target.
nb_samples, nb_targ_res = 100, 6
numer_res = [np.random.rand(nb_samples, 1) for _ in range(nb_targ_res)]

In [33]:
# Multi-target sklearn output: 2-D array with one column per target
nb_targ_mod_a = 3
numer_mod_a = np.random.rand(nb_samples, nb_targ_mod_a)

# Single-target sklearn output: squeezed down to a 1-D array
nb_targ_mod_b = 1
numer_mod_b = np.squeeze(np.random.rand(nb_samples, nb_targ_mod_b))

# Our own output format: a list holding one (nb_samples, 1) array per target.
# The row count comes from nb_samples, as for the two fixtures above,
# instead of a hard-coded 100.
nb_targ_mod_c = 2
numer_mod_c = [np.random.rand(nb_samples, 1) for _ in range(nb_targ_mod_c)]

In [34]:
t_idx_res = 2
t_idx_mod = 0

In [35]:
# Call merge_numer once per model-output format prepared earlier (2-D array,
# squeezed 1-D array, list of column arrays). obs is overwritten on every
# iteration and nothing is asserted, so this only checks that the call
# completes for each format.
for numer_mod in [numer_mod_a, numer_mod_b, numer_mod_c]:
    obs = merge_numer(numer_res, numer_mod, t_idx_res, t_idx_mod)

Type of numer_res: <class 'list'> 
And type of numer_res[0]: <class 'numpy.ndarray'>
And shape of numer_res[0]: (100, 1)


Type of numer_res: <class 'list'> 
And type of numer_res[0]: <class 'numpy.ndarray'>
And shape of numer_res[0]: (100, 1)


Type of numer_res: <class 'list'> 
And type of numer_res[0]: <class 'numpy.ndarray'>
And shape of numer_res[0]: (100, 1)


