# Unittests - mercs/algo/inference

## Imports

In [1]:
# Standard imports
import os
import sys
from os.path import dirname


# Custom imports
# Put each listed sub-folder of the working directory's parent at the front
# of the import path, so the project imports that follow can be resolved.
root_directory = os.path.dirname(os.getcwd())
for dname in ('src',):
    sys.path.insert(0, os.path.join(root_directory, dname))

from mercs.algo.inference import *
from mercs.utils.utils import encode_attribute

import datasets as ds
from sklearn.preprocessing import Imputer

## Actual Tests

### Perform Imputation

In [2]:
train, test = ds.load_nursery()

load_example_dataset is loading fname: /cw/dtailocal/Files/git/mercs-v5/resc/data/nursery_train.csv

load_example_dataset is loading fname: /cw/dtailocal/Files/git/mercs-v5/resc/data/nursery_test.csv



In [3]:
query_code = [0,-1,-1,0,0,0,0,0,1]

In [4]:
# Imputer configured with the 'most_frequent' strategy for 'NaN' entries,
# fitted on the training data.
imputer_settings = dict(missing_values='NaN', strategy='most_frequent', axis=0)
imputator = Imputer(**imputer_settings)
imputator.fit(train)

Imputer(axis=0, copy=True, missing_values='NaN', strategy='most_frequent',
    verbose=0)

In [5]:
obs = perform_imputation(train, query_code, imputator)

In [6]:
obs.shape

(11680, 9)

In [7]:
train.shape

(11680, 9)

In [8]:
assert train.shape == obs.shape

In [9]:
np.array(query_code)==1

array([False, False, False, False, False, False, False, False,  True])

In [10]:
obs

array([[2., 4., 1., ..., 0., 2., 2.],
       [2., 4., 1., ..., 0., 1., 1.],
       [2., 4., 1., ..., 0., 0., 0.],
       ...,
       [0., 4., 1., ..., 1., 2., 3.],
       [0., 4., 1., ..., 1., 1., 3.],
       [0., 4., 1., ..., 1., 0., 0.]])

In [11]:
missing_attribute_encoding = encode_attribute(0, [1], [2])

In [12]:
boolean_missing = np.array(query_code)== missing_attribute_encoding
boolean_missing

array([False,  True,  True, False, False, False, False, False, False])

In [13]:
missing_count = np.sum(boolean_missing)
missing_count

2

In [14]:
# Transposing the selected slice lets us iterate column by column: every
# column flagged in `boolean_missing` must contain exactly one distinct value.
# NOTE: the loop variable `x` stays defined at notebook level after this cell.
for x in obs[:,boolean_missing].T:
    assert len(np.unique(x)) == 1

In [16]:
len(np.unique(obs[:,np.array(query_code)==-1])) <= missing_count

True

In [17]:
np.unique(obs[:,np.array(query_code)==-1])

array([1., 4.])

In [18]:
def test_perform_imputation():
    """Check that `perform_imputation` fills missing attributes with a constant.

    Fits a most-frequent imputer on the nursery training set, imputes the
    test set for a query with several missing attributes, and verifies that:

    * the result is a NumPy array with the same shape as the input, and
    * every column marked as missing in the query code holds a single value.
    """
    # Prelims
    train, test = ds.load_nursery()
    query_code = [0, -1, -1, -1, -1, -1, 0, 0, 1]

    imputator = Imputer(missing_values='NaN',
                        strategy='most_frequent',
                        axis=0)
    imputator.fit(train)

    # Actual test
    obs = perform_imputation(test, query_code, imputator)

    assert test.shape == obs.shape
    assert isinstance(obs, np.ndarray)

    # Compute the "missing" code locally instead of relying on a notebook
    # global, and turn it into a boolean column mask over the query code.
    missing_code = encode_attribute(0, [1], [2])
    boolean_missing = np.array(query_code) == missing_code

    # Guard against a vacuous loop: the query above does mark attributes
    # as missing, so the mask must select at least one column.
    assert boolean_missing.any()

    # Transpose so that iteration is over columns; each imputed column must
    # contain exactly one distinct value.
    for column in obs[:, boolean_missing].T:
        assert len(np.unique(column)) == 1

### Init_predictions

In [23]:
obs = init_predictions(10, 10, type=np.float64)

In [25]:
obs.shape

(10, 10)

In [28]:
assert obs.shape == (10,10)

In [32]:
obs.dtype == np.float64

True

In [37]:
assert isinstance(obs[0,0], np.float64)