In [11]:
import pandas as pd
import numpy as np

from sklearn.linear_model import SGDClassifier
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, FunctionTransformer, OneHotEncoder, label_binarize
from sklearn.compose import ColumnTransformer

from arguseyes import ArgusEyes
import logging
from arguseyes.refinements import InputUsage, FairnessMetrics
import time

# Surface INFO-level messages (e.g. ArgusEyes progress logs) in the notebook
# output, using a compact "LEVEL:message" layout.
logging.basicConfig(level=logging.INFO, format='%(levelname)s:%(message)s')

In [2]:
from freamon_credit import load_train_and_test_data, filter_data, create_feature_encoding_pipeline, \
                           extract_labels, create_training_pipeline, random_subset

In [15]:
def execute_manually(seed):
    """Run the income pipeline by hand and compute reference metrics.

    The pipeline is assembled from the ``freamon_credit`` helpers: load the
    train/test data, drop a random subset of workclasses, fit the training
    pipeline, and predict on the filtered test set.

    Args:
        seed: Value passed to ``np.random.seed`` before anything else runs.

    Returns:
        A tuple ``(eq_opp, usage)`` where

        * ``eq_opp`` is the false-negative rate of the ``sex == 'Female'``
          test rows minus the false-negative rate of the ``sex == 'Male'``
          test rows, and
        * ``usage`` is a boolean NumPy array with one entry per row of the
          unfiltered training data, True where the row survived filtering.
    """
    # Kept function-local so this cell does not rely on the imports cell.
    from sklearn.metrics import confusion_matrix

    # Seed first: presumably random_subset (and the model) draw from NumPy's
    # global RNG -- TODO confirm against freamon_credit.
    np.random.seed(seed)

    train_location = 'arguseyes/example_pipelines/datasets/income/adult.data'
    test_location = 'arguseyes/example_pipelines/datasets/income/adult.test'

    train_all, test_all = load_train_and_test_data(train_location, test_location)

    # Positional row ids, used below to trace which training rows are retained.
    train_all['id'] = list(range(len(train_all)))
    test_all['id'] = list(range(len(test_all)))

    workclasses_to_exclude = random_subset([
        'Self-emp-not-inc', 'Private', 'Local-gov', 'Self-emp-inc',
        'Without-pay', 'Never-worked'])

    train, test = filter_data(train_all, test_all, workclasses_to_exclude)

    train_labels, test_labels = extract_labels(train, test)
    pipeline = create_training_pipeline()

    model = pipeline.fit(train, train_labels)

    # Input usage: mark every original training row whose id is still present
    # after filtering.
    usage = np.array(train_all['id'].isin(train['id']))

    # Group fairness computation on the filtered test set.
    eval_data = test.copy(deep=True)
    eval_data['true'] = test_labels
    eval_data['pred'] = model.predict(test)

    priv = eval_data[eval_data.sex == 'Male']
    dis = eval_data[eval_data.sex == 'Female']

    # For binary labels, ravel() yields the counts in the order tn, fp, fn, tp.
    priv_tn, priv_fp, priv_fn, priv_tp = confusion_matrix(priv['true'], priv['pred']).ravel()
    dis_tn, dis_fp, dis_fn, dis_tp = confusion_matrix(dis['true'], dis['pred']).ravel()

    # Keep this arithmetic in exactly this form: the comparison cell asserts
    # exact equality with the value from execute_with_arguseyes, which uses
    # the same expression.
    eq_opp = (float(dis_fn) / (dis_fn + dis_tp)) - (float(priv_fn) / (priv_fn + priv_tp))

    return eq_opp, usage

In [8]:
# Single manual run for one fixed seed, as a quick sanity check before the
# full seed-by-seed comparison further down.
execute_manually(seed=1234)

Model accuracy on held-out data 0.818609022556391
Usage 4115
Eq opp: 0.1432061579651941


  return f(**kwargs)


In [19]:
def execute_with_arguseyes(seed):
    """Run ``freamon_credit.py`` under ArgusEyes and compute the same metrics.

    Args:
        seed: Passed to the pipeline script as its only command-line
            argument (converted to ``str``).

    Returns:
        A tuple ``(eq_opp, usage)`` where

        * ``eq_opp`` is computed from the confusion-matrix counts returned by
          ``FairnessMetrics('sex', 'Male')`` as the disadvantaged group's
          false-negative rate minus the privileged group's, and
        * ``usage`` is a NumPy array built from the
          ``'__arguseyes__is_used'`` column of the first tagged input.
    """
    cmd_args = [str(seed)]

    # Defaults that are only returned if the with-block exits without
    # assigning. NOTE(review): that can only happen if the context manager
    # suppresses an exception -- confirm whether that is intended.
    eq_opp = 0
    usage = None

    eyes = ArgusEyes('freamon-testing', './mlruns')

    with eyes.classification_pipeline_from_py_file('freamon_credit.py', cmd_args=cmd_args) as pipeline:

        tagged_inputs = pipeline.compute(InputUsage())
        priv_tn, priv_fp, priv_fn, priv_tp, dis_tn, dis_fp, dis_fn, dis_tp = \
            pipeline.compute(FairnessMetrics('sex', 'Male'))

        # Keep this arithmetic in exactly this form: the comparison cell
        # asserts exact equality with the value from execute_manually, which
        # uses the same expression.
        eq_opp = (float(dis_fn) / (dis_fn + dis_tp)) - (float(priv_fn) / (priv_fn + priv_tp))

        # Presumably index 0 is the training fact table -- TODO confirm
        # against the ArgusEyes InputUsage refinement.
        usage = np.array(tagged_inputs[0].data['__arguseyes__is_used'])

    return eq_opp, usage


In [21]:
# Fixed seeds so the comparison between the two execution paths is repeatable.
seeds = [1234, 830848, 987496, 429214, 178132, 590519, 725202, 476388, 133576, 515078, 
         720668, 449660, 168556, 74155, 164680, 142209, 181459, 994015, 172466, 529630]

for seed in seeds:
    eq_opp_a, usage_a = execute_with_arguseyes(seed)
    eq_opp_m, usage_m = execute_manually(seed)

    # Exact (not approximate) equality is intentional: both paths are expected
    # to produce identical counts and therefore identical floats.
    print(eq_opp_a, eq_opp_m)
    assert eq_opp_a == eq_opp_m, \
        'Equal opportunity mismatch for seed %s: %r != %r' % (seed, eq_opp_a, eq_opp_m)

    print(np.sum(usage_a), np.sum(usage_m))
    assert np.array_equal(usage_a, usage_m), \
        'Input usage mismatch for seed %s' % seed
    

INFO:Patching sys.argv with ['eyes', '1234']
INFO:Created run 7ede20d9680a48e38ff7178826da5be9 for this invocation
INFO:Executing instrumented user pipeline with mlinspect
INFO:Redirecting the pipeline's stdout to arguseyes-pipeline-output.txt
INFO:Identifying training sources
INFO:rows_from_operator.keys() dict_keys([0])
INFO:operators_with_duplicates set()
INFO:sources_one_to_one 1
INFO:Found fact table from operator 0 with 32561 records and the following attributes: ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income-per-year']
INFO:Identifying test sources
INFO:rows_from_operator.keys() dict_keys([1])
INFO:operators_with_duplicates set()
INFO:sources_one_to_one 1
INFO:Found fact table from operator 1 with 16281 records and the following attributes: ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupatio

0.1432061579651941 0.1432061579651941
4115 4115


INFO:Identifying training sources
INFO:rows_from_operator.keys() dict_keys([0])
INFO:operators_with_duplicates set()
INFO:sources_one_to_one 1
INFO:Found fact table from operator 0 with 32561 records and the following attributes: ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income-per-year']
INFO:Identifying test sources
INFO:rows_from_operator.keys() dict_keys([1])
INFO:operators_with_duplicates set()
INFO:sources_one_to_one 1
INFO:Found fact table from operator 1 with 16281 records and the following attributes: ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income-per-year']
INFO:Extracted feature matrix X_train with 4094 rows and 35 columns
INFO:Extracted feature matrix X_test with 2118 rows an

0.05489503363000203 0.05489503363000203
4094 4094


INFO:Identifying training sources
INFO:rows_from_operator.keys() dict_keys([0])
INFO:operators_with_duplicates set()
INFO:sources_one_to_one 1
INFO:Found fact table from operator 0 with 32561 records and the following attributes: ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income-per-year']
INFO:Identifying test sources
INFO:rows_from_operator.keys() dict_keys([1])
INFO:operators_with_duplicates set()
INFO:sources_one_to_one 1
INFO:Found fact table from operator 1 with 16281 records and the following attributes: ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income-per-year']
INFO:Extracted feature matrix X_train with 8728 rows and 37 columns
INFO:Extracted feature matrix X_test with 4482 rows an

-0.00038057074418362813 -0.00038057074418362813
8728 8728


INFO:Identifying training sources
INFO:rows_from_operator.keys() dict_keys([0])
INFO:operators_with_duplicates set()
INFO:sources_one_to_one 1
INFO:Found fact table from operator 0 with 32561 records and the following attributes: ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income-per-year']
INFO:Identifying test sources
INFO:rows_from_operator.keys() dict_keys([1])
INFO:operators_with_duplicates set()
INFO:sources_one_to_one 1
INFO:Found fact table from operator 1 with 16281 records and the following attributes: ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income-per-year']
INFO:Extracted feature matrix X_train with 4101 rows and 36 columns
INFO:Extracted feature matrix X_test with 2121 rows an

0.024492152999524452 0.024492152999524452
4101 4101


INFO:Identifying training sources
INFO:rows_from_operator.keys() dict_keys([0])
INFO:operators_with_duplicates set()
INFO:sources_one_to_one 1
INFO:Found fact table from operator 0 with 32561 records and the following attributes: ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income-per-year']
INFO:Identifying test sources
INFO:rows_from_operator.keys() dict_keys([1])
INFO:operators_with_duplicates set()
INFO:sources_one_to_one 1
INFO:Found fact table from operator 1 with 16281 records and the following attributes: ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income-per-year']
INFO:Extracted feature matrix X_train with 29999 rows and 39 columns
INFO:Extracted feature matrix X_test with 14950 rows 

0.08002953362212861 0.08002953362212861
29999 29999


INFO:Identifying training sources
INFO:rows_from_operator.keys() dict_keys([0])
INFO:operators_with_duplicates set()
INFO:sources_one_to_one 1
INFO:Found fact table from operator 0 with 32561 records and the following attributes: ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income-per-year']
INFO:Identifying test sources
INFO:rows_from_operator.keys() dict_keys([1])
INFO:operators_with_duplicates set()
INFO:sources_one_to_one 1
INFO:Found fact table from operator 1 with 16281 records and the following attributes: ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income-per-year']
INFO:Extracted feature matrix X_train with 31431 rows and 40 columns
INFO:Extracted feature matrix X_test with 15695 rows 

0.06343364597027601 0.06343364597027601
31431 31431


INFO:Identifying training sources
INFO:rows_from_operator.keys() dict_keys([0])
INFO:operators_with_duplicates set()
INFO:sources_one_to_one 1
INFO:Found fact table from operator 0 with 32561 records and the following attributes: ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income-per-year']
INFO:Identifying test sources
INFO:rows_from_operator.keys() dict_keys([1])
INFO:operators_with_duplicates set()
INFO:sources_one_to_one 1
INFO:Found fact table from operator 1 with 16281 records and the following attributes: ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income-per-year']
INFO:Extracted feature matrix X_train with 4115 rows and 37 columns
INFO:Extracted feature matrix X_test with 2128 rows an

0.04886211512717542 0.04886211512717542
4115 4115


INFO:Identifying training sources
INFO:rows_from_operator.keys() dict_keys([0])
INFO:operators_with_duplicates set()
INFO:sources_one_to_one 1
INFO:Found fact table from operator 0 with 32561 records and the following attributes: ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income-per-year']
INFO:Identifying test sources
INFO:rows_from_operator.keys() dict_keys([1])
INFO:operators_with_duplicates set()
INFO:sources_one_to_one 1
INFO:Found fact table from operator 1 with 16281 records and the following attributes: ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income-per-year']
INFO:Extracted feature matrix X_train with 4108 rows and 36 columns
INFO:Extracted feature matrix X_test with 2125 rows an

0.03480589022757696 0.03480589022757696
4108 4108


INFO:Identifying training sources
INFO:rows_from_operator.keys() dict_keys([0])
INFO:operators_with_duplicates set()
INFO:sources_one_to_one 1
INFO:Found fact table from operator 0 with 32561 records and the following attributes: ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income-per-year']
INFO:Identifying test sources
INFO:rows_from_operator.keys() dict_keys([1])
INFO:operators_with_duplicates set()
INFO:sources_one_to_one 1
INFO:Found fact table from operator 1 with 16281 records and the following attributes: ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income-per-year']
INFO:Extracted feature matrix X_train with 7758 rows and 38 columns
INFO:Extracted feature matrix X_test with 4021 rows an

0.09371225125564253 0.09371225125564253
7758 7758


INFO:Identifying training sources
INFO:rows_from_operator.keys() dict_keys([0])
INFO:operators_with_duplicates set()
INFO:sources_one_to_one 1
INFO:Found fact table from operator 0 with 32561 records and the following attributes: ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income-per-year']
INFO:Identifying test sources
INFO:rows_from_operator.keys() dict_keys([1])
INFO:operators_with_duplicates set()
INFO:sources_one_to_one 1
INFO:Found fact table from operator 1 with 16281 records and the following attributes: ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income-per-year']
INFO:Extracted feature matrix X_train with 30468 rows and 41 columns
INFO:Extracted feature matrix X_test with 15238 rows 

0.08809455011875145 0.08809455011875145
30468 30468


INFO:Identifying training sources
INFO:rows_from_operator.keys() dict_keys([0])
INFO:operators_with_duplicates set()
INFO:sources_one_to_one 1
INFO:Found fact table from operator 0 with 32561 records and the following attributes: ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income-per-year']
INFO:Identifying test sources
INFO:rows_from_operator.keys() dict_keys([1])
INFO:operators_with_duplicates set()
INFO:sources_one_to_one 1
INFO:Found fact table from operator 1 with 16281 records and the following attributes: ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income-per-year']
INFO:Extracted feature matrix X_train with 5210 rows and 36 columns
INFO:Extracted feature matrix X_test with 2697 rows an

0.18524685947998837 0.18524685947998837
5210 5210


INFO:Identifying training sources
INFO:rows_from_operator.keys() dict_keys([0])
INFO:operators_with_duplicates set()
INFO:sources_one_to_one 1
INFO:Found fact table from operator 0 with 32561 records and the following attributes: ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income-per-year']
INFO:Identifying test sources
INFO:rows_from_operator.keys() dict_keys([1])
INFO:operators_with_duplicates set()
INFO:sources_one_to_one 1
INFO:Found fact table from operator 1 with 16281 records and the following attributes: ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income-per-year']
INFO:Extracted feature matrix X_train with 26790 rows and 37 columns
INFO:Extracted feature matrix X_test with 13328 rows 

0.07847239954662111 0.07847239954662111
26790 26790


INFO:Identifying training sources
INFO:rows_from_operator.keys() dict_keys([0])
INFO:operators_with_duplicates set()
INFO:sources_one_to_one 1
INFO:Found fact table from operator 0 with 32561 records and the following attributes: ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income-per-year']
INFO:Identifying test sources
INFO:rows_from_operator.keys() dict_keys([1])
INFO:operators_with_duplicates set()
INFO:sources_one_to_one 1
INFO:Found fact table from operator 1 with 16281 records and the following attributes: ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income-per-year']
INFO:Extracted feature matrix X_train with 4101 rows and 36 columns
INFO:Extracted feature matrix X_test with 2121 rows an

0.03984645696039135 0.03984645696039135
4101 4101


INFO:Identifying training sources
INFO:rows_from_operator.keys() dict_keys([0])
INFO:operators_with_duplicates set()
INFO:sources_one_to_one 1
INFO:Found fact table from operator 0 with 32561 records and the following attributes: ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income-per-year']
INFO:Identifying test sources
INFO:rows_from_operator.keys() dict_keys([1])
INFO:operators_with_duplicates set()
INFO:sources_one_to_one 1
INFO:Found fact table from operator 1 with 16281 records and the following attributes: ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income-per-year']
INFO:Extracted feature matrix X_train with 4094 rows and 35 columns
INFO:Extracted feature matrix X_test with 2118 rows an

0.07357836809565865 0.07357836809565865
4094 4094


INFO:Identifying training sources
INFO:rows_from_operator.keys() dict_keys([0])
INFO:operators_with_duplicates set()
INFO:sources_one_to_one 1
INFO:Found fact table from operator 0 with 32561 records and the following attributes: ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income-per-year']
INFO:Identifying test sources
INFO:rows_from_operator.keys() dict_keys([1])
INFO:operators_with_duplicates set()
INFO:sources_one_to_one 1
INFO:Found fact table from operator 1 with 16281 records and the following attributes: ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income-per-year']
INFO:Extracted feature matrix X_train with 5217 rows and 37 columns
INFO:Extracted feature matrix X_test with 2700 rows an

0.18118609406952957 0.18118609406952957
5217 5217


INFO:Identifying training sources
INFO:rows_from_operator.keys() dict_keys([0])
INFO:operators_with_duplicates set()
INFO:sources_one_to_one 1
INFO:Found fact table from operator 0 with 32561 records and the following attributes: ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income-per-year']
INFO:Identifying test sources
INFO:rows_from_operator.keys() dict_keys([1])
INFO:operators_with_duplicates set()
INFO:sources_one_to_one 1
INFO:Found fact table from operator 1 with 16281 records and the following attributes: ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income-per-year']
INFO:Extracted feature matrix X_train with 5224 rows and 37 columns
INFO:Extracted feature matrix X_test with 2704 rows an

0.17816174983385824 0.17816174983385824
5224 5224


INFO:Identifying training sources
INFO:rows_from_operator.keys() dict_keys([0])
INFO:operators_with_duplicates set()
INFO:sources_one_to_one 1
INFO:Found fact table from operator 0 with 32561 records and the following attributes: ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income-per-year']
INFO:Identifying test sources
INFO:rows_from_operator.keys() dict_keys([1])
INFO:operators_with_duplicates set()
INFO:sources_one_to_one 1
INFO:Found fact table from operator 1 with 16281 records and the following attributes: ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income-per-year']
INFO:Extracted feature matrix X_train with 29352 rows and 40 columns
INFO:Extracted feature matrix X_test with 14659 rows 

0.06658181375330074 0.06658181375330074
29352 29352


INFO:Identifying training sources
INFO:rows_from_operator.keys() dict_keys([0])
INFO:operators_with_duplicates set()
INFO:sources_one_to_one 1
INFO:Found fact table from operator 0 with 32561 records and the following attributes: ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income-per-year']
INFO:Identifying test sources
INFO:rows_from_operator.keys() dict_keys([1])
INFO:operators_with_duplicates set()
INFO:sources_one_to_one 1
INFO:Found fact table from operator 1 with 16281 records and the following attributes: ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income-per-year']
INFO:Extracted feature matrix X_train with 6642 rows and 37 columns
INFO:Extracted feature matrix X_test with 3442 rows an

0.03591718825228696 0.03591718825228696
6642 6642


INFO:Identifying training sources
INFO:rows_from_operator.keys() dict_keys([0])
INFO:operators_with_duplicates set()
INFO:sources_one_to_one 1
INFO:Found fact table from operator 0 with 32561 records and the following attributes: ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income-per-year']
INFO:Identifying test sources
INFO:rows_from_operator.keys() dict_keys([1])
INFO:operators_with_duplicates set()
INFO:sources_one_to_one 1
INFO:Found fact table from operator 1 with 16281 records and the following attributes: ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income-per-year']
INFO:Extracted feature matrix X_train with 7303 rows and 37 columns
INFO:Extracted feature matrix X_test with 3740 rows an

0.12075175996258902 0.12075175996258902
7303 7303


INFO:Identifying training sources
INFO:rows_from_operator.keys() dict_keys([0])
INFO:operators_with_duplicates set()
INFO:sources_one_to_one 1
INFO:Found fact table from operator 0 with 32561 records and the following attributes: ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income-per-year']
INFO:Identifying test sources
INFO:rows_from_operator.keys() dict_keys([1])
INFO:operators_with_duplicates set()
INFO:sources_one_to_one 1
INFO:Found fact table from operator 1 with 16281 records and the following attributes: ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income-per-year']
INFO:Extracted feature matrix X_train with 4115 rows and 37 columns
INFO:Extracted feature matrix X_test with 2128 rows an

0.11917670682730919 0.11917670682730919
4115 4115
