In [None]:
%pylab inline

In [None]:
import pandas as pd
import imp
import rpy2.robjects as ro
from rpy2.robjects import numpy2ri

# Single seed shared by the R session below and by the `random_state` argument
# of the investigations built later in the notebook.
RANDOM_SEED = 0
# Turn on rpy2's numpy <-> R conversion (numpy2ri).
numpy2ri.activate()
# Seed R's random number generator by evaluating `set.seed(<seed>)` in R.
ro.r('set.seed({})'.format(RANDOM_SEED))

import fairtest.utils.log as fairtest_log
# Re-execute the logging module; presumably a development aid so that edits to
# the module are picked up without restarting the kernel.
imp.reload(fairtest_log)
# Configure FairTest logging with 'fairtest.log' as the filename
# (presumably the log file path, relative to the working directory -- confirm).
fairtest_log.set_params(filename='fairtest.log')

In [None]:
import fairtest.utils.prepare_data as prepare
from fairtest import DataSource
import fairtest.investigation as inv
import fairtest.testing as testing
import fairtest.discovery as discovery
import fairtest.error_profiling as error_profiling
import fairtest.modules.metrics as metrics
import fairtest.modules.metrics.correlation as correlation
import fairtest.modules.metrics.regression as regression
import fairtest.modules.metrics.binary_metrics as binary_metrics
import fairtest.modules.statistics.confidence_interval as intervals
import ast
import sklearn.preprocessing as preprocessing


# Name of this dataset; passed to inv.report further down.
dataname = 'movies'

# Testing Investigation on Movie Ratings
data = prepare.data_from_csv( '../data/recommender/recommendations.txt', sep='\\t')

# Discretize the age field: each known raw Age value is mapped to a decade
# bucket (45 and 50 share bucket 50). Any other value becomes None (missing).
# A list is built explicitly because on Python 3 `map` returns a lazy iterator,
# which is not a valid column value.
AGE_BUCKETS = {1: 10, 18: 20, 25: 30, 35: 40, 45: 50, 50: 50, 56: 60}
data['Age'] = [AGE_BUCKETS.get(age) for age in data['Age']]

# Discretize ratings into 'low' (strictly below the column mean) and 'high'
# (everything else). The mean is computed once, up front, instead of once per
# row inside the comprehension.
seen_rating_mean = np.mean(data['Avg Seen Rating'])
data['Avg Seen Rating'] = ['low' if rating < seen_rating_mean else 'high'
                           for rating in data['Avg Seen Rating']]

# Wrap the prepared frame for use by the FairTest investigations.
data_source = DataSource(data)


#
# Investigation 1: test of associations on movie popularity
# (no explanatory attribute; 'Avg Seen Rating' is dropped)
#
SENS = ['Gender', 'Age']
TARGET = 'Avg Recommended Rating'
EXPL = []

unconditioned_drop = ['RMSE', 'Avg Movie Age', 'Types', 'Avg Seen Rating']
test_ratings = testing.Testing(
    data_source, SENS, TARGET, EXPL,
    to_drop=unconditioned_drop,
    random_state=RANDOM_SEED
)

#
# Investigation 2: same test, conditioned on previously rated movies
# ('Avg Seen Rating' is kept and used as the explanatory attribute)
#
SENS = ['Gender', 'Age']
TARGET = 'Avg Recommended Rating'
EXPL = ['Avg Seen Rating']

conditioned_drop = ['RMSE', 'Avg Movie Age', 'Types']
test_ratings_expl = testing.Testing(
    data_source, SENS, TARGET, EXPL,
    to_drop=conditioned_drop,
    random_state=RANDOM_SEED
)

# Both investigations are trained, tested and reported together.
investigations = [test_ratings, test_ratings_expl]

In [None]:
import fairtest.modules.context_discovery.guided_tree as guided_tree
imp.reload(guided_tree)

# Keyword options forwarded unchanged to inv.train.
train_options = {
    'score_aggregation': 'avg',
    'min_leaf_size': 100,
    'max_depth': 5,
}
inv.train(investigations, **train_options)

In [None]:
import fairtest.modules.statistics.hypothesis_test as tests
import fairtest.modules.statistics.multiple_testing as multitest
import fairtest.modules.context_discovery.tree_parser as tree_parser
import fairtest.modules.statistics.confidence_interval as intervals

# Reload the statistics and tree-parsing modules, in the same order as before.
for reloaded_module in (tests, multitest, tree_parser, intervals):
    imp.reload(reloaded_module)

inv.test(investigations, prune_insignificant=True, exact=True)

In [None]:
import fairtest.modules.bug_report.report as rep
import fairtest.modules.bug_report.filter_rank as fr
imp.reload(rep)
imp.reload(fr)

# Destination handed to inv.report. The call used to hardcode None and ignore
# this variable; it is now passed through, with None as the default so the
# effective behaviour is unchanged.
# NOTE(review): None presumably means no report files are written -- confirm.
# Switch to the commented-out line below to use the '../results' directory.
output_dir = None
#output_dir = '../results'
inv.report(investigations, dataname, filter_conf=0.95, output_dir=output_dir, node_filter=fr.FILTER_BETTER_THAN_ANCESTORS)