In [1]:
import logging
import warnings

logging.basicConfig()
logging.getLogger().setLevel(logging.INFO)

warnings.filterwarnings("ignore")

## Execute a pipeline and setup a view generator

In [2]:
from freamon.adapters.mlinspect.provenance import from_py_file
view_generator = from_py_file('classify_amazon_reviews_sklearn.py')

INFO:root:Patching sys.argv with ['eyes']


58920
29273


INFO:root:Registering source 2 with columns: ['product_id', 'product_parent', 'product_title', 'category_id', 'mlinspect_lineage_2_0']
INFO:root:
                  CREATE OR REPLACE VIEW _freamon_source_2_with_prov_view AS 
                  SELECT 
                  "product_id" AS "product_id", "product_parent" AS "product_parent", "product_title" AS "product_title", "category_id" AS "category_id", "mlinspect_lineage_2_0" AS "prov_id_source_2"
                  FROM _freamon_source_2
                
INFO:root:Registering source 3 with columns: ['id', 'category', 'mlinspect_lineage_3_0']
INFO:root:
                  CREATE OR REPLACE VIEW _freamon_source_3_with_prov_view AS 
                  SELECT 
                  "id" AS "id", "category" AS "category", "mlinspect_lineage_3_0" AS "prov_id_source_3"
                  FROM _freamon_source_3
                
INFO:root:Registering source 1 with columns: ['review_id', 'star_rating', 'helpful_votes', 'total_votes', 'mlinspect_lineage_1

Test accuracy 0.8704422032583398


## Generate and materialize a view for data debugging

In [3]:
materialized_view = view_generator.test_view(
    sliceable_by=['category', 'star_rating'], 
    with_features=False, 
    with_y_true=True, 
    with_y_pred=True)

materialized_view

Unnamed: 0,category,star_rating,y_true,y_pred
0,Digital_Software,5,1,1
1,Digital_Software,4,1,1
2,Digital_Software,5,1,1
3,Digital_Software,5,1,1
4,Digital_Video_Games,5,1,1
...,...,...,...,...
29642,Digital_Video_Games,5,1,1
29643,Digital_Video_Games,1,1,1
29644,Digital_Video_Games,5,0,1
29645,Digital_Software,2,1,0


## Feed the materialized view into the fairlearn library to compute fairness metrics

In [4]:
from fairlearn.metrics import MetricFrame, false_positive_rate
from sklearn.metrics import recall_score

fairness_metrics = MetricFrame(
    metrics={ 'tpr' : recall_score, 'fpr' : false_positive_rate },
    y_true=materialized_view.y_true,
    y_pred=materialized_view.y_pred,
    sensitive_features=materialized_view.star_rating
)

In [5]:
fairness_metrics.overall

tpr    0.948982
fpr    0.342514
dtype: object

In [6]:
fairness_metrics.by_group

Unnamed: 0_level_0,tpr,fpr
star_rating,Unnamed: 1_level_1,Unnamed: 2_level_1
1,0.878457,0.233231
2,0.91342,0.380132
3,0.939338,0.395659
4,0.959288,0.455979
5,0.966078,0.450439


## Data-debugging a la SliceFinder via an aggregation query

In [7]:
view_generator.execute_query(
"""
SELECT 
    star_rating > 3 as top_rated,
    category = 'Digital_Video_Games' as digi_games,
    AVG(-(y_true * log(y_pred_proba) + (1 - y_true) * log(1.0 - y_pred_proba))) AS avg_loss,
    VARIANCE(-(y_true * log(y_pred_proba) + (1 - y_true) * log(1.0 - y_pred_proba))) AS var_loss,    
    COUNT(*) as size
    
FROM (SELECT star_rating, category, y_true, IF(y_pred=0, 0.00001, 0.99999) AS y_pred_proba FROM materialized_view)
GROUP BY GROUPING SETS ((star_rating > 3, category = 'Digital_Video_Games'), (star_rating > 3), 
    (category = 'Digital_Video_Games'))
"""
)

Unnamed: 0,top_rated,digi_games,avg_loss,var_loss,size
0,True,False,0.639077,2.787253,9060
1,True,True,0.387005,1.78541,10155
2,False,False,0.88492,3.642037,6769
3,False,True,0.954139,3.861352,3663
4,True,,0.505859,2.273499,19215
5,False,,0.909225,3.719774,10432
6,,False,0.744207,3.167374,15829
7,,True,0.537346,2.398145,13818
