In [1]:
import logging
import warnings

# Give the root logger its default handler, then set its threshold to INFO so
# that INFO-level records emitted by later cells are displayed.
root_logger = logging.getLogger()
logging.basicConfig()
root_logger.setLevel(logging.INFO)

# Silence every Python warning for the rest of the session.
warnings.filterwarnings(action="ignore")

## Execute a pipeline and set up a view generator

In [2]:
from freamon.adapters.mlinspect.provenance import from_sklearn_pandas

# Script containing the sklearn/pandas pipeline to be run with provenance
# tracking; the path is resolved relative to the notebook's working directory.
pipeline_script = 'classify_amazon_reviews_sklearn.py'

# The returned object is used by the following cells to build views and to
# run SQL queries over the pipeline's data.
view_generator = from_sklearn_pandas(pipeline_script)

INFO:root:Patching sys.argv with ['eyes']
INFO:root:Registering source 2 with columns: ['product_id', 'product_parent', 'product_title', 'category_id', 'mlinspect_lineage_2_0']
INFO:root:
                  CREATE OR REPLACE VIEW _freamon_source_2_with_prov AS 
                  SELECT 
                  "product_id" AS "product_id", "product_parent" AS "product_parent", "product_title" AS "product_title", "category_id" AS "category_id", "mlinspect_lineage_2_0" AS "prov_id_source_2"
                  FROM _freamon_source_2
                
INFO:root:Registering source 3 with columns: ['id', 'category', 'mlinspect_lineage_3_0']
INFO:root:
                  CREATE OR REPLACE VIEW _freamon_source_3_with_prov AS 
                  SELECT 
                  "id" AS "id", "category" AS "category", "mlinspect_lineage_3_0" AS "prov_id_source_3"
                  FROM _freamon_source_3
                
INFO:root:Registering source 1 with columns: ['review_id', 'star_rating', 'helpful_votes', 'to

Test accuracy 0.879650554862212


## Generate and materialize a view for data debugging

In [3]:
# Build a table over the test set that keeps the two attributes we want to
# slice on together with the true and predicted labels, and omits the features.
test_view_options = {
    'sliceable_by': ['category', 'star_rating'],
    'with_features': False,
    'with_y_true': True,
    'with_y_pred': True,
}
materialized_view = view_generator.test_view(**test_view_options)

materialized_view

Unnamed: 0,category,star_rating,y_true,y_pred
0,Digital_Software,5,1,1
1,Digital_Software,4,1,1
2,Digital_Software,5,1,1
3,Digital_Software,5,1,1
4,Digital_Video_Games,5,1,1
...,...,...,...,...
29642,Digital_Video_Games,5,1,1
29643,Digital_Video_Games,1,1,1
29644,Digital_Video_Games,5,0,1
29645,Digital_Software,2,1,0


## Feed the materialized view into the fairlearn library to compute fairness metrics

In [4]:
from fairlearn.metrics import MetricFrame, false_positive_rate
from sklearn.metrics import recall_score

# Bucket every review by its star rating: more than 3 stars is "highly rated",
# everything else is "low rated".
#
# The bucket is written with a single .loc[row_mask, column] assignment on the
# frame itself. The chained form `frame['rating'].loc[mask] = ...` assigns into
# an intermediate Series, which pandas does not guarantee to write back to the
# frame (and which never writes back when Copy-on-Write is enabled).
is_highly_rated = materialized_view.star_rating.astype(int) > 3
materialized_view['rating'] = '(low rated)'
materialized_view.loc[is_highly_rated, 'rating'] = '(highly rated)'

# True-positive rate (recall) and false-positive rate, computed separately for
# each "<category> <rating bucket>" group.
fairness_metrics = MetricFrame(
    metrics={'tpr': recall_score, 'fpr': false_positive_rate},
    y_true=materialized_view.y_true,
    y_pred=materialized_view.y_pred,
    sensitive_features=materialized_view.category + ' ' + materialized_view.rating
)

fairness_metrics.by_group

Unnamed: 0_level_0,tpr,fpr
sensitive_feature_0,Unnamed: 1_level_1,Unnamed: 2_level_1
Digital_Software (highly rated),0.940072,0.361386
Digital_Software (low rated),0.904889,0.191647
Digital_Video_Games (highly rated),0.993283,0.593622
Digital_Video_Games (low rated),0.865546,0.297251


## Data-debugging à la SliceFinder via an aggregation query

In [6]:
# Per-slice loss statistics over the test view.
#
# The inner SELECT turns the hard 0/1 prediction into a pseudo-probability
# (0.00001 or 0.99999) so that both logarithms in the loss stay finite.
# The outer SELECT reports the mean and variance of the per-row loss and the
# row count for three groupings: (rating bucket, category), rating bucket
# alone, and category alone. Columns not part of a grouping come back as NULL.
#
# NOTE(review): the base of log() depends on the SQL engine (base 10 in some,
# natural in others). The avg_loss values in this cell's saved output look
# consistent with base 10 -- confirm which base is intended; ln() would give
# the conventional natural-log loss. Slice ranking is unaffected either way.
slice_loss_query = """
SELECT 
    star_rating > 3 as top_rated,
    category = 'Digital_Video_Games' as digi_games,
    AVG(-(y_true * log(y_pred_proba) + (1 - y_true) * log(1.0 - y_pred_proba))) AS avg_loss,
    VARIANCE(-(y_true * log(y_pred_proba) + (1 - y_true) * log(1.0 - y_pred_proba))) AS var_loss,    
    COUNT(*) as size
    
FROM (
    SELECT star_rating, category, y_true, IF(y_pred=0, 0.00001, 0.99999) AS y_pred_proba 
    FROM _freamon_virtual_test_view
    )
GROUP BY GROUPING SETS ((star_rating > 3, category = 'Digital_Video_Games'), (star_rating > 3), 
    (category = 'Digital_Video_Games'))
"""

view_generator.execute_query(slice_loss_query)

Unnamed: 0,top_rated,digi_games,avg_loss,var_loss,size
0,True,False,0.602101,2.648252,9060
1,False,False,0.745313,3.171524,6769
2,True,True,0.387005,1.78541,10155
3,False,True,0.930934,3.78905,3663
4,True,,0.488424,2.203659,19215
5,False,,0.810491,3.395865,10432
6,,False,0.663343,2.876855,15829
7,,True,0.531195,2.37396,13818
