In [None]:
import logging
import warnings

# Make INFO-level log records visible in cell output (e.g. the SQL logged
# when a view is generated further down).
root_logger = logging.getLogger()
logging.basicConfig()
root_logger.setLevel(logging.INFO)

# Blanket-silence every Python warning for the remainder of the session.
warnings.filterwarnings(action="ignore")

## Execute a pipeline and set up a view generator

In [None]:
from freamon.adapters.mlinspect.provenance import from_py_file
# Pipeline script handed to from_py_file; per the section heading this
# executes the pipeline and yields the view generator used below.
PIPELINE_FILE = 'pipelines--mlinspect--credit.py'
view_generator = from_py_file(PIPELINE_FILE)

## Generate and materialize a view for data debugging

In [3]:
# Ask only for the slicing attributes plus true labels and predictions;
# the feature columns are left out of the view.
test_view_options = {
    'sliceable_by': ['race', 'sex'],
    'with_features': False,
    'with_y_true': True,
    'with_y_pred': True,
}
df = view_generator.test_view(**test_view_options)

# NOTE: the name `df` is referenced by the SQL in the query cells below
# (FROM df), so it must not be renamed.
df

INFO:root:
SELECT fs1.race, fs1.sex, ftv.y_true, ftv.y_pred
FROM _freamon_test_view ftv
JOIN _freamon_source_1_with_prov_view fs1  ON fs1.prov_id_source_1 = ftv.prov_id_source_1
   
        


Unnamed: 0,race,sex,y_true,y_pred
0,White,Female,0,0
1,Black,Male,0,0
2,White,Male,0,0
3,White,Male,0,0
4,White,Male,0,0
...,...,...,...,...
2123,White,Female,1,0
2124,White,Male,0,0
2125,White,Male,0,1
2126,White,Male,0,1


## Compute group-wise confusion matrix (for fairness metrics) via an aggregation query

In [4]:
# One result row per group, where `privileged` is true for race 'White'.
# Each SUM counts the rows falling into one confusion-matrix cell by casting
# the boolean (y_true, y_pred) condition to an integer.
confusion_matrix_sql = """
SELECT 
    race=='White' AS privileged, 
    SUM(CAST((y_true=1 AND y_pred=1) AS INTEGER)) AS true_positive,
    SUM(CAST((y_true=1 AND y_pred=0) AS INTEGER)) AS false_negative,    
    SUM(CAST((y_true=0 AND y_pred=1) AS INTEGER)) AS false_positive,    
    SUM(CAST((y_true=0 AND y_pred=0) AS INTEGER)) AS true_negative,    
FROM df
GROUP BY privileged
"""

view_generator.query(confusion_matrix_sql)

Unnamed: 0,privileged,true_positive,false_negative,false_positive,true_negative
0,True,168.0,214.0,106.0,1220.0
1,False,33.0,28.0,38.0,321.0


## Slicefinder via aggregation queries

In [5]:
# Per-slice loss statistics computed in a single aggregation query.
#
# * The inner SELECT turns the hard 0/1 prediction into a pseudo-probability
#   (0.00001 or 0.99999) so that both log() arguments stay strictly positive.
# * AVG / VARIANCE aggregate the per-row cross-entropy-style loss; COUNT(*)
#   gives the slice size.
# * GROUPING SETS yields the race x sex slices plus the race-only and
#   sex-only slices; the column that is not part of a set is empty in those
#   result rows.
#
# NOTE(review): the displayed averages are consistent with log() being
# base 10 here (each misclassified row contributes -log10(0.00001) = 5), so
# the values are scaled by 1/ln(10) relative to a natural-log loss. The
# relative ordering of slices is unaffected.
slice_loss_sql = """
SELECT 
    race='White' AS white,
    sex='Male' AS male,
    AVG(-(y_true * log(y_pred_proba) + (1 - y_true) * log(1.0 - y_pred_proba))) AS avg_loss,
    VARIANCE(-(y_true * log(y_pred_proba) + (1 - y_true) * log(1.0 - y_pred_proba))) AS var_loss,    
    COUNT(*) as size
    
FROM (SELECT race, sex, y_true, IF(y_pred=0, 0.00001, 0.99999) AS y_pred_proba FROM df)
GROUP BY GROUPING SETS ((race='White', sex='Male'), (race='White'), (sex='Male'))
"""

view_generator.query(slice_loss_sql)

Unnamed: 0,white,male,avg_loss,var_loss,size
0,True,False,0.616228,2.705607,641
1,False,True,0.949078,3.862504,216
2,True,True,1.129338,4.375369,1067
3,False,False,0.612749,2.701507,204
4,True,,0.936772,3.808529,1708
5,False,,0.785718,3.319121,420
6,,False,0.615388,2.701417,845
7,,True,1.09899,4.290499,1283
