# FairCause Benchmarking

This notebook benchmarks the FairCause causal-effect estimates against ground-truth effects computed from a synthetic structural causal model (SCM).

In [1]:
import os
import sys

# Directory put at the front of the import path; presumably the checkout that
# provides the `faircause` and `tests` packages imported in the next cell.
# Set the CFA_PYTHON_ROOT environment variable to use a different location;
# when it is unset the original hard-coded path is used.
PROJECT_ROOT = os.environ.get('CFA_PYTHON_ROOT', '/Users/alanma/Documents/CFA_python')

# Keep the entry first in sys.path, but do not stack duplicates when this cell
# is executed more than once in the same kernel.
if PROJECT_ROOT in sys.path:
    sys.path.remove(PROJECT_ROOT)
sys.path.insert(0, PROJECT_ROOT)


In [2]:
import numpy as numpy
import pandas as pd
import matplotlib.pyplot as plt
from faircause.faircause import FairCause
from tests.synthetic_data import *

TV 0.30000000000000004
NDE 0.204
NIE 0.0
ExpSE_x0 -0.09600000000000002
ExpSE_x1 1.214306433183765e-17
xde 0.3
xie 0.0
xse -5.377642775528102e-17
0.30000000000000004
0.204
0.09600000000000003


In [3]:
# Parameters of the synthetic SCM used for the ground-truth computation.
# By naming convention each one pairs with an exogenous input of the structural
# functions defined below; presumably they are Bernoulli probabilities --
# confirm against gen_data / compute_true_effects.
pux = 0.4    # pairs with U_x
puxz = 0.4   # pairs with U_xz (input shared by X and Z)
puz = 0.8    # pairs with U_z
puw = 0.2    # pairs with U_w
puy = 0.3    # pairs with U_y

def X(U_x, U_xz):
    """Structural equation for X: the bitwise/logical OR of ``U_x`` and ``U_xz``."""
    x_value = U_x | U_xz
    return x_value
def Z(U_xz, U_z):
    """Structural equation for Z: the bitwise/logical AND of ``U_xz`` and ``U_z``."""
    z_value = U_xz & U_z
    return z_value
def W(X, U_w, Z):
    """Structural equation for W: ``(X & U_w) | Z``.

    Inside this body ``X`` and ``Z`` are the argument values, not the
    module-level functions of the same names.
    """
    through_x = X & U_w
    return through_x | Z
def Y(U_y, X, Z, W):
    """Structural equation for Y: ``U_y`` AND-ed with each of X, Z, W, then OR-ed.

    Inside this body ``X``, ``Z`` and ``W`` are the argument values, not the
    module-level functions of the same names.
    """
    via_x = U_y & X
    via_z = U_y & Z
    via_w = U_y & W
    return via_x | via_z | via_w




In [4]:
# Report the ground-truth effects for the SCM defined above:
# the five parameters first, then the four structural functions.
compute_true_effects(
    pux, puz, puw, puy, puxz,
    X, Z, W, Y,
)

TV 0.30000000000000004
NDE 0.204
NIE 0.0
ExpSE_x0 -0.09600000000000002
ExpSE_x1 1.214306433183765e-17
xde 0.3
xie 0.0
xse -5.377642775528102e-17
0.30000000000000004
0.204
0.09600000000000003


In [5]:
# Generate a dataset with gen_data (first argument 10000, presumably the number
# of samples) and write it to test_data.csv in the current working directory.
gen_data(
    10000,
    X, Z, W, Y,
    pux, puz, puw, puy, puxz,
).to_csv('test_data.csv')

In [None]:
# Run the estimator on datasets of increasing size and collect, per size,
# the point estimate ('value') and its 'sd' for each reported measure.
# TODO: add time benchmark

sample_seq = [1000, 2000, 5000, 10000, 20000, 50000, 100000]
tv_py = []
tv_std_py = []
xde_py = []
xde_std_py = []
xie_py = []
xie_std_py = []
xse_py = []
xse_std_py = []
nde = []


def _summary_stat(summary, measure, column):
    """Return ``summary[column]`` for the row whose 'measure' equals ``measure``.

    The result is a plain float, so the collected lists hold numbers rather
    than one-element pandas Series. If no row matches, NaN is returned instead
    of an empty Series; if several match, the first is used.
    """
    matches = summary.loc[summary['measure'] == measure, column]
    if len(matches) == 0:
        return float('nan')
    return float(matches.iloc[0])


for n_samples in sample_seq:
    data = gen_data(n_samples, X, Z, W, Y, pux, puz, puw, puy, puxz)
    fc = FairCause(data, 'X', ['Z'], ['W'], 'Y', x0=0, x1=1, method='debiasing')
    fc.estimate_effects()
    res = fc.summary()

    # The summary printed by this notebook lists tv / ctfde / ctfie / ctfse / ett
    # only, so 'nde' is recorded as NaN unless the summary gains such a row.
    nde.append(_summary_stat(res, 'nde', 'value'))

    tv_py.append(_summary_stat(res, 'tv', 'value'))
    tv_std_py.append(_summary_stat(res, 'tv', 'sd'))
    xde_py.append(_summary_stat(res, 'ctfde', 'value'))
    xde_std_py.append(_summary_stat(res, 'ctfde', 'sd'))
    xie_py.append(_summary_stat(res, 'ctfie', 'value'))
    xie_std_py.append(_summary_stat(res, 'ctfie', 'sd'))
    xse_py.append(_summary_stat(res, 'ctfse', 'value'))
    xse_std_py.append(_summary_stat(res, 'ctfse', 'sd'))


  measure     value        sd       scale
0      tv  0.322981  0.019940  difference
1   ctfde  0.309671  0.029534  difference
2   ctfie -0.003707  0.010525  difference
3   ctfse -0.009603  0.026661  difference
4     ett  0.313379  0.027752  difference
faircause object summary:

Protected attribute:                 X
Protected attribute levels:          0, 1
Total Variation (TV): 0.3230

TV decomposition(s):

TV_01(y) (0.3230) = CtfDE_01(y | 0) (0.3097) - CtfIE_10(y | 0) (-0.0037) - CtfSE_10(y) (-0.0096)
  measure     value        sd       scale
0      tv  0.325816  0.014156  difference
1   ctfde  0.323736  0.021737  difference
2   ctfie  0.001744  0.007301  difference
3   ctfse -0.003824  0.019585  difference
4     ett  0.321992  0.020418  difference
faircause object summary:

Protected attribute:                 X
Protected attribute levels:          0, 1
Total Variation (TV): 0.3258

TV decomposition(s):

TV_01(y) (0.3258) = CtfDE_01(y | 0) (0.3237) - CtfIE_10(y | 0) (0.0017) - CtfSE

In [7]:
# Display the summary of `fc` as left by the benchmarking loop; when the cells
# are run in order this is the fit for the last entry of sample_seq (100000).
fc.summary()

faircause object summary:

Protected attribute:                 X
Protected attribute levels:          0, 1
Total Variation (TV): 0.2973

TV decomposition(s):

TV_01(y) (0.2973) = CtfDE_01(y | 0) (0.2993) - CtfIE_10(y | 0) (0.0001) - CtfSE_10(y) (0.0019)


Unnamed: 0,measure,value,sd
0,tv,0.297284,0.00194
1,ctfde,0.29927,0.00312
2,ctfie,5.3e-05,0.001272
3,ctfse,0.001932,0.002666
4,ett,0.299216,0.002851
