In [None]:
import torch
import numpy as np
from sklearn.neural_network import MLPRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.base import clone
from copy import deepcopy
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path


# Global plotting configuration shared by every figure in this notebook.
sns.set_theme()

# Font size applied to axis labels, tick labels and legends.
FONTSIZE = 20

# LaTeX axis label for each counterfactual-effect column of the result table.
METRIC_DICT = {
    'cf_effect': r'$TE$',
    'cf_effect0': r'$TE_0$',
    'cf_effect1': r'$TE_1$',
}

# Helper functions

In [None]:
def cf_eval(y, y_cf, a):
    """Mean absolute gap between factual and counterfactual predictions.

    Parameters
    ----------
    y : array-like
        Predictions on the factual inputs, one row per sample.
    y_cf : array-like
        Predictions on the matching counterfactual inputs.
    a : array-like
        Binary attribute (0 / 1) of the factual samples; any shape that
        flattens to one value per sample.

    Returns
    -------
    tuple
        ``(overall, group0, group1)``: the sum of ``|y_cf - y|`` divided by
        the number of samples, over all samples, over ``a == 0`` and over
        ``a == 1``. A group without samples yields ``nan``.
    """
    a = np.asarray(a).ravel()
    n_samples = a.shape[0]
    if n_samples == 0:
        nan = float('nan')
        return nan, nan, nan

    # Force one row per sample on both sides: subtracting an (n,) array from
    # an (n, 1) array would otherwise broadcast silently to an (n, n) matrix.
    y = np.asarray(y).reshape(n_samples, -1)
    y_cf = np.asarray(y_cf).reshape(n_samples, -1)
    gap = np.abs(y_cf - y)

    def _per_sample_mean(rows):
        # Same "sum / number of rows" as before, minus the 0/0 warning.
        return np.sum(rows) / rows.shape[0] if rows.shape[0] else float('nan')

    return (_per_sample_mean(gap),
            _per_sample_mean(gap[a == 0]),
            _per_sample_mean(gap[a == 1]))

def pcf_mix(y_score, ycf_score, a, is_cf=False):
    """Blend two scores per sample, weighted by the attribute group frequencies.

    For samples with ``a == 0`` the result is
    ``y_score * r0 + ycf_score * r1``; for samples with ``a == 1`` it is
    ``y_score * r1 + ycf_score * r0``, where ``r0`` is the fraction of
    ``a == 0`` entries and ``r1 = 1 - r0``.

    Parameters
    ----------
    y_score : array-like
        Scores obtained with the attribute values given in ``a``.
    ycf_score : array-like
        Scores obtained with the flipped attribute.
    a : array-like
        Binary attribute (0 / 1) belonging to ``y_score``.
    is_cf : bool, default False
        Set when ``a`` is the flipped attribute; the two ratios are swapped
        so that the weights follow the ratio in the real data.

    Returns
    -------
    numpy.ndarray
        1-D array of blended scores. Entries whose attribute is neither 0
        nor 1 stay at zero.
    """
    # Flatten everything so column-shaped (n, 1) inputs line up with the masks.
    a = np.asarray(a).ravel()
    y_score = np.asarray(y_score).ravel()
    ycf_score = np.asarray(ycf_score).ravel()

    # attribute corresponding to y
    a_0_indices = a == 0
    a_1_indices = a == 1
    a_0_ratio = np.sum(a_0_indices) / len(a)
    a_1_ratio = 1 - a_0_ratio
    # Truthiness instead of `is True`, so a numpy boolean flag is honoured too.
    if is_cf:
        # we need to use the ratio in the real data
        a_0_ratio, a_1_ratio = a_1_ratio, a_0_ratio

    # Keep float inputs at their own precision, but never store the weighted
    # blend in an integer/bool buffer, which would truncate it.
    out_dtype = y_score.dtype if np.issubdtype(y_score.dtype, np.inexact) else np.float64
    y_output = np.zeros(y_score.shape, dtype=out_dtype)
    y_output[a_0_indices] = y_score[a_0_indices] * a_0_ratio + ycf_score[a_0_indices] * a_1_ratio
    y_output[a_1_indices] = y_score[a_1_indices] * a_1_ratio + ycf_score[a_1_indices] * a_0_ratio

    return y_output

# Predictors

In [None]:
def cfe_classifier(data_dict, clf):
    """Fit ``clf`` on the estimated latent ``u_hat`` only and evaluate it.

    Parameters
    ----------
    data_dict : dict
        Has a ``"train"`` and a ``"test"`` entry. Reads ``u_hat`` and ``y``
        from the training split and ``u_hat``, ``u_cf_hat``, ``y`` and ``a``
        from the test split.
    clf : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``; fitted in place.

    Returns
    -------
    tuple
        ``(train_rmse, test_rmse, cf_effect, cf_effect0, cf_effect1, clf)``
        where the three effects come from ``cf_eval`` on the factual versus
        counterfactual test predictions.
    """
    train_dat = data_dict["train"]
    test_dat = data_dict["test"]

    # ========= Training ========= #
    y_train = train_dat["y"].ravel()
    clf.fit(train_dat["u_hat"], y_train)
    # RMSE as sqrt(MSE): the `squared=False` keyword was deprecated and later
    # removed from scikit-learn, so it cannot be relied on.
    train_rmse = np.sqrt(mean_squared_error(y_train, clf.predict(train_dat["u_hat"])))

    # ========= Testing ========= #
    y_factual = clf.predict(test_dat["u_hat"])
    test_rmse = np.sqrt(mean_squared_error(test_dat["y"].ravel(), y_factual.ravel()))
    y_counter = clf.predict(test_dat["u_cf_hat"])

    cf_effect, cf_effect0, cf_effect1 = cf_eval(y_factual, y_counter, test_dat["a"])

    return train_rmse, test_rmse, cf_effect, cf_effect0, cf_effect1, clf

def cfr_classifier(data_dict, clf):
    """Fit ``clf`` on ``u_hat`` plus the average of ``x`` and its counterfactual.

    The feature matrix is ``[latent, (x_a + x_b) / 2]``. For training and
    factual testing that is ``u_hat`` with ``x`` and ``x_cf_uhat``; for the
    counterfactual prediction it is ``u_cf_hat`` with ``x_cf`` and
    ``x_cf_cf_uhat``.

    Parameters
    ----------
    data_dict : dict
        Has a ``"train"`` and a ``"test"`` entry holding the arrays named
        above together with ``y`` and (test only) ``a``.
    clf : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``; fitted in place.

    Returns
    -------
    tuple
        ``(train_rmse, test_rmse, cf_effect, cf_effect0, cf_effect1, clf)``.
    """
    train_dat = data_dict["train"]
    test_dat = data_dict["test"]

    def _features(latent, x_a, x_b):
        # Latent estimate next to the mean of the two feature versions.
        return np.concatenate([latent, (x_a + x_b) / 2], axis=1)

    # ========= Training ========= #
    train_inputs = _features(train_dat["u_hat"], train_dat["x"], train_dat["x_cf_uhat"])
    y_train = train_dat["y"].ravel()
    clf.fit(train_inputs, y_train)
    # RMSE as sqrt(MSE): the `squared=False` keyword was deprecated and later
    # removed from scikit-learn, so it cannot be relied on.
    train_rmse = np.sqrt(mean_squared_error(y_train, clf.predict(train_inputs)))

    # ========= Testing ========= #
    y_factual = clf.predict(
        _features(test_dat["u_hat"], test_dat["x"], test_dat["x_cf_uhat"]))
    test_rmse = np.sqrt(mean_squared_error(test_dat["y"].ravel(), y_factual.ravel()))

    y_counter = clf.predict(
        _features(test_dat["u_cf_hat"], test_dat["x_cf"], test_dat["x_cf_cf_uhat"]))
    cf_effect, cf_effect0, cf_effect1 = cf_eval(y_factual, y_counter, test_dat["a"])

    return train_rmse, test_rmse, cf_effect, cf_effect0, cf_effect1, clf

def erm_classifier(data_dict, clf):
    """Fit ``clf`` on the raw features ``[x, a]`` and evaluate it.

    Parameters
    ----------
    data_dict : dict
        Has a ``"train"`` and a ``"test"`` entry. Reads ``x``, ``a`` and
        ``y`` from both splits and additionally ``x_cf`` and ``a_cf`` from
        the test split.
    clf : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``; fitted in place.

    Returns
    -------
    tuple
        ``(train_rmse, test_rmse, cf_effect, cf_effect0, cf_effect1, clf)``
        where the effects compare predictions on ``[x, a]`` with
        predictions on ``[x_cf, a_cf]``.
    """
    train_dat = data_dict["train"]
    test_dat = data_dict["test"]

    # ========= Training ========= #
    train_inputs = np.concatenate([train_dat["x"], train_dat["a"]], axis=1)
    y_train = train_dat["y"].ravel()
    clf.fit(train_inputs, y_train)
    # RMSE as sqrt(MSE): the `squared=False` keyword was deprecated and later
    # removed from scikit-learn, so it cannot be relied on.
    train_rmse = np.sqrt(mean_squared_error(y_train, clf.predict(train_inputs)))

    # ========= Testing ========= #
    y_factual = clf.predict(np.concatenate([test_dat["x"], test_dat["a"]], axis=1))
    test_rmse = np.sqrt(mean_squared_error(test_dat["y"].ravel(), y_factual.ravel()))

    y_counter = clf.predict(np.concatenate([test_dat["x_cf"], test_dat["a_cf"]], axis=1))
    cf_effect, cf_effect0, cf_effect1 = cf_eval(y_factual, y_counter, test_dat["a"])

    return train_rmse, test_rmse, cf_effect, cf_effect0, cf_effect1, clf

def pcf_mix(y_score, ycf_score, a, is_cf=False):
    """Blend two scores per sample, weighted by the attribute group frequencies.

    For samples with ``a == 0`` the result is
    ``y_score * r0 + ycf_score * r1``; for samples with ``a == 1`` it is
    ``y_score * r1 + ycf_score * r0``, where ``r0`` is the fraction of
    ``a == 0`` entries and ``r1 = 1 - r0``.

    Parameters
    ----------
    y_score : array-like
        Scores obtained with the attribute values given in ``a``.
    ycf_score : array-like
        Scores obtained with the flipped attribute.
    a : array-like
        Binary attribute (0 / 1) belonging to ``y_score``.
    is_cf : bool, default False
        Set when ``a`` is the flipped attribute; the two ratios are swapped
        so that the weights follow the ratio in the real data.

    Returns
    -------
    numpy.ndarray
        1-D array of blended scores. Entries whose attribute is neither 0
        nor 1 stay at zero.
    """
    # Flatten everything so column-shaped (n, 1) inputs line up with the masks.
    a = np.asarray(a).ravel()
    y_score = np.asarray(y_score).ravel()
    ycf_score = np.asarray(ycf_score).ravel()

    # attribute corresponding to y
    a_0_indices = a == 0
    a_1_indices = a == 1
    a_0_ratio = np.sum(a_0_indices) / len(a)
    a_1_ratio = 1 - a_0_ratio
    # Truthiness instead of `is True`, so a numpy boolean flag is honoured too.
    if is_cf:
        # we need to use the ratio in the real data
        a_0_ratio, a_1_ratio = a_1_ratio, a_0_ratio

    # Keep float inputs at their own precision, but never store the weighted
    # blend in an integer/bool buffer, which would truncate it.
    out_dtype = y_score.dtype if np.issubdtype(y_score.dtype, np.inexact) else np.float64
    y_output = np.zeros(y_score.shape, dtype=out_dtype)
    y_output[a_0_indices] = y_score[a_0_indices] * a_0_ratio + ycf_score[a_0_indices] * a_1_ratio
    y_output[a_1_indices] = y_score[a_1_indices] * a_1_ratio + ycf_score[a_1_indices] * a_0_ratio

    return y_output

def pcf_classifier(data_dict, clf):
    """Fit ``clf`` on ``[x, a]`` and predict with the ``pcf_mix`` blend.

    The model is trained exactly like the plain ``[x, a]`` regressor. At test
    time each prediction is mixed (via ``pcf_mix``) with the prediction on
    the estimated counterpart input: ``[x_cf_uhat, a_cf]`` for the factual
    sample and ``[x_cf_cf_uhat, a]`` for the counterfactual sample.

    Parameters
    ----------
    data_dict : dict
        Has a ``"train"`` and a ``"test"`` entry. Reads ``x``, ``a``, ``y``
        from the training split and ``x``, ``a``, ``y``, ``x_cf``, ``a_cf``,
        ``x_cf_uhat``, ``x_cf_cf_uhat`` from the test split.
    clf : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``; fitted in place.

    Returns
    -------
    tuple
        ``(train_rmse, test_rmse, cf_effect, cf_effect0, cf_effect1, clf)``.
        ``train_rmse`` is measured on the un-mixed training predictions.
    """
    train_dat = data_dict["train"]
    test_dat = data_dict["test"]

    def _with_attr(features, attr):
        # Model input: features with the attribute appended as last column.
        return np.concatenate([features, attr], axis=1)

    # ======= Training ======= #
    train_inputs = _with_attr(train_dat["x"], train_dat["a"])
    y_train = train_dat["y"].ravel()
    clf.fit(train_inputs, y_train)
    # RMSE as sqrt(MSE): the `squared=False` keyword was deprecated and later
    # removed from scikit-learn, so it cannot be relied on.
    train_rmse = np.sqrt(mean_squared_error(y_train, clf.predict(train_inputs)))

    # ======= Testing ======= #

    # ======= factual pred ======= #
    y_factual_score = clf.predict(_with_attr(test_dat["x"], test_dat["a"]))
    y_factual_cf_score = clf.predict(_with_attr(test_dat["x_cf_uhat"], test_dat["a_cf"]))

    y_factual = pcf_mix(y_factual_score, y_factual_cf_score, test_dat["a"].ravel())
    test_rmse = np.sqrt(mean_squared_error(test_dat["y"].ravel(), y_factual.ravel()))

    # ======= counter pred ======= #
    y_counter_score = clf.predict(_with_attr(test_dat["x_cf"], test_dat["a_cf"]))
    y_counter_cf_score = clf.predict(_with_attr(test_dat["x_cf_cf_uhat"], test_dat["a"]))
    # The attribute passed here is the flipped one, hence is_cf=True.
    y_counter = pcf_mix(y_counter_score, y_counter_cf_score, test_dat["a_cf"].ravel(), is_cf=True)

    cf_effect, cf_effect0, cf_effect1 = cf_eval(y_factual, y_counter, test_dat["a"])

    return train_rmse, test_rmse, cf_effect, cf_effect0, cf_effect1, clf

def pcfaug_classifier(data_dict, clf):
    """Like the ``pcf_mix``-based predictor, but trained on augmented data.

    The training set stacks the factual rows ``[x, a]`` on top of the
    estimated counterfactual rows ``[x_cf_uhat, a_cf]``; both halves use the
    same targets ``y``. Test-time predictions are blended with ``pcf_mix``.

    Parameters
    ----------
    data_dict : dict
        Has a ``"train"`` and a ``"test"`` entry. Reads ``x``, ``a``,
        ``a_cf``, ``x_cf_uhat``, ``y`` from the training split and ``x``,
        ``a``, ``y``, ``x_cf``, ``a_cf``, ``x_cf_uhat``, ``x_cf_cf_uhat``
        from the test split.
    clf : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``; fitted in place.

    Returns
    -------
    tuple
        ``(train_rmse, test_rmse, cf_effect, cf_effect0, cf_effect1, clf)``.
        ``train_rmse`` is measured on the augmented training set.
    """
    train_dat = data_dict["train"]
    test_dat = data_dict["test"]

    def _with_attr(features, attr):
        # Model input: features with the attribute appended as last column.
        return np.concatenate([features, attr], axis=1)

    # ======= Training ======= #
    train_inputs = np.concatenate([
        _with_attr(train_dat["x"], train_dat["a"]),
        _with_attr(train_dat["x_cf_uhat"], train_dat["a_cf"]),
    ], axis=0)
    # Targets are repeated once for each half of the augmented inputs.
    y_train = np.concatenate([train_dat["y"], train_dat["y"]], axis=0).ravel()
    clf.fit(train_inputs, y_train)
    # RMSE as sqrt(MSE): the `squared=False` keyword was deprecated and later
    # removed from scikit-learn, so it cannot be relied on.
    train_rmse = np.sqrt(mean_squared_error(y_train, clf.predict(train_inputs)))

    # ======= Testing ======= #

    # ======= factual pred ======= #
    y_factual_score = clf.predict(_with_attr(test_dat["x"], test_dat["a"]))
    y_factual_cf_score = clf.predict(_with_attr(test_dat["x_cf_uhat"], test_dat["a_cf"]))

    y_factual = pcf_mix(y_factual_score, y_factual_cf_score, test_dat["a"].ravel())
    test_rmse = np.sqrt(mean_squared_error(test_dat["y"].ravel(), y_factual.ravel()))

    # ======= counter pred ======= #
    y_counter_score = clf.predict(_with_attr(test_dat["x_cf"], test_dat["a_cf"]))
    y_counter_cf_score = clf.predict(_with_attr(test_dat["x_cf_cf_uhat"], test_dat["a"]))
    # The attribute passed here is the flipped one, hence is_cf=True.
    y_counter = pcf_mix(y_counter_score, y_counter_cf_score, test_dat["a_cf"].ravel(), is_cf=True)

    cf_effect, cf_effect0, cf_effect1 = cf_eval(y_factual, y_counter, test_dat["a"])

    return train_rmse, test_rmse, cf_effect, cf_effect0, cf_effect1, clf

def erm_ana_classifer(data_dict, 
                        dataset_type,
                        w_a,
                        ):
    """Evaluate the closed-form ``[x, a]`` predictor; nothing is trained.

    The predictor is ``2 * x - w_a * a`` for ``'linear'`` and
    ``x ** 3 + x - w_a * a`` for ``'cubic'``. These are the expressions for
    ``y`` implied by ``gen_reg_dataset`` when ``w_epsx = 0`` (then
    ``u = x - w_a * a``), with the outcome noise left out.

    Parameters
    ----------
    data_dict : dict
        Only the ``"test"`` entry is used; reads ``x``, ``a``, ``x_cf``,
        ``a_cf`` and ``y``.
    dataset_type : {'linear', 'cubic'}
        Selects the closed-form expression.
    w_a : float
        Weight of the attribute in the expression.

    Returns
    -------
    tuple
        ``(None, test_rmse, cf_effect, cf_effect0, cf_effect1, None)``; the
        ``None`` slots stand in for the training error and the fitted model
        returned by the learned predictors.

    Raises
    ------
    ValueError
        If ``dataset_type`` is neither ``'linear'`` nor ``'cubic'``.
    """
    # Resolve the expression once instead of branching for every prediction.
    if dataset_type == 'linear':
        def _predict(x, a):
            return 2 * x - w_a * a
    elif dataset_type == 'cubic':
        def _predict(x, a):
            return x ** 3 + x - w_a * a
    else:
        raise ValueError('dataset_type not recognized')

    test_dat = data_dict["test"]
    # ======= Testing ======= #

    # ======= factual pred ======= #
    y_factual = _predict(test_dat['x'], test_dat['a'])
    # RMSE as sqrt(MSE): the `squared=False` keyword was deprecated and later
    # removed from scikit-learn, so it cannot be relied on.
    test_rmse = np.sqrt(mean_squared_error(test_dat["y"].ravel(), y_factual.ravel()))

    # ======= counter pred ======= #
    y_counter = _predict(test_dat['x_cf'], test_dat['a_cf'])
    cf_effect, cf_effect0, cf_effect1 = cf_eval(y_factual, y_counter, a=test_dat["a"])
    return None, test_rmse, cf_effect, cf_effect0, cf_effect1, None

def pcf_ana_classifer(data_dict, 
                        dataset_type,
                        w_a,
                        ):
    """Evaluate the ``pcf_mix`` blend of the closed-form predictor.

    Uses ``2 * x - w_a * a`` (``'linear'``) or ``x ** 3 + x - w_a * a``
    (``'cubic'``) as the base predictor. These are the expressions for ``y``
    implied by ``gen_reg_dataset`` when ``w_epsx = 0``, with the outcome
    noise left out. Each prediction is mixed with the prediction on the
    estimated counterpart input, as in the learned ``pcf_mix`` predictor.

    Parameters
    ----------
    data_dict : dict
        Only the ``"test"`` entry is used; reads ``x``, ``a``, ``x_cf``,
        ``a_cf``, ``x_cf_uhat``, ``x_cf_cf_uhat`` and ``y``.
    dataset_type : {'linear', 'cubic'}
        Selects the closed-form expression.
    w_a : float
        Weight of the attribute in the expression.

    Returns
    -------
    tuple
        ``(None, test_rmse, cf_effect, cf_effect0, cf_effect1, None)``; the
        ``None`` slots stand in for the training error and the fitted model
        returned by the learned predictors.

    Raises
    ------
    ValueError
        If ``dataset_type`` is neither ``'linear'`` nor ``'cubic'``.
    """
    # Resolve the expression once instead of branching for every prediction.
    if dataset_type == 'linear':
        def _predict(x, a):
            return 2 * x - w_a * a
    elif dataset_type == 'cubic':
        def _predict(x, a):
            return x ** 3 + x - w_a * a
    else:
        raise ValueError('dataset_type not recognized')

    test_dat = data_dict["test"]

    # ======= Testing ======= #

    # ======= factual pred ======= #
    y_factual_score = _predict(test_dat['x'], test_dat['a'])
    y_factual_cf_score = _predict(test_dat['x_cf_uhat'], test_dat['a_cf'])
    y_factual = pcf_mix(y_factual_score.ravel(), y_factual_cf_score.ravel(), test_dat['a'].ravel())
    # RMSE as sqrt(MSE): the `squared=False` keyword was deprecated and later
    # removed from scikit-learn, so it cannot be relied on.
    test_rmse = np.sqrt(mean_squared_error(test_dat["y"].ravel(), y_factual.ravel()))

    # ======= counter pred ======= #
    y_counter_score = _predict(test_dat['x_cf'], test_dat['a_cf'])
    y_counter_cf_score = _predict(test_dat['x_cf_cf_uhat'], test_dat['a'])
    # The attribute passed here is the flipped one, hence is_cf=True.
    y_counter = pcf_mix(y_counter_score.ravel(), y_counter_cf_score.ravel(), test_dat['a_cf'].ravel(), is_cf=True)
    cf_effect, cf_effect0, cf_effect1 = cf_eval(y_factual, y_counter, a=test_dat["a"])

    return None, test_rmse, cf_effect, cf_effect0, cf_effect1, None

# Dataset

In [None]:
def gen_reg_dataset(dataset_type, 
                    w_epsx=0, 
                    w_epsy=1, 
                    w_a=1, 
                    a_freq=0.6,
                    num_samples=2000,
                    err_std=0,
                    err_bias=0,
                    seed=0):
    """Sample a synthetic regression dataset with exact counterfactuals.

    For each of the splits ``'train'`` and ``'test'``:

    * ``u`` and the noises ``eps_x``, ``eps_y`` are standard normal,
      ``a`` is Bernoulli with success probability ``a_freq`` and
      ``a_cf = 1 - a``;
    * ``x = w_a * a + u + w_epsx * eps_x`` and ``x_cf`` is the same
      expression with ``a_cf``;
    * ``y = x + u + w_epsy * eps_y`` (``'linear'``) or
      ``y = x ** 3 + u + w_epsy * eps_y`` (``'cubic'``);
    * ``u_hat`` / ``u_cf_hat`` are ``u``, ``x_cf_uhat`` is ``x_cf`` and
      ``x_cf_cf_uhat`` is ``x``, each with independent Gaussian noise of
      scale ``err_std`` and a constant offset ``err_bias`` added.

    ``torch.manual_seed(seed)`` is called first, so the global torch RNG is
    reseeded as a side effect.

    Returns
    -------
    dict
        ``{'train': {...}, 'test': {...}}`` with every entry converted to a
        numpy array.

    Raises
    ------
    ValueError
        If ``dataset_type`` is neither ``'linear'`` nor ``'cubic'``.
    """
    torch.manual_seed(seed)

    standard_normal = torch.distributions.Normal(0, 1)
    attribute_dist = torch.distributions.Bernoulli(torch.tensor([a_freq]))

    def _estimate(true_value):
        # Simulated estimation error: Gaussian noise plus a constant offset.
        return true_value + torch.randn_like(true_value) * err_std + err_bias

    tensors = {}
    for split in ('train', 'test'):
        # The order of the random draws is fixed; it determines the data
        # produced for a given seed.
        u = standard_normal.sample((num_samples, 1))
        a = attribute_dist.sample((num_samples,))
        eps_x = standard_normal.sample((num_samples, 1))
        eps_y = standard_normal.sample((num_samples, 1))
        a_cf = 1 - a

        # Both dataset types share the feature equation.
        x = w_a * a + u + w_epsx * eps_x
        x_cf = w_a * a_cf + u + w_epsx * eps_x

        if dataset_type == 'linear':
            y = x + u + w_epsy * eps_y
        elif dataset_type == 'cubic':
            y = x ** 3 + u + w_epsy * eps_y
        else:
            raise ValueError('Invalid dataset type')

        # prepare data as algorithm input (values are evaluated top to bottom)
        tensors[split] = {
            'x': x,
            'y': y,
            'a': a,
            'u': u,
            'a_cf': a_cf,
            'x_cf': x_cf,
            'u_hat': _estimate(u),
            'u_cf_hat': _estimate(u),
            'x_cf_uhat': _estimate(x_cf),
            'x_cf_cf_uhat': _estimate(x),
        }

    return {
        split: {key: value.numpy() for key, value in fields.items()}
        for split, fields in tensors.items()
    }

# Exp 1 - GT Estimation Error

In [None]:
def eval(all_res,
         dataset_type = 'linear',
            clf_name = 'Ridge',
            w_a = 1,
            w_epsx = 0,
            w_epsy = 1,
            a_freq = 0.7,
            err_std=0,
            err_bias=0):
    """Run every method on 5 seeded datasets and append the results.

    For each ``repeat`` in ``0..4`` a dataset is generated with
    ``gen_reg_dataset(..., seed=repeat)`` and the seven methods
    (``cfr``, ``cfe``, ``erm``, ``pcf``, ``pcfaug``, ``ermana``, ``pcfana``)
    are evaluated on it. Learned methods get a fresh regressor each time;
    the two analytic methods get ``dataset_type`` and ``w_a`` instead.

    NOTE: this function shadows the ``eval`` builtin; the name is kept
    because the experiment cells call it.

    Parameters
    ----------
    all_res : pandas.DataFrame or None
        Results collected so far; not modified.
    dataset_type : {'linear', 'cubic'}
    clf_name : {'mlp', 'knn'}
        Regressor used by the learned methods. The default ``'Ridge'`` is
        not accepted, so callers must pass one of the two names.
    w_a, w_epsx, w_epsy, a_freq, err_std, err_bias
        Forwarded to ``gen_reg_dataset``.

    Returns
    -------
    pandas.DataFrame
        ``all_res`` followed by one row per (repeat, method), with a fresh
        integer index.

    Raises
    ------
    ValueError
        If ``clf_name`` is not ``'mlp'`` or ``'knn'``.
    """
    # Fail before any dataset is generated rather than inside the loops.
    if clf_name not in ('mlp', 'knn'):
        raise ValueError('Invalid clf_name')

    methods = [
        ('cfr', cfr_classifier),
        ('cfe', cfe_classifier),
        ('erm', erm_classifier),
        ('pcf', pcf_classifier),
        ('pcfaug', pcfaug_classifier),
        ('ermana', erm_ana_classifer),
        ('pcfana', pcf_ana_classifer),
    ]
    analytic_methods = ('ermana', 'pcfana')

    # Collect plain dicts and build the frame once: DataFrame.append was
    # removed in pandas 2.0, and growing a frame row by row is quadratic.
    records = []
    for repeat in range(5):
        np.random.seed(repeat)
        dataset = gen_reg_dataset(dataset_type,
                                  w_epsx=w_epsx,
                                  w_epsy=w_epsy,
                                  w_a=w_a,
                                  a_freq=a_freq,
                                  err_std=err_std,
                                  err_bias=err_bias,
                                  seed=repeat)

        for method, classifier in methods:
            if method in analytic_methods:
                outcome = classifier(dataset, dataset_type=dataset_type, w_a=w_a)
            else:
                if clf_name == 'mlp':
                    predictor = MLPRegressor(hidden_layer_sizes=(20,20),max_iter=2000,activation='tanh',random_state=repeat)
                else:
                    predictor = KNeighborsRegressor()
                outcome = classifier(dataset, predictor)
            train_err, test_err, cf_effect, cf_effect0, cf_effect1, _ = outcome

            records.append({
                'repeat': repeat,
                'dataset_type': dataset_type,
                'clf': clf_name,
                'w_a': w_a,
                'w_epsx': w_epsx,
                'w_epsy': w_epsy,
                'a_freq': a_freq,
                'method': method,
                'train_err': train_err,
                'test_err': test_err,
                'cf_effect': cf_effect,
                'cf_effect0': cf_effect0,
                'cf_effect1': cf_effect1,
                'std': err_std,
                'bias': err_bias,
            })

    new_rows = pd.DataFrame(records)
    # Skip the concat for an empty accumulator: concatenating empty frames is
    # deprecated in recent pandas and would only add dtype noise.
    if all_res is None or len(all_res) == 0:
        return new_rows
    return pd.concat([all_res, new_rows], ignore_index=True)


In [None]:

def vis_alg(all_res, save_dir=None, legend=False, mute=()):
    """Scatter the per-method mean test RMSE against the mean ``cf_effect``.

    NOTE: a later cell of this notebook defines another ``vis_alg`` (grouping
    by method and ``std``); once that cell has run, it replaces this one.

    Parameters
    ----------
    all_res : pandas.DataFrame
        Result table with at least the columns ``method``, ``test_err`` and
        ``cf_effect``; not modified.
    save_dir : str, optional
        Path prefix; the figure is written to ``f'{save_dir}_{col}.png'``.
        Missing parent directories are created.
    legend : bool or str, default False
        Forwarded to ``seaborn.scatterplot``; when truthy the legend is
        re-placed outside the axes.
    mute : iterable of str, default ()
        Raw method names (before relabelling) to leave out of the plot.
    """
    # Drop muted methods; the boolean-mask selection plus copy leaves the
    # caller's frame untouched.
    shown = all_res[~all_res['method'].isin(list(mute))].copy()

    display_names = {
        'cfe': 'CFU',
        'cfr': 'CFR',
        'erm': 'ERM',
        'pcf': 'PCF',
        'pcfaug': 'PCFAug',
        'ermana': 'ERM-Ana',
        'pcfana': 'PCF-Ana',
    }
    shown['method'] = shown['method'].replace(display_names)

    # numeric_only=True: the table also holds string columns, on which
    # mean() raises in pandas >= 2.0.
    shown = shown.groupby(by='method').mean(numeric_only=True).reset_index()

    for col in ['cf_effect']:
        fig, ax = plt.subplots(figsize=(6,6))
        # Create the scatter plot with unique styles
        sns.scatterplot(data=shown, x=col, y='test_err', style='method', hue='method', s=200, ax=ax, legend=legend)
        ax.set_xlabel(METRIC_DICT[col],fontsize=FONTSIZE)
        ax.set_ylabel('RMSE',fontsize=FONTSIZE)
        if legend:
            ax.legend(fontsize=FONTSIZE,markerscale=2, bbox_to_anchor=(0.95, 1), loc='upper left')
        plt.xticks(fontsize=FONTSIZE, rotation=30)
        plt.yticks(fontsize=FONTSIZE)
        if save_dir:
            out_path = Path(f'{save_dir}_{col}.png')
            # savefig does not create directories on its own.
            out_path.parent.mkdir(parents=True, exist_ok=True)
            fig.savefig(out_path, bbox_inches='tight',dpi=200)
        plt.show()
        # Release the figure so repeated calls do not pile up open figures.
        plt.close(fig)
        

In [None]:
# Ground-truth experiment with the k-NN regressor: identical settings on both
# synthetic datasets, one figure per dataset.
clf_name = 'knn'

for dataset_type in ['linear', 'cubic']:
    res = eval(pd.DataFrame(),
               dataset_type=dataset_type,
               clf_name=clf_name,
               w_a=1,
               w_epsx=0)

    vis_alg(res,
            save_dir=f'./figures/synthetic/gt_{dataset_type}_{clf_name}',
            mute=['ermana','pcfana','pcfaug'])


In [None]:
# Ground-truth experiment with the MLP regressor: identical settings on both
# synthetic datasets; only the cubic figure carries the legend.
clf_name = 'mlp'

for dataset_type in ['linear', 'cubic']:
    res = eval(pd.DataFrame(),
               dataset_type=dataset_type,
               clf_name=clf_name,
               w_a=1,
               w_epsx=0)

    vis_alg(res,
            save_dir=f'./figures/synthetic/gt_{dataset_type}_{clf_name}',
            mute=['ermana','pcfana','pcfaug'],
            legend=(dataset_type == 'cubic'))

# Exp 1.2 - CF Estimation Error  

## Variance

In [None]:

def vis_alg(all_res, save_dir=None, legend=False, mute=()):
    """Scatter mean test RMSE against mean ``cf_effect`` per (method, std).

    Marker style encodes the method and colour encodes the ``std`` column.

    NOTE: this definition replaces the ``vis_alg`` defined in an earlier
    cell of this notebook (which groups by method only).

    Parameters
    ----------
    all_res : pandas.DataFrame
        Result table with at least the columns ``method``, ``std``,
        ``test_err`` and ``cf_effect``; not modified.
    save_dir : str, optional
        Path prefix; the figure is written to ``f'{save_dir}_{col}.png'``.
        Missing parent directories are created.
    legend : bool or str, default False
        Forwarded to ``seaborn.scatterplot``; when truthy the legend is
        re-placed outside the axes.
    mute : iterable of str, default ()
        Raw method names (before relabelling) to leave out of the plot.
    """
    # Drop muted methods; the boolean-mask selection plus copy leaves the
    # caller's frame untouched.
    shown = all_res[~all_res['method'].isin(list(mute))].copy()

    display_names = {
        'cfe': 'CFU',
        'cfr': 'CFR',
        'erm': 'ERM',
        'pcf': 'PCF',
        'pcfaug': 'PCFAug',  # was misspelled 'PCGAug'
        'ermana': 'ERM-Ana',
        'pcfana': 'PCF-Ana',
    }
    shown['method'] = shown['method'].replace(display_names)

    # numeric_only=True: the table also holds string columns, on which
    # mean() raises in pandas >= 2.0.
    shown = shown.groupby(by=['method','std']).mean(numeric_only=True).reset_index()

    for col in ['cf_effect']:
        fig, ax = plt.subplots(figsize=(6,6))
        # Create the scatter plot with unique styles
        sns.scatterplot(data=shown, x=col, y='test_err', style='method', hue='std', s=200, ax=ax, palette='deep', legend=legend)
        ax.set_xlabel(METRIC_DICT[col],fontsize=FONTSIZE)
        ax.set_ylabel('RMSE',fontsize=FONTSIZE)
        if legend:
            ax.legend(fontsize=FONTSIZE,markerscale=2, bbox_to_anchor=(0.95, 1), loc='upper left')
        plt.xticks(fontsize=FONTSIZE, rotation=30)
        plt.yticks(fontsize=FONTSIZE)
        if save_dir:
            out_path = Path(f'{save_dir}_{col}.png')
            # savefig does not create directories on its own.
            out_path.parent.mkdir(parents=True, exist_ok=True)
            fig.savefig(out_path, bbox_inches='tight',dpi=200)
        plt.show()
        # Release the figure so repeated calls do not pile up open figures.
        plt.close(fig)
        

### EST

In [None]:
# Sweep the estimation-noise scale with zero bias, once per dataset type;
# results of one sweep are accumulated in `res` and plotted together.
clf_name = 'knn'
bias = 0

for dataset_type in ['linear', 'cubic']:
    res = pd.DataFrame()
    for err_std in [0,0.001,0.01,0.1]:
        res = eval(res,
                   dataset_type=dataset_type,
                   clf_name=clf_name,
                   w_a=1,
                   w_epsx=0,
                   err_std=err_std,
                   err_bias=bias)

    vis_alg(res,
            save_dir=f'./figures/synthetic/eststd_b{bias}_{dataset_type}_{clf_name}',
            mute=['ermana','pcfaug','pcfana'])

In [None]:
# Same noise-scale sweep as with zero bias, now with a small constant bias in
# the estimates.
clf_name = 'knn'
bias = 0.001

for dataset_type in ['linear', 'cubic']:
    res = pd.DataFrame()
    for err_std in [0,0.001,0.01,0.1]:
        res = eval(res,
                   dataset_type=dataset_type,
                   clf_name=clf_name,
                   w_a=1,
                   w_epsx=0,
                   err_std=err_std,
                   err_bias=bias)

    vis_alg(res,
            save_dir=f'./figures/synthetic/eststd_b{bias}_{dataset_type}_{clf_name}',
            mute=['ermana','pcfaug','pcfana'])

### Analytic Solution

In [None]:
# Noise-scale sweep without bias; the muted list leaves only the 'pcf' and
# 'pcfana' methods in the figure.
clf_name = 'knn'

for dataset_type in ['linear', 'cubic']:
    res = pd.DataFrame()
    for err_std in [0,0.001,0.01,0.1]:
        res = eval(res,
                   dataset_type=dataset_type,
                   clf_name=clf_name,
                   w_a=1,
                   w_epsx=0,
                   err_std=err_std,
                   err_bias=0)

    vis_alg(res,
            save_dir=f'./figures/synthetic/eststdana_{dataset_type}_{clf_name}',
            mute=['ermana','erm','cfr','cfe','pcfaug'])

In [None]:
# Noise-scale sweep with a small constant bias; the muted list leaves only
# the 'pcf' and 'pcfana' methods in the figure.
clf_name = 'knn'
bias = 0.001

for dataset_type in ['linear', 'cubic']:
    res = pd.DataFrame()
    for err_std in [0,0.001,0.01,0.1]:
        res = eval(res,
                   dataset_type=dataset_type,
                   clf_name=clf_name,
                   w_a=1,
                   w_epsx=0,
                   err_std=err_std,
                   err_bias=bias)

    vis_alg(res,
            save_dir=f'./figures/synthetic/eststdana_b{bias}_{dataset_type}_{clf_name}',
            mute=['ermana','erm','cfr','cfe','pcfaug'])