In [1]:
import numpy as np
from tqdm import tqdm

from gen_data import generate_data
from model import Model
import utils



In [11]:
# Seed NumPy's global random state so NumPy-based randomness is repeatable.
# NOTE(review): this cell's execution count (11) is higher than that of the
# experiment cell (4), so the recorded results below may have been produced
# before this seed was set -- confirm with Restart Kernel -> Run All.
# NOTE(review): only NumPy is seeded here; whether `Model` or `generate_data`
# draw from another RNG is not visible from this notebook -- verify.
np.random.seed(0)

In [2]:
def extract_weights(W, b, add_zero=False):
    """Stack a weight tensor and its bias into one flat NumPy vector.

    Parameters
    ----------
    W, b : objects exposing ``.numpy()``
        Converted to arrays via ``W.numpy()`` / ``b.numpy()``.
        (assumes W is a column of shape (n, 1) so it can be stacked with a
        single zero / the bias -- TODO confirm against ``Model``)
    add_zero : bool, default False
        When true, a single ``0.0`` row is inserted between the weights and
        the bias.

    Returns
    -------
    numpy.ndarray
        1-D array laid out as ``[weights..., (0.0,) bias...]``.
    """
    kernel = W.numpy()
    offset = b.numpy()

    # Collect the pieces in output order, then stack them in one go.
    pieces = [kernel]
    if add_zero:
        pieces.append(np.zeros(1))
    pieces.append(offset)

    return np.vstack(pieces).flatten()

In [None]:
def run_model(train, test, loss_type):
    """Fit a fresh ``Model`` on ``train`` and evaluate it on ``test``.

    Parameters
    ----------
    train : tuple
        ``(X_train, y_train)``. ``X_train.shape[1]`` is passed to the
        ``Model`` constructor.
    test : tuple
        ``(X_test, y_test)``.
    loss_type : str
        Forwarded unchanged to ``Model.fit`` (this notebook passes ``'bce'``
        or ``'out'``).

    Returns
    -------
    tuple
        ``(acc, pseudo_feature_weight)`` where ``acc`` is
        ``utils.accuracy(y_test, y_pred)`` and ``pseudo_feature_weight`` is
        ``model.W[-1]``, the last entry of the fitted model's ``W``.
    """
    # Unpack the caller's splits so the function uses exactly the data it was
    # given and never depends on same-named notebook globals.
    X_train, y_train = train
    X_test, y_test = test

    model = Model(X_train.shape[1])
    model.fit(X_train, y_train, loss_type)
    y_pred = model.predict(X_test)
    acc = utils.accuracy(y_test, y_pred)

    # presumably the pseudo (artifact) feature is the last input column, so
    # its weight is the last row of W -- verify against gen_data / Model.
    pseudo_feature_weight = model.W[-1]
    return acc, pseudo_feature_weight

In [3]:
def experiment(n_trials=10):
    """Repeat the three-way model comparison and summarise the results.

    Each trial calls ``generate_data()`` once and unpacks its three results
    (``data_in``, ``data_in_out``, ``data_original``) as
    ``(X_train, y_train, X_test, y_test)`` tuples. Three models are then
    trained via ``run_model``:

    * original  -- ``data_original`` with loss ``'bce'``
    * baseline  -- ``data_in`` with loss ``'bce'``
    * regularized -- ``data_in_out`` with loss ``'out'``

    Parameters
    ----------
    n_trials : int, default 10
        Number of independent trials to run.

    Returns
    -------
    tuple of dict
        ``(og_stats, baseline_stats, regularized_stats)``. Every dict has
        ``'mean_acc'`` and ``'std'`` (mean / standard deviation of accuracy
        over the trials); the baseline and regularized dicts additionally
        have ``'mean_pseudo_weight'``, the mean of the pseudo-feature weight
        returned by ``run_model``.
    """
    accs_og = []
    accs_regularized = []
    accs_baseline = []
    pseudo_w_regularized = []
    pseudo_w_baseline = []

    for _ in tqdm(range(n_trials)):
        data_in, data_in_out, data_original = generate_data()
        X_train_base, y_train_base, X_test_base, y_test_base = data_in
        X_train, y_train, X_test, y_test = data_in_out
        X_train_og, y_train_og, X_test_og, y_test_og = data_original

        # Original data: only the accuracy is kept; the second return value
        # is discarded for this run.
        acc, _unused_weight = run_model((X_train_og, y_train_og),
                                        (X_test_og, y_test_og),
                                        'bce')
        accs_og.append(acc)

        # Baseline: plain 'bce' loss on the `data_in` split.
        acc, pseudo_weight = run_model((X_train_base, y_train_base),
                                       (X_test_base, y_test_base),
                                       'bce')
        accs_baseline.append(acc)
        pseudo_w_baseline.append(pseudo_weight)

        # Regularized: 'out' loss on the `data_in_out` split.
        acc, pseudo_weight = run_model((X_train, y_train),
                                       (X_test, y_test),
                                       'out')
        accs_regularized.append(acc)
        pseudo_w_regularized.append(pseudo_weight)

    # Summaries are built from the lists filled in the loop above
    # (pseudo_w_*), so the function does not rely on any outside names.
    baseline_stats = {'mean_acc': np.mean(accs_baseline),
                      'std': np.std(accs_baseline),
                      'mean_pseudo_weight': np.mean(pseudo_w_baseline)}

    og_stats = {'mean_acc': np.mean(accs_og),
                'std': np.std(accs_og)}

    regularized_stats = {'mean_acc': np.mean(accs_regularized),
                         'std': np.std(accs_regularized),
                         'mean_pseudo_weight': np.mean(pseudo_w_regularized)}

    return og_stats, baseline_stats, regularized_stats

In [4]:
# Run the full comparison over 100 trials and keep the three summary dicts
# (original data, baseline, regularized) for display in the cells below.
og_stats, baseline_stats, regularized_stats = experiment(n_trials=100)

  0%|          | 0/100 [00:00<?, ?it/s]

Instructions for updating:
Use tf.identity instead.


100%|██████████| 100/100 [02:31<00:00,  1.51s/it]


In [5]:
# Summary returned by experiment() for the runs on `data_original`.
og_stats

{'mean_acc': 0.9459375, 'std': 0.030572339929256315}

In [6]:
# Summary returned by experiment() for the 'bce' runs on `data_in`.
# NOTE(review): the recorded output below contains the keys
# 'mean_artifact_weight' and 'mean_distance', which experiment() as written
# does not build (it uses 'mean_pseudo_weight' and has no distance entry).
# The output appears to come from an earlier version -- re-run to refresh.
baseline_stats

{'mean_acc': 0.6646875,
 'std': 0.09242342692061359,
 'mean_artifact_weight': 4.211347101934146,
 'mean_distance': 4.961079748276202}

In [7]:
# Summary returned by experiment() for the 'out' runs on `data_in_out`.
# NOTE(review): the recorded output below contains the keys
# 'mean_artifact_weight' and 'mean_distance', which experiment() as written
# does not build (it uses 'mean_pseudo_weight' and has no distance entry).
# The output appears to come from an earlier version -- re-run to refresh.
regularized_stats

{'mean_acc': 0.91203125,
 'std': 0.026484720683584713,
 'mean_artifact_weight': 0.5313282745580511,
 'mean_distance': 1.7987647188935287}