In [1]:
from lr.models.transformers.processor import clean_df
from lr.models.transformers.train_functions import set_seed
from lr.models.transformers.BertWrapper import BertWrapper
from lr.text_processing.transformations.structural import entailment_internalization  # noqa
from lr.stats.h_testing import *
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from time import time
import shutil
import os

## Get data

In [2]:
# Transformation under test; it is applied to the raw dev split below.
transformation = entailment_internalization

# Raw splits. The transformed dev set is derived from the *uncleaned* dev frame,
# then all three frames go through the same cleaning step.
train = pd.read_csv("data/toy/train.csv")
dev_o = pd.read_csv("data/toy/dev.csv")
dev_t = transformation(dev_o)

train, dev_o, dev_t = (
    clean_df(frame, n_cores=8) for frame in (train, dev_o, dev_t)
)

## Hyperparams

In [3]:
# Name of the data sub-folder; only used to build `base_path` below.
folder = "toy"

# Configuration dict handed to the model wrapper and to h_test_transformer.
# Grouped by concern; key names and insertion order are unchanged.
hyperparams = {
    # --- runtime / sequence handling ---
    "local_rank": -1,
    "max_seq_length": 200,
    "overwrite_cache": False,
    # --- optimisation schedule ---
    "num_train_epochs": 1.0,
    "per_gpu_train_batch_size": 32,
    "per_gpu_eval_batch_size": 50,
    "gradient_accumulation_steps": 1,
    "learning_rate": 5e-5,
    "weight_decay": 0.0,
    "adam_epsilon": 1e-8,
    "max_grad_norm": 1.0,
    "max_steps": 4,
    "warmup_steps": 0,
    "save_steps": 3,
    # --- hardware ---
    "no_cuda": False,
    "n_gpu": 1,
    # --- model / output ---
    "model_name_or_path": "bert",
    "output_dir": "bert_draft",
    "random_state": 42,
    "fp16": False,
    # Apex AMP levels are spelled with the letter "O" (O0..O3), not the digit
    # zero. The previous value "01" was a typo that only stayed harmless
    # because fp16 is disabled above.
    # NOTE(review): presumably forwarded to apex.amp.initialize - confirm in BertWrapper.
    "fp16_opt_level": "O1",
    "device": "cpu",
    "verbose": False,
    "model_type": "bert",
    # --- tokenisation / padding ---
    "pad_on_left": False,
    "pad_token": 0,
    "n_cores": 7,
    "eval_sample_size": 100,
    "pad_token_segment_id": 0,
    "mask_padding_with_zero": True,
    # Prefix for cached files, e.g. "data/toy/cached_".
    "base_path": "data/{}/cached_".format(folder),
}

## Sampling one training set from the DGP

In [4]:
# Seed using the configured random_state before drawing the sample below.
# NOTE(review): the second argument (0) is presumably a GPU count - confirm against set_seed.
set_seed(hyperparams["random_state"], 0)
# Build the DGP from the cleaned training frame and the transformation under test.
# NOTE(review): the meaning of rho=0.3 is not visible here - confirm in the DGP definition.
dgp = DGP(train, transformation, rho=0.3)
# `train_` is the sampled training frame used by every test cell that follows.
train_ = dgp.sample()

## Performing the test

In [5]:
# Step-by-step version of the hypothesis test. The local names mirror the
# keyword arguments passed to h_test_transformer further down.
df_train, df_dev, df_dev_t = train_, dev_o, dev_t
S = 1000
ModelWrapper = BertWrapper

init = time()

# Train the wrapped model on the sampled training frame.
transformer = ModelWrapper(hyperparams)
global_step, tr_loss, train_time = transformer.fit(df_train)

# Evaluate on the original and on the transformed dev sets.
dev_results = transformer.get_results(df_dev, mode="test")
dev_t_results = transformer.get_results(df_dev_t, mode="test_t")

# Observed paired t statistic on the matched results.
m_results = get_matched_results_transformers(dev_results, dev_t_results)
t_obs = get_paired_t_statistic(m_results)

# S bootstrap replications: resample under H0, then recompute the statistic.
t_boots = pd.Series(
    [
        get_paired_t_statistic(get_boot_sample_under_H0(m_results))
        for _ in range(S)
    ]
)

# Bootstrap p-value of the observed statistic.
p_value = get_boot_p_value(t_boots, t_obs)

test_time = time() - init

## Checking Values

In [6]:
# Regression checks for the step-by-step run.
# Computed floats are compared with a relative tolerance rather than `==`,
# so last-bit differences (e.g. summation order) do not cause spurious failures.
np.testing.assert_allclose(t_boots.sum(), 21.92981436880456, rtol=1e-6)
np.testing.assert_allclose(p_value, 0.604, rtol=1e-6)
np.testing.assert_allclose(t_obs, 0.5985858317644218, rtol=1e-6)
np.testing.assert_allclose(m_results.A.mean(), 0.36, rtol=1e-6)
np.testing.assert_allclose(m_results.B.mean(), 0.325, rtol=1e-6)

## Test main function

In [7]:
# Same inputs as the step-by-step cell, run through the packaged entry point.
test_results = h_test_transformer(
    df_train=train_,
    df_dev=dev_o,
    df_dev_t=dev_t,
    ModelWrapper=BertWrapper,
    hyperparams=hyperparams,
    S=1000,
)

In [8]:
# Regression checks: the packaged function must reproduce the values from the
# step-by-step run. Floats are compared with a relative tolerance, not `==`.
np.testing.assert_allclose(test_results.observable_t_stats[0], 0.5985858317644218, rtol=1e-6)
np.testing.assert_allclose(test_results.validation_accuracy[0], 0.36, rtol=1e-6)
np.testing.assert_allclose(test_results.transformed_validation_accuracy[0], 0.325, rtol=1e-6)
np.testing.assert_allclose(test_results.p_value[0], 0.604, rtol=1e-6)

# Sum of all bootstrap statistics in the first row (columns whose name contains "boot").
boot_columns = [c for c in test_results.columns if "boot" in c]
p_sum = test_results[boot_columns].sum(axis=1)[0]
np.testing.assert_allclose(p_sum, 21.92981436880456, rtol=1e-6)


In [9]:
# Re-run the test with a different seed.
# Note: this changes the shared `hyperparams` dict in place, so the new
# random_state stays in effect for anything executed afterwards.
hyperparams.update(random_state=123)

test_results = h_test_transformer(
    df_train=train_,
    df_dev=dev_o,
    df_dev_t=dev_t,
    ModelWrapper=BertWrapper,
    hyperparams=hyperparams,
    S=1000,
)

In [10]:
# Display the results frame produced by the random_state=123 run.
test_results

Unnamed: 0,validation_accuracy,transformed_validation_accuracy,observable_t_stats,p_value,training_time,test_time,boot_t_1,boot_t_2,boot_t_3,boot_t_4,...,boot_t_991,boot_t_992,boot_t_993,boot_t_994,boot_t_995,boot_t_996,boot_t_997,boot_t_998,boot_t_999,boot_t_1000
0,0.435,0.325,2.227113,0.024,91.905859,153.323272,-0.594613,-0.583267,0.770943,-0.103698,...,1.907694,-0.412744,-0.606649,0.809445,-0.600541,0.0,-0.594613,0.778204,-0.304675,-1.216661
