# Behavioral testing workflow

## Install necessary libraries for loading repo


In [None]:
%%capture
!pip install dvc fastds transformers
import os

## Set all credentials and download all necessary files/data for training

### Set up local repo and branch

In [None]:
# Clone repo with personal token (Settings -> Tokens -> Default Access Token).
# Replace {your_token} with your own token before running. The token becomes
# part of the clone's remote URL, so do not share or commit this notebook with a
# real token filled in.
!git clone https://{your_token}@dagshub.com/Omdena/NYU.git
# Move into the behavioral-testing task folder; the paths used in the following
# cells (test_data/, results/, ../task-4-...) are relative to it.
%cd NYU/tasks/task-5-testing

### Set up DVC and git

In [None]:
%%bash
# Point DVC at the DagsHub storage under the remote name "origin" and give it
# basic-auth credentials. Every setting is written with --local.
# Replace {your_username} and {your_token} with your own values before running.
dvc remote add --local origin https://dagshub.com/Omdena/NYU.dvc
dvc remote modify --local origin auth basic
dvc remote modify --local origin user {your_username}
dvc remote modify --local origin password {your_token}

In [None]:
%%bash
# Set the author identity for the commit made later in this notebook.
# Replace {your_email} and {your_username} with your own values before running.
# NOTE: --global changes the user-wide git configuration, not only this clone.
git config --global user.email "{your_email}"
git config --global user.name "{your_username}"

### Pull trained model and model arguments. Also pulls behavioral testing data.

In [None]:
!dvc pull -r origin

## Create behavioral testing pipeline (*This should be run only if there's a change in the pipeline that we want to update in DagsHub*)

In [None]:
!dvc run -n behavioral_test \
-d ../task-4-language-transformer-models/workflow/model_artifacts/model_best.pt \
-d ../task-4-language-transformer-models/workflow/model_artifacts/args.pt \
-d 'test_data/Behavioral Testing - MFT-Script.csv' \
-d 'test_data/Behavioral Testing - MFT-Neutral.csv' \
-d 'test_data/Behavioral Testing - MFT-Adversarial.csv' \
-d 'test_data/Behavioral Testing - INV-Typos.csv' \
-d test.py \
-m results/test_results.json \
--force \
--no-run-cache \
python test.py

In [None]:
# Start tracking the test_data directory with DVC. This produces the
# test_data.dvc pointer file that is committed to git in the next cell; the data
# itself is uploaded to the remote only by the later `dvc push` cell.
!dvc add test_data

In [None]:
# Stage the pipeline definition (dvc.yaml), its lock file (dvc.lock), the DVC
# pointer file for test_data and the .gitignore files for the next commit.
!git add .gitignore results/.gitignore dvc.lock dvc.yaml test_data.dvc

In [None]:
# Review what is staged before committing.
!git status

In [None]:
# Record the staged pipeline files in a local commit.
!git commit -m "Add behavioral test pipeline"

In [None]:
# Publish the commit to the git remote the repo was cloned from.
!git push

In [None]:
# Push data to repo
!dvc push -r origin

## Run only behavioral_test stage in pipeline

If there is no change in the saved model or the input test data, the stage is skipped and nothing is run, since the result is already cached.

The stage has to be specified with `-s behavioral_test` since this pipeline is connected to the output of the training pipeline. If you only run the command:

```
!dvc repro
```

then dvc will try to run the whole pipeline: first the training stage (`train_eval`) and then the behavioral test (`behavioral_test`).


In [None]:
# -s restricts the run to the behavioral_test stage.
!dvc repro -s behavioral_test