# Test

In [2]:
# Load IPython's autoreload extension so edits to imported modules are picked up
# without restarting the kernel.
%load_ext autoreload
# Mode 2: reload all modules (except those excluded via %aimport) before each cell runs.
%autoreload 2

### Import datasets

Import datasets using the functions in `src/data/data.py`. Datasets are downloaded from Hugging Face and stored in `/data`. Once downloaded, they are loaded from the local copy.

Run `pip install -e .` if importing the `src` modules fails.

In [3]:
from src.data.data import get_in_domain, get_out_domain
from src.data.data import get_random_subsets

# Load both splits up front: the in-domain set and the out-of-domain set.
in_domain, out_domain = get_in_domain(), get_out_domain()

# For each dataset, show its summary followed by one raw record so the two
# schemas can be compared. `sep="\n"` puts the record on its own line.
print(f"In domain:\n{in_domain}", in_domain[0], sep="\n")
print(f"Out of domain:\n{out_domain}", out_domain[10], sep="\n")

# get_random_subsets(in_domain)

In domain:
Dataset({
    features: ['premise', 'hypothesis', 'label', 'idx'],
    num_rows: 261802
})
{'premise': 'you know during the season and i guess at at your level uh you lose them to the next level if if they decide to recall the the parent team the Braves decide to call to recall a guy from triple A then a double A guy goes up to replace him and a single A guy goes up to replace him', 'hypothesis': 'You lose the things to the following level if the people recall.', 'label': 0, 'idx': 1}
Out of domain:
Dataset({
    features: ['premise', 'hypothesis', 'label', 'parse_premise', 'parse_hypothesis', 'binary_parse_premise', 'binary_parse_hypothesis', 'heuristic', 'subcase', 'template'],
    num_rows: 10000
})
{'premise': 'The president avoided the athlete .', 'hypothesis': 'The athlete avoided the president .', 'label': 1, 'parse_premise': '(ROOT (S (NP (DT The) (NN president)) (VP (VBD avoided) (NP (DT the) (NN athlete))) (. .)))', 'parse_hypothesis': '(ROOT (S (NP (DT The) (NN at

### Import models

Import models using `get_model` from `src/model/model.py`. Models are downloaded from Hugging Face and stored in `/models/pretrained`. Once downloaded, they are loaded from the local copy.

In [4]:
from src.model.model import get_model

# Load the two OPT checkpoints by name; each call is unpacked as a (model, tokenizer) pair.
model_opt125, tokenizer_opt125 = get_model('opt-125m')
# NOTE(review): model_opt350 / tokenizer_opt350 are not referenced in the cells shown
# here — confirm a later cell needs them before paying the load cost.
model_opt350, tokenizer_opt350 = get_model('opt-350m')

### Few-shot finetuning

Perform few-shot fine-tuning using the `fine_tune` function from `src/finetuners/fewshot.py`. It requires the model, tokenizer, and training and evaluation datasets to be passed in as parameters, and it returns the training and evaluation metrics.

In [5]:
from src.finetuners.fewshot import fine_tune

# Number of leading examples taken from each dataset for this smoke test.
# Two examples only show that the pipeline runs end to end; the reported
# accuracy on such a tiny evaluation set is not meaningful.
NUM_DEMO_EXAMPLES = 2

train_dataset = in_domain.select(range(NUM_DEMO_EXAMPLES))
eval_dataset = out_domain.select(range(NUM_DEMO_EXAMPLES))

# Fine-tune a freshly loaded checkpoint instead of the shared `model_opt125`.
# fine_tune presumably updates the model it receives in place (batch_fine_tune
# reloads the model per trial for that reason — confirm). Reusing the shared
# object would make every re-run of this cell continue from the previous run's
# weights and would hand an already fine-tuned model to later cells.
fresh_model, fresh_tokenizer = get_model('opt-125m')

# Last expression of the cell: the returned metrics dict is displayed as the cell output.
fine_tune(model=fresh_model, tokenizer=fresh_tokenizer, train_dataset=train_dataset, eval_dataset=eval_dataset)

100%|██████████| 40/40 [00:11<00:00,  3.62it/s]


{'train_runtime': 11.0397, 'train_samples_per_second': 7.247, 'train_steps_per_second': 3.623, 'train_loss': 0.061122357845306396, 'epoch': 40.0, 'train_peak_memory_gb': 2.502718925476074}


100%|██████████| 1/1 [00:02<00:00,  2.59s/it]


{'train_runtime': 11.0397,
 'train_samples_per_second': 7.247,
 'train_steps_per_second': 3.623,
 'train_loss': 0.061122357845306396,
 'epoch': 40.0,
 'train_peak_memory_gb': 2.502718925476074,
 'eval_loss': 5.777809143066406,
 'eval_accuracy': 0.0,
 'eval_runtime': 2.6514,
 'eval_samples_per_second': 0.754,
 'eval_steps_per_second': 0.377,
 'eval_peak_memory_gb': 1.5613317489624023}

### Batch few-shot finetuning

Perform batch few-shot fine-tuning using `batch_fine_tune` from `src/finetuners/fewshot.py`. This function only requires `model_name`, the datasets, `sample_sizes`, and `num_trials`. The selected model is reloaded from `/models/pretrained` for each trial to prevent cumulative fine-tuning. `sample_sizes` lists the number of shots to run; each shot count is repeated for `num_trials` trials. Each trial is trained on data randomly selected from `train_dataset` (in domain) by `get_random_subsets` from `src/data/data.py`. Results are written to a CSV file in `/logs`.

In [6]:
from src.finetuners.fewshot import batch_fine_tune
import json

# TODO: decide how much of the out-of-domain set to evaluate on. The original
# note points to "50 (pg. 6)" — presumably a page of the reference paper; confirm.
# Only the first 10 examples are used for now.
eval_dataset = out_domain.select(range(10))

# Run 5 trials for each shot count (2 and 4), training on in-domain data and
# evaluating on the out-of-domain slice selected above.
results = batch_fine_tune(
    model_name='opt-125m',
    train_dataset=in_domain,
    eval_dataset=eval_dataset,
    sample_sizes=[2, 4],
    num_trials=5,
)

# Pretty-print the collected metrics as indented JSON.
results_json = json.dumps(results, indent=4)
print(results_json)

2-shot: 100%|██████████| 5/5 [01:18<00:00, 15.71s/it, train_runtime=11.9, train_samples_per_second=6.74, train_steps_per_second=3.37, total_flos=2.09e+13, train_loss=0.0808, epoch=40, train_peak_memory_gb=2.99, eval_loss=0.584, eval_accuracy=0.7, eval_runtime=2.63, eval_samples_per_second=3.8, eval_steps_per_second=0.759, eval_peak_memory_gb=2.45] 
4-shot: 100%|██████████| 5/5 [02:07<00:00, 25.46s/it, train_runtime=21.4, train_samples_per_second=7.46, train_steps_per_second=1.87, total_flos=4.18e+13, train_loss=0.122, epoch=40, train_peak_memory_gb=4.05, eval_loss=0.599, eval_accuracy=0.6, eval_runtime=2.57, eval_samples_per_second=3.89, eval_steps_per_second=0.778, eval_peak_memory_gb=2.45]

{
    "2": [
        {
            "train_runtime": 9.2539,
            "train_samples_per_second": 8.645,
            "train_steps_per_second": 4.323,
            "total_flos": 20903740047360.0,
            "train_loss": 0.05942646861076355,
            "epoch": 40.0,
            "train_peak_memory_gb": 2.9804773330688477,
            "eval_loss": 0.00859286542981863,
            "eval_accuracy": 1.0,
            "eval_runtime": 2.557,
            "eval_samples_per_second": 3.911,
            "eval_steps_per_second": 0.782,
            "eval_peak_memory_gb": 2.4387941360473633
        },
        {
            "train_runtime": 9.6594,
            "train_samples_per_second": 8.282,
            "train_steps_per_second": 4.141,
            "total_flos": 20903740047360.0,
            "train_loss": 0.08801689147949218,
            "epoch": 40.0,
            "train_peak_memory_gb": 2.9860925674438477,
            "eval_loss": 0.6015669107437134,
            "eval_accuracy": 0.7,
            


