In [71]:
%load_ext autoreload
%autoreload 2

import numpy as np
import torch

from cot.run import debug_parse_option
from transformers import AutoTokenizer, AutoModelForCausalLM
from cot.dataset import CoTDataset
from cot.learner import run_model

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [72]:
# Build the run options through the project's debug option helper, in
# notebook mode.
args = debug_parse_option(notebook=True)

# Only the tokenizer is loaded here. The model load below is kept as an
# opt-in line: uncomment it when a cell actually needs to run the model.
tokenizer = AutoTokenizer.from_pretrained(
    args.model_id,
    cache_dir=args.hf_cache_dir,
)
# model = AutoModelForCausalLM.from_pretrained(args.model_id, cache_dir=args.hf_cache_dir)


In [73]:
# Preview the first training prompt for every (task, ablation) pair.
# To inspect a single task, slice this list (e.g. tasks[:1] or tasks[1:]).
tasks = ['nli', 'arithmetic']

# Per-task option overrides, applied on top of the defaults returned by
# debug_parse_option. Keys are attribute names on the options namespace.
task_overrides = {
    'arithmetic': {
        'dataset_name': 'arithmetic',
        'arithmetic_task_name': '3_digit_division',
        'bigbench_explanations_dataset': 'arithmetic_3_digit_division',
        'dataset_is_bigbench': True,
    },
    'nli': {
        'dataset_name': 'presuppositions_as_nli',
        'bigbench_explanations_dataset': 'presuppositions_as_nli',
        'dataset_is_bigbench': True,
        # NOTE(review): larger than the default used for the arithmetic task;
        # presumably needed for the longer NLI prompts -- confirm.
        'model_max_length': 1015,
    },
}

# Prompt-format ablations, keyed by ablation number. Defined once, outside the
# task loop, because the table does not depend on the task.
#   1: zero-shot, plain question
#   2: zero-shot with the step-by-step cue
#   3: one-shot, in-context example without its explanation
#   4: one-shot, in-context example with its explanation
ablations = {
    1 : {'n_shot' : 0, 'step_by_step' : False, 'no_explanation' : False},
    2 : {'n_shot' : 0, 'step_by_step' : True,  'no_explanation' : False},
    3 : {'n_shot' : 1, 'step_by_step' : False, 'no_explanation' : True},
    4 : {'n_shot' : 1, 'step_by_step' : False, 'no_explanation' : False},
}

for task in tasks:
    print(f'{task} task!')

    # Start from fresh options for each task so that overrides applied for a
    # previous task (e.g. model_max_length) do not leak into the next one.
    args = debug_parse_option(notebook=True)

    if task not in task_overrides:
        raise ValueError(f'Unknown task {task}')
    for option_name, option_value in task_overrides[task].items():
        setattr(args, option_name, option_value)

    for k, args_dict in ablations.items():
        print(f'\n\nAblation {k} !')

        # Assign by key rather than unpacking args_dict.values(): unpacking
        # silently depends on every ablation dict listing its keys in the
        # same order.
        args.n_shot = args_dict['n_shot']
        args.step_by_step = args_dict['step_by_step']
        args.no_explanation = args_dict['no_explanation']

        print(args)

        # Only the first training sample is inspected.
        x = CoTDataset(args, tokenizer, 'train')[0]
        print('\n## untokenized_sample ##\n')
        print(x['untokenized_sample'])

        print('\n## labels_untokenized ##\n')
        print(x['labels_untokenized'])


nli task!


Ablation 1 !
Namespace(train=True, wandb_run=None, model_id='bigscience/bloom-1b1', hf_cache_dir='datadump/hf', debug=True, model_max_length=1015, preprocessed_dir='datadump/preprocessed', dataset_name='presuppositions_as_nli', dataset_is_bigbench=True, arithmetic_task_name='3_digit_division', bigbench_explanations_dataset='presuppositions_as_nli', bigbench_explanations_type='handtuned', bigbench_explanations_path='data/bigbench-explanations/', n_shot=0, rebuild_cache=True, shuffle_cots=False, step_by_step=False, no_explanation=False, qae=False, lr=0.001, max_epochs=1, batch_size=8, seed=666, lora_r=8, lora_alpha=32, lora_dropout=0.05, lora_bias='none')


Found cached dataset json (/home/lcur1112/.cache/huggingface/datasets/json/default-ba76fde3db5b84f4/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4)


  0%|          | 0/2 [00:00<?, ?it/s]

Loading cached processed dataset at /home/lcur1112/.cache/huggingface/datasets/json/default-ba76fde3db5b84f4/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4/cache-3469f907dcb2dada.arrow
Loading cached processed dataset at /home/lcur1112/.cache/huggingface/datasets/json/default-ba76fde3db5b84f4/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4/cache-5767d5db5f3e8e3d.arrow
Loading cached processed dataset at /home/lcur1112/.cache/huggingface/datasets/json/default-ba76fde3db5b84f4/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4/cache-1ed7528e33f9afaf.arrow
Loading cached processed dataset at /home/lcur1112/.cache/huggingface/datasets/json/default-ba76fde3db5b84f4/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4/cache-460a42fcdacbf8e9.arrow


Making a subset of the dataset

## untokenized_sample ##

Sentence 1:  Thirty-three students died before the storm moved on. the winds finally dissipated at 4:30 P.M. After ripping up half of Princeton, the winds finally dissipated at 4:30 P.M. 
Sentence 2: The students ripped up half of Princeton.
A:

## labels_untokenized ##

['contradiction']


Ablation 2 !
Namespace(train=True, wandb_run=None, model_id='bigscience/bloom-1b1', hf_cache_dir='datadump/hf', debug=True, model_max_length=1015, preprocessed_dir='datadump/preprocessed', dataset_name='presuppositions_as_nli', dataset_is_bigbench=True, arithmetic_task_name='3_digit_division', bigbench_explanations_dataset='presuppositions_as_nli', bigbench_explanations_type='handtuned', bigbench_explanations_path='data/bigbench-explanations/', n_shot=0, rebuild_cache=True, shuffle_cots=False, step_by_step=True, no_explanation=False, qae=False, lr=0.001, max_epochs=1, batch_size=8, seed=666, lora_r=8, lora_alpha=32, lora_dropout=0.05, lora_bi

Found cached dataset json (/home/lcur1112/.cache/huggingface/datasets/json/default-ba76fde3db5b84f4/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4)


  0%|          | 0/2 [00:00<?, ?it/s]

Loading cached processed dataset at /home/lcur1112/.cache/huggingface/datasets/json/default-ba76fde3db5b84f4/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4/cache-3469f907dcb2dada.arrow
Loading cached processed dataset at /home/lcur1112/.cache/huggingface/datasets/json/default-ba76fde3db5b84f4/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4/cache-5767d5db5f3e8e3d.arrow
Loading cached processed dataset at /home/lcur1112/.cache/huggingface/datasets/json/default-ba76fde3db5b84f4/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4/cache-1ed7528e33f9afaf.arrow
Loading cached processed dataset at /home/lcur1112/.cache/huggingface/datasets/json/default-ba76fde3db5b84f4/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4/cache-460a42fcdacbf8e9.arrow


Making a subset of the dataset

## untokenized_sample ##

Sentence 1:  Thirty-three students died before the storm moved on. the winds finally dissipated at 4:30 P.M. After ripping up half of Princeton, the winds finally dissipated at 4:30 P.M. 
Sentence 2: The students ripped up half of Princeton.
Let's think step by step.
A:

## labels_untokenized ##

['contradiction']


Ablation 3 !
Namespace(train=True, wandb_run=None, model_id='bigscience/bloom-1b1', hf_cache_dir='datadump/hf', debug=True, model_max_length=1015, preprocessed_dir='datadump/preprocessed', dataset_name='presuppositions_as_nli', dataset_is_bigbench=True, arithmetic_task_name='3_digit_division', bigbench_explanations_dataset='presuppositions_as_nli', bigbench_explanations_type='handtuned', bigbench_explanations_path='data/bigbench-explanations/', n_shot=1, rebuild_cache=True, shuffle_cots=False, step_by_step=False, no_explanation=True, qae=False, lr=0.001, max_epochs=1, batch_size=8, seed=666, lora_r=8, lora_alpha=32, 

Found cached dataset json (/home/lcur1112/.cache/huggingface/datasets/json/default-ba76fde3db5b84f4/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4)


  0%|          | 0/2 [00:00<?, ?it/s]

Loading cached processed dataset at /home/lcur1112/.cache/huggingface/datasets/json/default-ba76fde3db5b84f4/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4/cache-3469f907dcb2dada.arrow
Loading cached processed dataset at /home/lcur1112/.cache/huggingface/datasets/json/default-ba76fde3db5b84f4/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4/cache-5767d5db5f3e8e3d.arrow
Loading cached processed dataset at /home/lcur1112/.cache/huggingface/datasets/json/default-ba76fde3db5b84f4/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4/cache-1ed7528e33f9afaf.arrow
Loading cached processed dataset at /home/lcur1112/.cache/huggingface/datasets/json/default-ba76fde3db5b84f4/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4/cache-460a42fcdacbf8e9.arrow


Making a subset of the dataset

## untokenized_sample ##

Sentence 1:In 1975, Peacock didn't begin filming grizzly bears, hoping it would help him plead the bears' case. 
Sentence 2: Peacock didn't train grizzly bears before 1975.
A:neutral
Sentence 1:  Thirty-three students died before the storm moved on. the winds finally dissipated at 4:30 P.M. After ripping up half of Princeton, the winds finally dissipated at 4:30 P.M. 
Sentence 2: The students ripped up half of Princeton.
A:

## labels_untokenized ##

['contradiction']


Ablation 4 !
Namespace(train=True, wandb_run=None, model_id='bigscience/bloom-1b1', hf_cache_dir='datadump/hf', debug=True, model_max_length=1015, preprocessed_dir='datadump/preprocessed', dataset_name='presuppositions_as_nli', dataset_is_bigbench=True, arithmetic_task_name='3_digit_division', bigbench_explanations_dataset='presuppositions_as_nli', bigbench_explanations_type='handtuned', bigbench_explanations_path='data/bigbench-explanations/', n_shot=1, rebuild_

Found cached dataset json (/home/lcur1112/.cache/huggingface/datasets/json/default-ba76fde3db5b84f4/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4)


  0%|          | 0/2 [00:00<?, ?it/s]

Loading cached processed dataset at /home/lcur1112/.cache/huggingface/datasets/json/default-ba76fde3db5b84f4/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4/cache-3469f907dcb2dada.arrow
Loading cached processed dataset at /home/lcur1112/.cache/huggingface/datasets/json/default-ba76fde3db5b84f4/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4/cache-5767d5db5f3e8e3d.arrow
Loading cached processed dataset at /home/lcur1112/.cache/huggingface/datasets/json/default-ba76fde3db5b84f4/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4/cache-1ed7528e33f9afaf.arrow
Loading cached processed dataset at /home/lcur1112/.cache/huggingface/datasets/json/default-ba76fde3db5b84f4/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4/cache-460a42fcdacbf8e9.arrow


Making a subset of the dataset

## untokenized_sample ##

Sentence 1:In 1975, Peacock didn't begin filming grizzly bears, hoping it would help him plead the bears' case. 
Sentence 2: Peacock didn't train grizzly bears before 1975.
Explanation:It's possible that Peacock had been training grizzly bears previously even without filming them; so the relationship is neutral, sentence 1 neither entails nor contradicts sentence 2.
A:neutral
Sentence 1:  Thirty-three students died before the storm moved on. the winds finally dissipated at 4:30 P.M. After ripping up half of Princeton, the winds finally dissipated at 4:30 P.M. 
Sentence 2: The students ripped up half of Princeton.
Explanation:

## labels_untokenized ##

['contradiction']
arithmetic task!


Ablation 1 !
Namespace(train=True, wandb_run=None, model_id='bigscience/bloom-1b1', hf_cache_dir='datadump/hf', debug=True, model_max_length=512, preprocessed_dir='datadump/preprocessed', dataset_name='arithmetic', dataset_is_bigbench=True, ari

Found cached dataset bigbench (/home/lcur1112/CoT/datadump/hf/tasksource___bigbench/arithmetic/1.0.0/c5da5ac497141c7435da10444495b8577405d4ed01e524265b144a7063718c0c)


Making a subset of the dataset

## untokenized_sample ##

Q: What is 972 divided by 36?
  choice: 27
  choice: 43
  choice: banana
  choice: 4817
  choice: 86
  choice: 7
  choice: house
A:

## labels_untokenized ##

['27']


Ablation 2 !
Namespace(train=True, wandb_run=None, model_id='bigscience/bloom-1b1', hf_cache_dir='datadump/hf', debug=True, model_max_length=512, preprocessed_dir='datadump/preprocessed', dataset_name='arithmetic', dataset_is_bigbench=True, arithmetic_task_name='3_digit_division', bigbench_explanations_dataset='arithmetic_3_digit_division', bigbench_explanations_type='handtuned', bigbench_explanations_path='data/bigbench-explanations/', n_shot=0, rebuild_cache=True, shuffle_cots=False, step_by_step=True, no_explanation=False, qae=False, lr=0.001, max_epochs=1, batch_size=8, seed=666, lora_r=8, lora_alpha=32, lora_dropout=0.05, lora_bias='none')


Found cached dataset bigbench (/home/lcur1112/CoT/datadump/hf/tasksource___bigbench/arithmetic/1.0.0/c5da5ac497141c7435da10444495b8577405d4ed01e524265b144a7063718c0c)


Making a subset of the dataset

## untokenized_sample ##

Q: What is 972 divided by 36?
  choice: 27
  choice: 43
  choice: banana
  choice: 4817
  choice: 86
  choice: 7
  choice: house
Let's think step by step.
A:

## labels_untokenized ##

['27']


Ablation 3 !
Namespace(train=True, wandb_run=None, model_id='bigscience/bloom-1b1', hf_cache_dir='datadump/hf', debug=True, model_max_length=512, preprocessed_dir='datadump/preprocessed', dataset_name='arithmetic', dataset_is_bigbench=True, arithmetic_task_name='3_digit_division', bigbench_explanations_dataset='arithmetic_3_digit_division', bigbench_explanations_type='handtuned', bigbench_explanations_path='data/bigbench-explanations/', n_shot=1, rebuild_cache=True, shuffle_cots=False, step_by_step=False, no_explanation=True, qae=False, lr=0.001, max_epochs=1, batch_size=8, seed=666, lora_r=8, lora_alpha=32, lora_dropout=0.05, lora_bias='none')


Found cached dataset bigbench (/home/lcur1112/CoT/datadump/hf/tasksource___bigbench/arithmetic/1.0.0/c5da5ac497141c7435da10444495b8577405d4ed01e524265b144a7063718c0c)


Making a subset of the dataset

## untokenized_sample ##

Q:What is 842 divided by 1?
  choice: 456
  choice: 842
  choice: house
  choice: 14513
  choice: 1
  choice: banana
  choice: 820
A:842
Q: What is 972 divided by 36?
  choice: 27
  choice: 43
  choice: banana
  choice: 4817
  choice: 86
  choice: 7
  choice: house
A:

## labels_untokenized ##

['27']


Ablation 4 !
Namespace(train=True, wandb_run=None, model_id='bigscience/bloom-1b1', hf_cache_dir='datadump/hf', debug=True, model_max_length=512, preprocessed_dir='datadump/preprocessed', dataset_name='arithmetic', dataset_is_bigbench=True, arithmetic_task_name='3_digit_division', bigbench_explanations_dataset='arithmetic_3_digit_division', bigbench_explanations_type='handtuned', bigbench_explanations_path='data/bigbench-explanations/', n_shot=1, rebuild_cache=True, shuffle_cots=False, step_by_step=False, no_explanation=False, qae=False, lr=0.001, max_epochs=1, batch_size=8, seed=666, lora_r=8, lora_alpha=32, lora_dropout=0.05, l

Found cached dataset bigbench (/home/lcur1112/CoT/datadump/hf/tasksource___bigbench/arithmetic/1.0.0/c5da5ac497141c7435da10444495b8577405d4ed01e524265b144a7063718c0c)


Making a subset of the dataset

## untokenized_sample ##

Q:What is 842 divided by 1?
  choice: 456
  choice: 842
  choice: house
  choice: 14513
  choice: 1
  choice: banana
  choice: 820
Explanation:Dividing any number by 1 does not change it, so 842 / 1 = 842.
A:842
Q: What is 972 divided by 36?
  choice: 27
  choice: 43
  choice: banana
  choice: 4817
  choice: 86
  choice: 7
  choice: house
Explanation:

## labels_untokenized ##

['27']
