In [1]:
import time
import sys
import torch
import transformers
import numpy as np
from pathlib import Path
from torch.utils.data import DataLoader, RandomSampler, DistributedSampler, SequentialSampler
from src.options import Options

import src.slurm
import src.util
import src.evaluation
import src.data
import src.model


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Record the installed PyTorch build in the notebook output.
print(torch.__version__)

1.11.0+cu113


In [3]:
def train(model, optimizer, scheduler, step, train_dataset, eval_dataset, opt, collator, best_dev_em, checkpoint_path):
    """Run the training loop until ``step`` reaches ``opt.total_steps``.

    Args:
        model: module called as ``model(input_ids=..., attention_mask=..., labels=...)``;
            element ``[0]`` of its return value is used as the loss.
        optimizer: optimizer stepped every ``opt.accumulation_steps`` steps.
        scheduler: learning-rate scheduler stepped together with the optimizer.
        step: number of steps already performed (non-zero when resuming).
        train_dataset: dataset sampled with a ``RandomSampler``.
        eval_dataset: dataset handed to ``evaluate`` every ``opt.eval_freq`` steps.
        opt: parsed options; this function reads ``is_main``, ``checkpoint_dir``,
            ``name``, ``global_rank``, ``seed``, ``per_gpu_batch_size``,
            ``total_steps``, ``accumulation_steps``, ``clip``, ``eval_freq`` and
            ``save_freq``.
        collator: ``collate_fn`` for the data loader; each batch must unpack as
            ``(idx, labels, _, context_ids, context_mask)``.
        best_dev_em: value forwarded unchanged to ``src.util.save``.
        checkpoint_path: directory forwarded to ``src.util.save``.

    Returns:
        None.

    Raises:
        ValueError: if training is still required but the data loader yields no
            batch at all, which would otherwise spin forever.

    Relies on the notebook-level names ``logger``, ``tokenizer`` and ``evaluate``.
    """
    # Bind tb_logger on every rank so later references can never hit an
    # unbound name, whatever opt.is_main is.
    tb_logger = None
    if opt.is_main:
        try:
            # `import torch` does not load the tensorboard subpackage, so import
            # it explicitly instead of relying on attribute access.
            from torch.utils.tensorboard import SummaryWriter
            tb_logger = SummaryWriter(Path(opt.checkpoint_dir)/opt.name)
        except Exception:
            # Best effort: training continues without TensorBoard. Unlike a bare
            # `except:`, this does not swallow KeyboardInterrupt/SystemExit.
            logger.warning('Tensorboard is not available.')

    torch.manual_seed(opt.global_rank + opt.seed) #different seed for different sampling depending on global_rank
    train_sampler = RandomSampler(train_dataset)
    train_dataloader = DataLoader(
        train_dataset,
        sampler=train_sampler,
        batch_size=opt.per_gpu_batch_size,
        drop_last=True,
        num_workers=10,
        collate_fn=collator
    )

    # With drop_last=True a dataset smaller than one batch produces no batches,
    # and the outer while-loop below would then never terminate.
    if step < opt.total_steps and len(train_dataloader) == 0:
        raise ValueError(
            "train_dataset yields no full batch for per_gpu_batch_size="
            f"{opt.per_gpu_batch_size}; cannot train."
        )

    curr_loss = 0.0
    model.train()
    while step < opt.total_steps:
        # Each pass over the loader is one epoch; RandomSampler reshuffles.
        for batch in train_dataloader:
            step += 1
            (_, labels, _, context_ids, context_mask) = batch

            train_loss = model(
                input_ids=context_ids.cuda(),
                attention_mask=context_mask.cuda(),
                labels=labels.cuda()
            )[0]

            train_loss.backward()

            # Gradients accumulate across steps; update only on the boundary.
            if step % opt.accumulation_steps == 0:
                torch.nn.utils.clip_grad_norm_(model.parameters(), opt.clip)
                optimizer.step()
                scheduler.step()
                model.zero_grad()

            train_loss = src.util.average_main(train_loss, opt)
            curr_loss += train_loss.item()

            if step % opt.eval_freq == 0:
                dev_em = evaluate(model, eval_dataset, tokenizer, collator, opt ,step)
                model.train()  # evaluate() switches the model to eval mode
                if opt.is_main:
                    # NOTE: saving a 'best_dev' checkpoint stays disabled, as in
                    # the original cell; best_dev_em is only passed through.
                    log = f"{step} / {opt.total_steps} |"
                    log += f"train: {curr_loss/opt.eval_freq:.3f} |"
                    # Report the measured score rather than a constant 100.
                    log += f"evaluation: {100*dev_em:.2f}EM |"
                    log += f"lr: {scheduler.get_last_lr()[0]:.5f}"
                    logger.info(log)
                    if tb_logger is not None:
                        tb_logger.add_scalar("Evaluation", dev_em, step)
                        tb_logger.add_scalar("Training", curr_loss / (opt.eval_freq), step)
                    curr_loss = 0.

            if opt.is_main and step % opt.save_freq == 0:
                src.util.save(model, optimizer, scheduler, step, best_dev_em,
                          opt, checkpoint_path, f"step-{step}")
            # `>=` stops exactly at total_steps; `>` performed one extra step.
            if step >= opt.total_steps:
                break

def evaluate(model, dataset, tokenizer, collator, opt, step):
    """Generate answers for ``dataset``, dump them to disk and return exact match.

    Args:
        model: model (optionally wrapped so that the real model is ``model.module``)
            exposing ``generate``.
        dataset: evaluation dataset; must provide ``get_example(index)`` returning a
            mapping with an ``'answers'`` list.
        tokenizer: tokenizer used to decode generated ids.
        collator: ``collate_fn`` for the data loader; each batch must unpack as
            ``(idx, _, _, context_ids, context_mask)``.
        opt: parsed options; ``per_gpu_batch_size`` is read here and ``opt`` is
            forwarded to ``src.util.weighted_average``.
        step: training step, used only to name the output files.

    Returns:
        The first element returned by ``src.util.weighted_average`` for the mean
        exact-match score (0.0 is used as the local mean when nothing was scored).

    Side effects:
        Writes ``answer2/golden_<step>.txt`` and ``answer2/preds_<step>.txt`` as
        ``index<TAB>text`` lines, creating ``answer2/`` if needed.
        NOTE(review): in a distributed run every rank appears to write the same
        paths — confirm whether only the main rank should write.
    """
    sampler = SequentialSampler(dataset)
    dataloader = DataLoader(dataset,
        sampler=sampler,
        batch_size=opt.per_gpu_batch_size,
        drop_last=False,
        num_workers=10,
        collate_fn=collator
    )
    model.eval()
    total = 0
    exactmatch = []
    preds = []
    target = []
    # Unwrap a wrapper that exposes the real model as `.module`.
    model = model.module if hasattr(model, "module") else model
    with torch.no_grad():
        for batch in dataloader:
            (idx, _, _, context_ids, context_mask) = batch

            outputs = model.generate(
                input_ids=context_ids.cuda(),
                attention_mask=context_mask.cuda(),
                max_length=50
            )

            for k, o in enumerate(outputs):
                ans = tokenizer.decode(o, skip_special_tokens=True)
                gold = dataset.get_example(idx[k])['answers']
                # Score every example; without this the function averaged an
                # empty list and always returned NaN.
                exactmatch.append(src.evaluation.ems(ans, gold))
                total += 1
                preds.append([ans])
                target.append(gold)

    # The output directory is not guaranteed to exist on a fresh checkout.
    Path('answer2').mkdir(parents=True, exist_ok=True)
    with open('answer2/golden_'+str(step)+'.txt','w') as f:
        for i, line in enumerate(target):
            # Only the first gold answer is written; tolerate an empty list.
            f.write(str(i)+'\t'+(line[0] if line else '')+'\n')
    with open('answer2/preds_'+str(step)+'.txt','w') as f:
        for i, line in enumerate(preds):
            f.write(str(i)+'\t'+line[0]+'\n')

    # np.mean([]) is NaN (with a RuntimeWarning); use 0.0 when nothing was scored.
    mean_em = np.mean(exactmatch) if exactmatch else 0.0
    exactmatch, total = src.util.weighted_average(mean_em, total, opt)
    return exactmatch

In [4]:
# Display the arguments the kernel process was started with.
sys.argv

['/home/shifu/.conda/envs/fid4/lib/python3.8/site-packages/ipykernel_launcher.py',
 '--ip=127.0.0.1',
 '--stdin=9095',
 '--control=9093',
 '--hb=9092',
 '--Session.signature_scheme="hmac-sha256"',
 '--Session.key=b"4f8710b2-821a-4336-85c8-cd1f0c27bde2"',
 '--shell=9094',
 '--transport="tcp"',
 '--iopub=9096',
 '--f=/home/shifu/.local/share/jupyter/runtime/kernel-v2-3352m4eiONCdXpYg.json']

In [5]:
# Replace the kernel's own argv with the command line for this run, laid out as
# one flag/value pair per line. The first entry stands in for the program name.
sys.argv = [
    '/home/shifu/.conda/envs/fid2/lib/python3.8/site-packages/ipykernel_launcher.py',
    '--train_data', '/home/shifu/FiD/codefid_data3/train_php_10.json',
    '--eval_data', '/home/shifu/FiD/codefid_data3/test_php_10.json',
    '--model_path', '/home/shifu/FiD/checkpoint/my_code_fid4/checkpoint/step-3000',
    '--text_maxlength', '200',
    '--answer_maxlength', '100',
    '--save_freq', '1500',
    '--model_size', 'base',
    '--per_gpu_batch_size', '10',
    '--total_steps', '1000000',
    '--n_context', '10',
    '--name', 'my_code_fid5',
    '--checkpoint_dir', 'checkpoint',
    '--use_checkpoint',  # flag without a value
    '--eval_freq', '1500',
]


In [6]:
# Register the reader and optimizer argument groups, then parse.
# NOTE(review): parse() presumably reads sys.argv as overridden in the cell
# above — confirm in src.options.
options = Options()
options.add_reader_options()
options.add_optim_options()
opt = options.parse()

In [7]:
# Seed PyTorch's RNG, then run the project's distributed-mode and
# signal-handler initialisation (both implemented in src.slurm).
torch.manual_seed(opt.seed)
src.slurm.init_distributed_mode(opt)
src.slurm.init_signal_handler()

In [8]:
# Run directory: <checkpoint_dir>/<name>.
checkpoint_path = Path(opt.checkpoint_dir)/opt.name
# Record whether the directory already existed *before* creating it; a later
# cell uses this flag to choose between a fresh model and a saved checkpoint.
checkpoint_exists = checkpoint_path.exists()
if opt.is_distributed:
    # Presumably ensures every rank has checked exists() before any rank
    # creates the directory — TODO confirm.
    torch.distributed.barrier()
checkpoint_path.mkdir(parents=True, exist_ok=True)

In [9]:
# Module-level logger used by train(); the log file lives in the run directory.
logger = src.util.init_logger(
    opt.is_main,
    opt.is_distributed,
    checkpoint_path / 'run.log'
)

# Pretrained model identifier built from the --model_size option (e.g. 't5-base').
model_name = 't5-' + opt.model_size
# Model class handed to src.util.load when restoring a checkpoint.
model_class = src.model.FiDT5


In [10]:
# Build the tokenizer and the batch collator (the datasets themselves are
# loaded in the next cell).
tokenizer = transformers.T5Tokenizer.from_pretrained(model_name)
collator = src.data.Collator(opt.text_maxlength, tokenizer, answer_maxlength=opt.answer_maxlength)

In [11]:
# Global rank and world size are forwarded to load_data so the training
# examples can be split across processes.
train_examples = src.data.load_data(
    opt.train_data, global_rank=opt.global_rank, world_size=opt.world_size
)
train_dataset = src.data.Dataset(train_examples, opt.n_context)

# Same split for the evaluation examples.
eval_examples = src.data.load_data(
    opt.eval_data, global_rank=opt.global_rank, world_size=opt.world_size
)
eval_dataset = src.data.Dataset(eval_examples, opt.n_context)

# Three ways to obtain the model, checked in order:
#   1. an explicit --model_path      -> load it, with reset_params=True
#   2. the run directory pre-existed -> resume from its 'latest' checkpoint
#   3. otherwise                     -> start from the pretrained T5 weights
if opt.model_path != "none":
    (model, optimizer, scheduler,
     opt_checkpoint, step, best_dev_em) = src.util.load(
        model_class, opt.model_path, opt, reset_params=True
    )
    logger.info(f"Model loaded from {opt.model_path}")
elif checkpoint_exists:
    load_path = checkpoint_path / 'checkpoint' / 'latest'
    (model, optimizer, scheduler,
     opt_checkpoint, step, best_dev_em) = src.util.load(
        model_class, load_path, opt, reset_params=False
    )
    logger.info(f"Model loaded from {load_path}")
else:
    t5 = transformers.T5ForConditionalGeneration.from_pretrained(model_name)
    model = src.model.FiDT5(t5.config)
    model.load_t5(t5.state_dict())
    model = model.to(opt.local_rank)
    optimizer, scheduler = src.util.set_optim(opt, model)
    step = 0
    best_dev_em = 0.0

model.set_checkpoint(opt.use_checkpoint)

# Wrap for multi-process training only when distributed mode is active.
if opt.is_distributed:
    ddp_kwargs = dict(
        device_ids=[opt.local_rank],
        output_device=opt.local_rank,
        find_unused_parameters=False,
    )
    model = torch.nn.parallel.DistributedDataParallel(model, **ddp_kwargs)

# logger.info("Start training")
# train(
#     model,
#     optimizer,
#     scheduler,
#     step,
#     train_dataset,
#     eval_dataset,
#     opt,
#     collator,
#     best_dev_em,
#     checkpoint_path
# )


[06/25/2022 02:24:12] {util.py:75} INFO - Loading /home/shifu/FiD/checkpoint/my_code_fid4/checkpoint/step-3000
[06/25/2022 02:24:12] {configuration_utils.py:262} INFO - loading configuration file /home/shifu/FiD/checkpoint/my_code_fid4/checkpoint/step-3000/config.json
[06/25/2022 02:24:12] {configuration_utils.py:300} INFO - Model config T5Config {
  "architectures": [
    "FiDT5"
  ],
  "d_ff": 3072,
  "d_kv": 64,
  "d_model": 768,
  "decoder_start_token_id": 0,
  "dropout_rate": 0.1,
  "eos_token_id": 1,
  "initializer_factor": 1.0,
  "is_encoder_decoder": true,
  "layer_norm_epsilon": 1e-06,
  "model_type": "t5",
  "n_positions": 512,
  "num_heads": 12,
  "num_layers": 12,
  "output_past": true,
  "pad_token_id": 0,
  "relative_attention_num_buckets": 32,
  "task_specific_params": {
    "summarization": {
      "early_stopping": true,
      "length_penalty": 2.0,
      "max_length": 200,
      "min_length": 30,
      "no_repeat_ngram_size": 3,
      "num_beams": 4,
      "prefix": "

In [12]:
# train(
#     model,
#     optimizer,
#     scheduler,
#     step,
#     train_dataset,
#     eval_dataset,
#     opt,
#     collator,
#     best_dev_em,
#     checkpoint_path
# )

In [13]:
def Test_bleu(model, dataset, tokenizer, collator, opt, max_batches=102, max_length=100):
    """Generate predictions for ``dataset`` and return them with the gold answers.

    Args:
        model: model (optionally wrapped so that the real model is ``model.module``)
            exposing ``generate``.
        dataset: dataset providing ``get_example(index)`` returning a mapping with
            an ``'answers'`` list.
        tokenizer: tokenizer used to decode generated ids.
        collator: ``collate_fn`` for the data loader; each batch must unpack as
            ``(idx, _, _, context_ids, context_mask)``.
        opt: parsed options; only ``per_gpu_batch_size`` is read.
        max_batches: maximum number of batches to process, or ``None`` for the
            whole dataset. The default of 102 matches the previously hard-coded
            ``if i > 100: break``.
        max_length: ``max_length`` passed to ``model.generate``.

    Returns:
        ``(preds, target)``: ``preds`` is a list of one-element lists ``[answer]``
        and ``target`` is the list of gold ``'answers'`` lists, in dataset order.

    Prints the batch index every 100 batches, and the index of the last processed
    batch at the end (nothing is printed for an empty loader).
    """
    from itertools import islice

    sampler = SequentialSampler(dataset)
    dataloader = DataLoader(dataset,
        sampler=sampler,
        batch_size=opt.per_gpu_batch_size,
        drop_last=False,
        num_workers=10,
        collate_fn=collator
    )
    model.eval()
    preds = []
    target = []
    # Unwrap a wrapper that exposes the real model as `.module`.
    model = model.module if hasattr(model, "module") else model
    last_batch = None  # stays None when the loader yields nothing
    with torch.no_grad():
        # islice(..., None) iterates everything; otherwise it stops after
        # max_batches batches without fetching an extra one.
        for i, batch in enumerate(islice(dataloader, max_batches)):
            (idx, _, _, context_ids, context_mask) = batch

            outputs = model.generate(
                input_ids=context_ids.cuda(),
                attention_mask=context_mask.cuda(),
                max_length=max_length
            )

            for k, o in enumerate(outputs):
                ans = tokenizer.decode(o, skip_special_tokens=True)
                gold = dataset.get_example(idx[k])['answers']
                preds.append([ans])
                target.append(gold)

            last_batch = i
            if i % 100 == 0:
                print(i)
    # The unconditional print(i) raised UnboundLocalError on an empty loader.
    if last_batch is not None:
        print(last_batch)
    return preds, target

In [None]:
# Pull one collated batch from the evaluation set for interactive inspection.
sampler = SequentialSampler(eval_dataset)
dataloader = DataLoader(eval_dataset,
    sampler=sampler,
    batch_size=opt.per_gpu_batch_size,
    drop_last=False,
    num_workers=10,
    collate_fn=collator
)
for i, batch in enumerate(dataloader):
    b= batch  # keep the first batch under a short name
    break  # only the first batch is needed

In [14]:
# Generate predictions on the evaluation set; later cells read preds/target.
preds,target = Test_bleu(model, eval_dataset, tokenizer, collator, opt)

0
100
200
300
400
500


KeyboardInterrupt: 

In [None]:
# Same version check as the second cell of the notebook.
print(torch.__version__)

In [None]:
# sampler = SequentialSampler(train_dataset)
# dataloader = DataLoader(train_dataset,
#         sampler=sampler,
#         batch_size=opt.per_gpu_batch_size,
#         drop_last=False,
#         num_workers=10,
#         collate_fn=collator
#     )

In [None]:
# for i, batch in enumerate(dataloader):
    
#     (idx, _, _, context_ids, context_mask) = batch


#     gold = train_dataset.get_example(idx[0])['question']
#     print(gold)
#     if i >10:
#         break

In [None]:
# Display the gold answers returned by Test_bleu.
target

In [None]:
# Display the generated answers returned by Test_bleu.
preds

In [None]:
# Display the gold answers of the first example only.
target[0]

In [None]:
# Dump gold answers and predictions as "<index>\t<text>" lines, one file each.
# Only element 0 of every entry is written.
for out_name, rows in (('golden.txt', target), ('preds.txt', preds)):
    with open(out_name, 'w') as f:
        f.writelines(
            str(row_no) + '\t' + row[0] + '\n'
            for row_no, row in enumerate(rows)
        )

In [None]:
from datasets import load_metric

# The sacreBLEU metric takes predictions as a flat list of strings and
# references as a list of reference lists. `preds` holds one-element lists
# ([answer]), so unwrap each entry before scoring.
# NOTE(review): sacreBLEU expects the same number of references for every
# prediction — confirm that every entry of `target` has the same length.
metric = load_metric("sacrebleu")
results = metric.compute(
    predictions=[pred[0] for pred in preds],
    references=target,
)
print(results["score"])

In [None]:
from datasets import load_metric

metric = load_metric("sacrebleu")

# Re-run generation, then score it. Test_bleu returns predictions as
# one-element lists ([answer]); the sacreBLEU metric takes a flat list of
# strings, so unwrap each entry before scoring.
# NOTE(review): sacreBLEU expects the same number of references for every
# prediction — confirm that every entry of `gold` has the same length.
ans,gold =Test_bleu(model, eval_dataset, tokenizer, collator, opt)
results = metric.compute(
    predictions=[answer[0] for answer in ans],
    references=gold,
)
print(results["score"])