# Testing Fine-tuned T5 Model

This notebook smoke-tests a fine-tuned model before you run batch inference and compute performance metrics. It performs the following tasks:
* Load a fine-tuned T5 model
* Run inference on the loaded model

Note: If you want to run inference for the whole dataset, please configure and run the [t5_performance.py](t5_performance.py)

## Install Packages

In [1]:
# Install the project requirements if they are not already available.
# Note: after installing, restart the kernel before continuing.
# %pip install -r ../requirements.txt

## Import Packages

In [1]:
# Reload imported project modules automatically so edits to the .py files
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [8]:
import sys
sys.path.append('../')

import os
import pandas as pd
import json
import argparse
from nlp import load_metric
from sklearn.model_selection import train_test_split
import pytorch_lightning as pl
from transformers import T5Tokenizer
import shutil
from sklearn.model_selection import train_test_split

#from model import T5FineTuner, set_seed
from src.step4.model.t5_performance import load_model, do_inference
import dataset as ds
from callback import LoggingCallback, logger
from config import model_params

ModuleNotFoundError: No module named 'model.model'; 'model' is not a package

In [None]:
# Checkpoint of the fine-tuned model to test. Set the T5_MODEL_PATH environment
# variable to point at a different checkpoint without editing the notebook;
# the default is the original deliverable checkpoint.
MODEL_PATH = os.environ.get(
    'T5_MODEL_PATH',
    '/home/ec2-user/SageMaker/efs/deliverable_models/0607_wikisql_all_v0e4.ckpt',
)

### Model Loading

{'data_dir': '/home/ec2-user/SageMaker/efs/data/pilot_nl2sql_dev/t5_tuning2/input-level/1000/data',
 'output_dir': '/home/ec2-user/SageMaker/efs/data/pilot_nl2sql_dev/t5_tuning2/input-level/1000/models/',
 'model_name': 'mrm8488/t5-base-finetuned-wikiSQL',
 'tokenizer_name': 'mrm8488/t5-base-finetuned-wikiSQL',
 'max_input_length': 256,
 'max_output_length': 512,
 'freeze_encoder': False,
 'freeze_embeds': False,
 'learning_rate': 0.01,
 'weight_decay': 0.0,
 'adam_epsilon': 1e-08,
 'warmup_steps': 0,
 'train_batch_size': 32,
 'eval_batch_size': 32,
 'num_train_epochs': 5,
 'gradient_accumulation_steps': 1,
 'n_gpu': 8,
 'resume_from_checkpoint': None,
 'val_check_interval': 0.8,
 'n_train': -1,
 'n_val': -1,
 'n_test': -1,
 'early_stop_callback': False,
 'fp_16': False,
 'opt_level': 'O1',
 'max_grad_norm': 1.0,
 'seed': False,
 'automatic_optimization': True}

In [11]:
# Seed the random number generators only when the config provides a seed.
# The config uses False (or None) to mean "no seed"; comparing explicitly
# instead of testing truthiness keeps a seed of 0 usable.
seed = model_params["seed"]
if seed is not None and seed is not False:
    # `set_seed` is not imported in this notebook (its import is commented
    # out), so use Lightning's own helper, which seeds Python's `random`,
    # NumPy and PyTorch.
    pl.seed_everything(seed)

In [12]:
# Wrap the config dict so its entries can be read as attributes
# (e.g. args.output_dir, args.n_gpu) by the cells below.
args = argparse.Namespace(**model_params)

In [15]:
# Checkpointing: write to the configured output directory and keep the three
# checkpoints with the lowest validation loss.
checkpoint_callback = pl.callbacks.ModelCheckpoint(
    dirpath=args.output_dir,
    prefix="checkpoint",
    monitor="val_loss",
    mode="min",
    save_top_k=3,
)

# Keyword arguments intended for pl.Trainer, gathered in one place.
# To resume a run, set `resume_from_checkpoint` in the config.
train_params = {
    "accumulate_grad_batches": args.gradient_accumulation_steps,
    "gpus": args.n_gpu,
    "max_epochs": args.num_train_epochs,
    "precision": 16 if args.fp_16 else 32,
    "amp_level": args.opt_level,
    "resume_from_checkpoint": args.resume_from_checkpoint,
    "gradient_clip_val": args.max_grad_norm,
    "checkpoint_callback": checkpoint_callback,
    "val_check_interval": args.val_check_interval,
    "logger": None,
    "callbacks": [LoggingCallback()],
    # Newly added: multi-GPU strategy, one of "dp" | "ddp" | "ddp2".
    "accelerator": "dp",
    # "automatic_optimization": False,
}

In [16]:
# Build the model from the config namespace.
# NOTE(review): `T5FineTuner` is not imported by any visible cell (the
# `from model import T5FineTuner, set_seed` line in the imports cell is
# commented out), so this raises NameError on a fresh kernel — TODO restore
# the import, or load the checkpoint at MODEL_PATH via the imported
# `load_model` if that is the intended entry point (confirm its signature).
model = T5FineTuner(args)

The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.


In [17]:
# `gpus` expects a device count/selection, not the whole `train_params` dict
# that was passed here before. As before, only the GPU count and the "dp"
# accelerator are configured; use `pl.Trainer(**train_params)` instead to
# apply the full training configuration built above.
trainer = pl.Trainer(gpus=args.n_gpu, accelerator='dp')

GPU available: True, used: True
TPU available: False, using: 0 TPU cores


In [19]:
# Display the model's `automatic_optimization` flag.
model.automatic_optimization

True

In [None]:
# Run the trainer's fit loop on `model`.
# NOTE(review): the notebook's stated purpose is testing an already
# fine-tuned model; confirm that (re)training here is intended.
trainer.fit(model)

## Model Inference

In [24]:
# Load the test set from CSV and preview its first rows.
# NOTE(review): `TEST_DATA_PATH` is not defined in any visible cell — TODO
# define it (e.g. next to MODEL_PATH) before running on a fresh kernel.
df_test = pd.read_csv(TEST_DATA_PATH)
df_test.head()

Unnamed: 0,question,Input,query,final_query_example
0,what's the counts of individuals got <ARG-DRUG...,"Counts of patients taking drug <ARG-DRUG><0>, ...",SELECT COUNT( DISTINCT dr1.person_id) FROM (((...,SELECT COUNT( DISTINCT dr1.person_id) FROM (((...
1,tell me what is the number of people who are o...,"Counts of patients taking drug <ARG-DRUG><0>, ...",SELECT COUNT( DISTINCT dr1.person_id) FROM (((...,SELECT COUNT( DISTINCT dr1.person_id) FROM (((...
2,What is the count of individuals who have trea...,"Counts of patients taking drug <ARG-DRUG><0>, ...",SELECT COUNT( DISTINCT dr1.person_id) FROM (((...,SELECT COUNT( DISTINCT dr1.person_id) FROM (((...
3,What is the count of patients who took all of ...,"Counts of patients taking drug <ARG-DRUG><0>, ...",SELECT COUNT( DISTINCT dr1.person_id) FROM (((...,SELECT COUNT( DISTINCT dr1.person_id) FROM (((...
4,tell me what is the number of individuals who ...,"Counts of patients taking drug <ARG-DRUG><0>, ...",SELECT COUNT( DISTINCT dr1.person_id) FROM (((...,SELECT COUNT( DISTINCT dr1.person_id) FROM (((...


In [None]:
# Use the question text of the first test row as a single-example input.
question = df_test['question'].iloc[0]
question

In [None]:
# Tokenize one sample string, padded/truncated to 150 tokens, and return
# PyTorch tensors so the encoded batch can be inspected.
batch_temp = model.tokenizer.batch_encode_plus(
    ["SELECT people FROM peoples where age > 10"],
    max_length=150,
    padding='max_length',
    truncation=True,
    return_tensors="pt",
)

In [None]:
# Check the shape of the encoded ids (presumably (1, 150): one input padded
# to max_length=150 — TODO confirm).
batch_temp['input_ids'].shape

In [None]:
def get_sql(question, model, max_length=150, num_beams=2, device='cuda', verbose=True):
    """Generate a SQL query for a single natural-language question.

    Args:
        question: Natural-language question text.
        model: Object exposing `tokenizer` (with `batch_encode_plus` and
            `decode`), `model` (with `generate`) and `to(device)` —
            presumably the fine-tuned T5 wrapper; verify against caller.
        max_length: Token budget used both to pad/truncate the encoded prompt
            and to cap the generated sequence.
        num_beams: Beam width passed to `generate`.
        device: Device the model and the input tensors are moved to.
        verbose: When true, print the question and the generated SQL.

    Returns:
        The decoded SQL string with `<pad>` and `</s>` markers removed and
        surrounding whitespace stripped.
    """
    # The prompt is sent without an explicit trailing "</s>" marker.
    input_text = "translate English to SQL: %s" % question
    inputs = model.tokenizer.batch_encode_plus(
        [input_text],
        max_length=max_length,
        padding='max_length',
        truncation=True,
        return_tensors="pt",
    )

    # Model and inputs must live on the same device before generation.
    model.to(device)
    outs = model.model.generate(
        inputs["input_ids"].to(device),
        attention_mask=inputs["attention_mask"].to(device),
        use_cache=False,
        max_length=max_length,
        num_beams=num_beams,
        repetition_penalty=2.5,
        length_penalty=1.0,
    )

    # A single prompt was encoded, so only the first sequence is of interest.
    sql = model.tokenizer.decode(outs[0])
    # Strip only the padding / end-of-sequence markers by hand so that any
    # other marker tokens in the output are left untouched.
    sql = sql.replace('<pad>', '').replace('</s>', '').strip()

    if verbose:
        print(f'Question: {question}\nSQL: {sql}')

    return sql


def predict(row, model):
    """Return the SQL predicted for the `question` field of one dataframe row.

    The question text is stripped of surrounding whitespace before being
    handed to `get_sql`.
    """
    return get_sql(row['question'].strip(), model)

In [None]:
# Predict the SQL query template for the single question selected earlier.
query = get_sql(question, model)

In [None]:
# Get predictions for the first 10 rows.
# `.copy()` makes the subset an independent frame, so adding the `prediction`
# column below does not trigger pandas' SettingWithCopyWarning.
df_test = df_test.iloc[:10].copy()
df_test['prediction'] = df_test.apply(predict, args=(model,), axis=1)
df_test.head()

In [None]:
# Gold queries and model predictions as parallel arrays for scoring.
true_values, pred_values = (
    df_test[column].values for column in ('query', 'prediction')
)

### Compute Performance

**TODO:** add sample entities for the `acc_ex` function.

In [None]:
# Score the predictions against the gold queries with `acc_if`.
# NOTE(review): `acc_if` is not imported in any visible cell — TODO import it
# before running on a fresh kernel.
acc_if(true_values, pred_values, token=True)

In [None]:
# Score the predictions against the gold queries with `acc_ex`; no entities
# are supplied yet.
# NOTE(review): `acc_ex` is not imported in any visible cell, and the keyword
# is spelled `entites` — TODO confirm it matches the function's signature.
acc_ex(true_values, pred_values, entites=None)