# Inference notebook for Prompt tuning - NL2SQL

In [9]:
from transformers import AutoModelForCausalLM, AutoTokenizer, default_data_collator, get_linear_schedule_with_warmup
from peft import PeftModel,get_peft_config, get_peft_model, PromptTuningInit, PromptTuningConfig, TaskType, PeftType, PeftConfig
import torch

device = "cuda:0"
peft_model_id = "prompt_model/"
model_name_or_path = "bigcode/santacoder"

config = PeftConfig.from_pretrained(peft_model_id)
org_model = AutoModelForCausalLM.from_pretrained(model_name_or_path,trust_remote_code=True)
model = PeftModel.from_pretrained(org_model, peft_model_id)
model.eval()
model.to(device)


Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.


PeftModelForCausalLM(
  (base_model): GPT2LMHeadCustomModel(
    (transformer): GPT2CustomModel(
      (wte): Embedding(49280, 2048)
      (wpe): Embedding(2048, 2048)
      (drop): Dropout(p=0.1, inplace=False)
      (h): ModuleList(
        (0-23): 24 x GPT2CustomBlock(
          (ln_1): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
          (attn): GPT2MQAttention(
            (q_attn): Conv1D()
            (kv_attn): Conv1D()
            (c_proj): Conv1D()
            (attn_dropout): Dropout(p=0.1, inplace=False)
            (resid_dropout): Dropout(p=0.1, inplace=False)
          )
          (ln_2): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
          (mlp): GPT2MLP(
            (c_fc): Conv1D()
            (c_proj): Conv1D()
            (act): FastGELUActivation()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
      (ln_f): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
    )
    (lm_head): Linear(in_features=2048,

In [10]:
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id

In [11]:
stop_words_ids = [tokenizer.encode(stop_word) for stop_word in ["\n"]]

## Inference

In [None]:
def infer(model,input_text):
    inputs = tokenizer(input_text,return_tensors="pt")
    with torch.no_grad():
        inputs = {k: v.to(device) for k, v in inputs.items()}
        outputs = model.generate(
            input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"], max_new_tokens=40, eos_token_id=stop_words_ids[0])
    
    return tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True)

def parse(text):
    return text.split("SQL :")[1]

#### Example Inference with Soft prompts

In [8]:
input_text = """question : What is the average score of students in maths 
 Table Columns : id:INT,name:text,subject:Text,score:INT
 SQL : """
 
predictions = infer(model,input_text)
print(parse(predictions[0]))

Setting `pad_token_id` to `eos_token_id`:185 for open-end generation.


 SELECT AVG score FROM table WHERE subject = maths



In [26]:
input_text = """question : What is the highest score of dhoni in a match in chennai
 Table Columns : id:INT,player:text,runs:INT,match:INT,year:INT,city:text
 SQL : """
 
predictions = infer(model,input_text)
print(parse(predictions[0]))

Setting `pad_token_id` to `eos_token_id`:185 for open-end generation.


 SELECT MAX runs FROM table WHERE city = chennai AND player = dhoni



#### Inferencing the original model

In [19]:
org_model.eval()

input_text = """# write a python program to reverse text \ndef"""
 
predictions = infer(org_model,input_text)
print(predictions[0])

Setting `pad_token_id` to `eos_token_id`:185 for open-end generation.


# write a python program to reverse text 
def reverse_text(text):
    return text[::-1]

