<a href="https://colab.research.google.com/github/jai-llm/TEXT2SQL/blob/main/2b_Text2SQL_Evaluate_Llama2GPTQFineTunev3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 2b. Text2SQL Evaluate LLaMA2GPTQ Fine-Tune
Runs on the free tier of Google Colab; requires a T4 GPU.

In [3]:
!pip install -q -U datasets
!pip install -q -U torch auto-gptq transformers optimum
!pip install -q -U peft trl einops accelerate xformers bitsandbytes
! pip install -q -U rouge_score
! pip install -q -U langchain

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.6/92.6 MB[0m [31m9.4 MB/s[0m eta [36m0:00:00[0m
[?25h

### Imports

In [4]:
import pandas as pd
import json
import torch
import os
import time

# In case Login Required For Model
# from huggingface_hub import login
# from dotenv import load_dotenv

from datasets import load_dataset, Dataset, load_metric, load_from_disk
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, pipeline
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model, AutoPeftModelForCausalLM
from transformers import GPTQConfig, Trainer, TrainingArguments, DataCollatorForLanguageModeling
from trl import SFTTrainer
from time import perf_counter
from rich import print

# LangChain Imports
from langchain.llms import HuggingFacePipeline
from langchain import PromptTemplate  #, LLMChain

# Imports for QA Retrieval Chain
from langchain.chains import RetrievalQA

# Import to Cleanup LLM Output
import textwrap

In [5]:
# If Login Required for Model Access
# load_dotenv("/notebooks/.env")
# os.environ["TOKENIZERS_PARALLELISM"]="false"
# login(token=os.getenv("HUGGINGFACE_TOKEN"))

### Global Constants

In [6]:
# Hub id of the base model; the tokenizer is loaded from this id further down.
model_id = "TheBloke/Llama-2-7B-GPTQ"
# model_id = "TheBloke/Llama-2-7b-Chat-GPTQ"
# Folder, run sub-folder and checkpoint name. They are concatenated later into
# the directory the fine-tuned PEFT model is loaded from.
# NOTE(review): the paths assume Google Drive is already mounted at
# /content/drive; no visible cell in this notebook mounts it — confirm.
MODEL_PATH = "/content/drive/MyDrive/Text2SQL/FTClean/"
MODEL_DIR = "sql_gptq_training"+"_30/"
checkpoint_name ="SQL_llama2_gptq_7b_peftv1_20230930_043049"

In [7]:
# Sanity check: show which fine-tuned checkpoint this run will evaluate.
print(checkpoint_name)

In [8]:
# GDrive Location for Train/Test Data
DATA_PATH ="/content/drive/MyDrive/Text2SQL/Data/"
DS_DIR = "sql_train_test"
PKL_DIR = "test/"
PKL_FILE ="sql_test.pkl"
LLMRES_PKL_FILE="sql_test_llmres.pkl"

### Common Functions

In [9]:
def tokenize_function(example):
    """Tokenize the `sentence1` / `sentence2` pair of `example` with truncation.

    Uses the notebook-level `tokenizer`.
    NOTE(review): the datasets shown in this notebook expose `question`,
    `context`, `response` and `text` columns, not `sentence1` / `sentence2`,
    and nothing visible calls this helper — confirm it is still needed.
    """
    encode = tokenizer
    first_sentence = example["sentence1"]
    second_sentence = example["sentence2"]
    return encode(first_sentence, second_sentence, truncation=True)

In [10]:
# NOTE(review): `load_metric` is deprecated in recent `datasets` releases in
# favour of the separate `evaluate` package — confirm it still exists in the
# version this notebook installs.
rouge = load_metric("rouge")

def compute_metrics(pred):
    """Score decoded predictions against decoded labels with ROUGE-2.

    Args:
        pred: Object exposing `predictions` and `label_ids` arrays of token
            ids (presumably a transformers `EvalPrediction` — TODO confirm;
            no visible cell calls this function).

    Returns:
        dict with `rouge2_precision`, `rouge2_recall` and `rouge2_fmeasure`,
        taken from the `mid` aggregate of the ROUGE-2 score and rounded to
        4 decimals.
    """
    import copy  # local import: `copy` is not imported at the top of the notebook

    pred_ids = pred.predictions
    # Work on a copy: the masked assignment below would otherwise overwrite
    # the caller's `pred.label_ids` in place.
    labels_ids = copy.deepcopy(pred.label_ids)

    # all unnecessary tokens are removed
    pred_str = tokenizer.batch_decode(pred_ids, skip_special_tokens=True)
    # -100 is not a decodable token id (presumably the label-ignore marker —
    # confirm), so swap it for the pad id, which skip_special_tokens drops.
    labels_ids[labels_ids == -100] = tokenizer.pad_token_id
    label_str = tokenizer.batch_decode(labels_ids, skip_special_tokens=True)

    rouge_output = rouge.compute(predictions=pred_str, references=label_str, rouge_types=["rouge2"])["rouge2"].mid

    return {
        "rouge2_precision": round(rouge_output.precision, 4),
        "rouge2_recall": round(rouge_output.recall, 4),
        "rouge2_fmeasure": round(rouge_output.fmeasure, 4),
    }

  rouge = load_metric("rouge")


Downloading builder script:   0%|          | 0.00/2.17k [00:00<?, ?B/s]

In [11]:
def parse(text):
    """Extract the model's answer from generated text.

    Returns the stripped text that follows the first '### Response:' marker,
    cut off at the next '### End' marker when one appears after it.
    Returns None when the text contains no '### Response:' marker.
    """
    start_marker = '### Response:'
    end_marker = '### End'

    marker_pos = text.find(start_marker)
    if marker_pos == -1:
        return None

    answer_start = marker_pos + len(start_marker)
    answer_end = text.find(end_marker, answer_start)
    if answer_end == -1:
        # No end marker after the response: keep everything to the end.
        return text[answer_start:].strip()
    return text[answer_start:answer_end].strip()

In [12]:
def gen_eval_res(test_df):
  """Generate and time a model answer for every prompt in `test_df`.

  Each row's `text` is sent through the notebook-level `pipe`, and the
  generated text is reduced to the answer by `parse`. The answers and the
  elapsed seconds per row (generation plus parsing) are written onto
  `test_df` itself as new `eval_res` and `eval_time` columns. Nothing is
  returned.
  """
  eval_res, time_res = [], []
  for row_no, row in enumerate(test_df.itertuples(index=False), start=1):
    started = perf_counter()
    generated = pipe(row.text)[0]['generated_text']
    answer = parse(generated)
    elapsed = perf_counter() - started
    eval_res.append(answer)
    time_res.append(elapsed)
    print(f"*** Row {row_no} Done ***")

  # Attach the results in place so the caller's frame carries them.
  test_df['eval_res'] = eval_res
  test_df['eval_time'] = time_res

### Load and Check Data

In [13]:
# Load Test & Training Data from Disk
# Saved dataset with 'train' and 'test' splits; both are displayed below.
dataset = load_from_disk(DATA_PATH + DS_DIR)

In [14]:
# Test prompts as a pandas DataFrame (the frame that is evaluated further down).
# NOTE(security): unpickling can execute arbitrary code — only load pickle
# files that you created yourself.
test_df = pd.read_pickle(DATA_PATH + PKL_DIR + PKL_FILE)

In [15]:
# Show the columns and row count of each split.
display(dataset['train'])
display(dataset['test'])

Dataset({
    features: ['question', 'context', 'response', 'text', '__index_level_0__'],
    num_rows: 4086
})

Dataset({
    features: ['question', 'context', 'response', 'text', '__index_level_0__'],
    num_rows: 454
})

In [16]:
# Peek at the first two test rows, then confirm the frame's (rows, columns) shape.
display(test_df.head(2))
display(test_df.shape)

Unnamed: 0,question,context,response,__index_level_0__,text
0,Show the name of track and the number of races...,"CREATE TABLE track (name VARCHAR, track_id VAR...","SELECT T2.name, COUNT(*) FROM race AS T1 JOIN ...",429,### Instruction:\n You are a powerful text-...
1,Show names of shops and the carriers of device...,"CREATE TABLE shop (Shop_Name VARCHAR, Shop_ID ...","SELECT T3.Shop_Name, T2.Carrier FROM stock AS ...",2907,### Instruction:\n You are a powerful text-...


(454, 5)

### Load Model and Set Up Inference Pipeline

In [17]:
# Directory of the fine-tuned checkpoint: <MODEL_PATH><MODEL_DIR><checkpoint_name>.
# The parts are joined without a separator because MODEL_PATH and MODEL_DIR
# already end in "/".
output_dir = "".join((MODEL_PATH, MODEL_DIR, checkpoint_name))
print(output_dir)

In [18]:
# NOTE(review): this GPTQ config is created but not passed to any
# from_pretrained call in this cell — confirm whether it is still needed.
quantization_config_loading = GPTQConfig(bits=4, disable_exllama=True)

# Tokenizer - Base Model since we did not change it
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Reuse the end-of-sequence token as the padding token and pad on the right.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# LLM GPTQ Fine-Tune Model
# Loaded from the fine-tuned checkpoint directory (output_dir)
# To perform inference on the test dataset
model = AutoPeftModelForCausalLM.from_pretrained(
          output_dir,
          low_cpu_mem_usage=True,
          return_dict=True,
          torch_dtype=torch.float16,
          device_map="cuda", # Forces model to use GPU
      )

Downloading (…)okenizer_config.json:   0%|          | 0.00/727 [00:00<?, ?B/s]

Downloading tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/411 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/784 [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/3.90G [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/132 [00:00<?, ?B/s]

In [19]:
# Get Model Memory Footprint = ~4GB
# The value is divided by 1e9 so it is printed in GB.
print(model.get_memory_footprint()/1e9) # GB

In [20]:
# The pipeline below samples (do_sample=True), so seed torch first. Without a
# seed every run can generate different SQL and the saved evaluation results
# cannot be repeated. With it, a Restart & Run All replays the same random
# draws (GPU kernels may still add small nondeterminism).
SEED = 42
torch.manual_seed(SEED)

# Text-generation pipeline around the fine-tuned model: at most 180 new
# tokens per prompt, low-temperature top-k sampling, and a repetition penalty.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=180,
    do_sample = True,
    temperature=0.1,
    top_k=4,
    # top_p=0.95,
    repetition_penalty=1.2
)
# Alternative generation settings, kept for reference:
#  penalty_alpha=0.5,
#     # do_sample = True,
#     top_k=1,
#     # temperature=0.1,
#     repetition_penalty=1.2,
#     max_new_tokens=180

# LangChain wrapper around the same pipeline.
llm = HuggingFacePipeline(pipeline=pipe)

The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'LlamaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MptForCausalLM', 'MusicgenForCausalLM', 'MvpForCausalLM', 'OpenLlamaForCausalLM', 'OpenAIGPTLMHeadModel', 'OPTForCausalLM', 'PegasusForCausalLM', 'PLBartForCausalLM', 'ProphetNetForCausalLM', 'QDQBertLMHeadModel', 'Refor

#### Check Model on Sample Test Question

In [21]:
# Index label of the test row used for the single-question spot check below.
ID = 10

In [22]:
# Full prompt that will be sent to the model for the sample row.
test_df['text'][ID]

'### Instruction:\n    You are a powerful text-to-SQL model.     Your job is to answer questions about a database.     You are given a question and context regarding one or more tables.\n\n    You must output the SQL query that answers the question.\n\n    ### Input:\n    Show ids for all students who have advisor 1121.\n    ### Context:\n    CREATE TABLE Student (StuID VARCHAR, Advisor VARCHAR)\n    ### Response:\n    '

In [23]:
# Reference SQL for the sample row, for comparison with the model's answer.
display(test_df['response'][ID])

'SELECT StuID FROM Student WHERE Advisor = 1121'

In [24]:
# Run the sample prompt through the pipeline. 'generated_text' is reduced by
# parse() to the part after '### Response:' before it is printed.
text = test_df['text'][ID]
llm_res = pipe(text)[0]['generated_text']
parse_res = parse(llm_res)
# print(llm_res)
print(parse_res)



### Get LLM Responses For All Questions

In [25]:
# def gen_eval_res(test_df):
#   eval_res = []
#   time_res = []
#   # tmp = test_df.iloc[:2]
#   for row in test_df.itertuples(index=False):
#     start_time = perf_counter()
#     # print(row.text)
#     text = row.text # test_df['text'][ID]
#     llm_res = pipe(text)[0]['generated_text']
#     parse_res = parse(llm_res)
#     end_time = perf_counter()
#     eval_res.append(parse_res)
#     time_res.append(end_time-start_time)

#   test_df['eval_res'] = eval_res
#   test_df['eval_time'] = time_res
#   # display(pd.DataFrame(eval_res))
#   # display(pd.DataFrame(time_res))

In [26]:
# Generate answers for every test row and time the whole run.
# gen_eval_res adds the 'eval_res' and 'eval_time' columns to test_df in place.
# The elapsed time is a perf_counter difference, i.e. seconds.
eval_start_time = perf_counter()
gen_eval_res(test_df) # test_df.iloc[:4]
eval_end_time = perf_counter()
print('Evaluation Took: ', round(eval_end_time-eval_start_time, 2))



In [27]:
# Persist the test frame, now including the model answers and timings.
test_df.to_pickle(DATA_PATH + PKL_DIR + LLMRES_PKL_FILE)

In [28]:
# display(test_df['response'][:4].values)

In [29]:
# Read the saved results back and show two rows to confirm the file
# contains the eval_res / eval_time columns.
test_df2=pd.read_pickle(DATA_PATH + PKL_DIR + LLMRES_PKL_FILE)
display(test_df2.head(2))

Unnamed: 0,question,context,response,__index_level_0__,text,eval_res,eval_time
0,Show the name of track and the number of races...,"CREATE TABLE track (name VARCHAR, track_id VAR...","SELECT T2.name, COUNT(*) FROM race AS T1 JOIN ...",429,### Instruction:\n You are a powerful text-...,"SELECT T1.name, COUNT(*) FROM track AS T1 JOIN...",3.763995
1,Show names of shops and the carriers of device...,"CREATE TABLE shop (Shop_Name VARCHAR, Shop_ID ...","SELECT T3.Shop_Name, T2.Carrier FROM stock AS ...",2907,### Instruction:\n You are a powerful text-...,SELECT T1.Shop_name FROM stock AS T2 JOIN shop...,8.575436
