In [1]:
from typing import Dict
from sklearn.model_selection import train_test_split

from dataclasses import dataclass, field
from typing import cast, Optional

import torch
from datasets import load_dataset
from torch.utils.data import Dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    pipeline
)
from transformers.trainer_pt_utils import LabelSmoother
from transformers.integrations import deepspeed

from peft import PeftConfig, PeftModel

import os
IGNORE_TOKEN_ID = LabelSmoother.ignore_index
import sys
sys.path.append('/home/jupyter/TQL/')
from utils.token import hub_token
import pandas as pd
import numpy as np


def format_example(example):
    """Build the instruction prompt for one question.

    The prompt has three parts separated by blank lines: a fixed instruction
    sentence, a "### Question" section containing ``example``, and a
    "### Answer" header followed by a newline.

    Args:
        example: The question text to embed. It is interpolated with an
            f-string, so any value that supports default formatting works.

    Returns:
        The assembled prompt string, ending with "### Answer\\n".
    """
    sections = (
        "Given the context, answer the question by writing the appropriate SQL code.",
        f"### Question\n{example}",
        "### Answer\n",
    )
    return "\n\n".join(sections)

def get_gpu_memory(max_gpus=None):
    """Report, per GPU, total device memory minus torch-allocated memory (GiB).

    Args:
        max_gpus: Optional cap on how many devices to inspect. ``None`` means
            every device reported by ``torch.cuda.device_count()``; otherwise
            the smaller of ``max_gpus`` and that count is used.

    Returns:
        A list of floats, one per inspected device, ordered by device id.
        Each value is ``total_memory - torch.cuda.memory_allocated()`` for
        that device, expressed in GiB.
    """
    import torch

    bytes_per_gib = 1024**3
    visible = torch.cuda.device_count()
    limit = visible if max_gpus is None else min(max_gpus, visible)

    free_gib = []
    for idx in range(limit):
        # Switch the current device so the un-parameterised queries below
        # refer to GPU `idx`.
        with torch.cuda.device(idx):
            current = torch.cuda.current_device()
            total = torch.cuda.get_device_properties(current).total_memory / bytes_per_gib
            used = torch.cuda.memory_allocated() / bytes_per_gib
            free_gib.append(total - used)
    return free_gib

def load_model(model_path, num_gpus, max_gpu_memory=None):
    """Load a PEFT adapter together with its base causal LM and tokenizer.

    The adapter's ``PeftConfig`` names the base model; the tokenizer and the
    base weights (float16) are loaded from there, then the adapter at
    ``model_path`` is applied on top.

    Args:
        model_path: Path or hub id of the PEFT adapter.
        num_gpus: Number of GPUs to use. When it is not 1, a ``device_map``
            and a per-device ``max_memory`` budget are passed to
            ``from_pretrained``. When it is exactly 1, no ``device_map`` is
            passed and the model is moved to CUDA after loading (if CUDA is
            available).
        max_gpu_memory: Optional per-GPU memory cap applied to every device
            id in ``range(num_gpus)``. When ``None``, each GPU gets 85% of the
            value reported by ``get_gpu_memory`` (truncated to whole GiB).

    Returns:
        A ``(model, tokenizer, base_model)`` tuple, where ``model`` is the
        ``PeftModel`` wrapping ``base_model``.
    """
    kwargs = {"torch_dtype": torch.float16}
    if num_gpus != 1:
        if max_gpu_memory is None:
            # Sequential placement matters when the GPUs do not all have the
            # same amount of VRAM.
            kwargs["device_map"] = "sequential"
            available_gpu_memory = get_gpu_memory(num_gpus)
            # Iterate over what was actually reported: get_gpu_memory caps at
            # the visible device count, so indexing by range(num_gpus) could
            # run past the end of the list.
            kwargs["max_memory"] = {
                gpu_id: str(int(free_gib * 0.85)) + "GiB"
                for gpu_id, free_gib in enumerate(available_gpu_memory)
            }
        else:
            kwargs["device_map"] = "auto"
            kwargs["max_memory"] = {i: max_gpu_memory for i in range(num_gpus)}

    # Loading must run for every num_gpus value. It used to be nested under
    # the branch above, so a single-GPU call fell through and returned None.
    config = PeftConfig.from_pretrained(model_path)
    base_model_path = config.base_model_name_or_path
    tokenizer = AutoTokenizer.from_pretrained(
        base_model_path, use_fast=False, token=hub_token
    )
    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_path,
        low_cpu_mem_usage=True,
        token=hub_token,
        **kwargs,
    )
    model = PeftModel.from_pretrained(base_model, model_path)

    if num_gpus == 1 and torch.cuda.is_available():
        # No device_map was given in the single-GPU case, so place the
        # float16 weights on the GPU explicitly. Module.to() moves parameters
        # in place, so base_model (wrapped by model) is moved as well.
        model.to("cuda")

    return model, tokenizer, base_model

# Load the PEFT adapter stored at 'New_Model' (plus its base model and
# tokenizer), passing the number of CUDA devices torch reports.
model, tokenizer, base_model = load_model(
    model_path='New_Model',
    num_gpus=torch.cuda.device_count(),
    max_gpu_memory=None,
)

# Pad on the right, reusing the end-of-sequence token as the padding token.
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

[2023-10-26 16:16:43,173] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



In [2]:
# Read the training CSV from the GCS bucket.
queryData = pd.read_csv('gs://data_tql/spider/processed/spiderTrainingData.csv')

# Use the row labelled 10 as the example: its question and its reference SQL.
question = queryData.loc[10, 'question']
sql = queryData.loc[10, 'SQL']

prompt = format_example(question)

# Run text generation with the loaded model, wrapping the prompt in
# [INST] ... [/INST] tags.
# NOTE(review): the literal "<s>" may duplicate the BOS token if the tokenizer
# also prepends one - confirm against the prompt format used in training.
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer)
result = pipe(f"<s>[INST] {prompt} [/INST]")

# Show the generated text, a separator, then the reference query.
print(result[0]['generated_text'])
print('-'*100)
print("Actual Query: ", sql)

The model 'PeftModel' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'LlamaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MptForCausalLM', 'MusicgenForCausalLM', 'MvpForCausalLM', 'OpenLlamaForCausalLM', 'OpenAIGPTLMHeadModel', 'OPTForCausalLM', 'PegasusForCausalLM', 'PLBartForCausalLM', 'ProphetNetForCausalLM', 'QDQBertLMHeadModel', 'ReformerModelWit

<s>[INST] Given the context, answer the question by writing the appropriate SQL code.

### Question
Given the Table department having columns as Department_ID has datatype number, Name has datatype text, Creation has datatype text, Ranking has datatype number, Budget_in_Billions has datatype number, Num_Employees has datatype number which has Department_ID and Given the Table head having columns as head_ID has datatype number, name has datatype text, born_state has datatype text, age has datatype number which has head_ID and Given the Table management having columns as department_ID has datatype number, head_ID has datatype number, temporary_acting has datatype text which has department_ID. Answer the question by writing the appropriate SQL code. How many acting statuses are there?

### Answer
 [/INST]  SELECT COUNT(*) FROM management WHERE temporary_acting = 'true';

### Explanation
The SQL code selects all rows from the management table where the temporary_acting column is true, whic