In [None]:
import warnings
warnings.filterwarnings("ignore")

In [None]:
!nvidia-smi

Wed Apr 10 09:32:18 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA L4                      Off | 00000000:00:03.0 Off |                    0 |
| N/A   34C    P8              12W /  72W |      1MiB / 23034MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [16]:
!pip install -q -U transformers torch==2.2.1 datasets huggingface_hub wandb
!pip install -q -U accelerate bitsandbytes peft trl python-dotenv
!echo "Installations completed!"

"Installations completed!"


In [17]:
import transformers
import datasets
import trl
import accelerate
import peft
import bitsandbytes
import torch
import huggingface_hub
import wandb

print("transformers version:", transformers.__version__)
print("datasets version:", datasets.__version__)
print("trl version:", trl.__version__)
print("accelerate version:", accelerate.__version__)
print("peft version:", peft.__version__)
print("bitsandbytes version:", bitsandbytes.__version__)
print("torch version:", torch.__version__)
print("huggingface_hub version:", huggingface_hub.__version__)
print("wandb version:", wandb.__version__)

transformers version: 4.39.3
datasets version: 2.18.0
trl version: 0.8.2
accelerate version: 0.29.2
peft version: 0.10.0
bitsandbytes version: 0.43.0
torch version: 2.2.1+cpu
huggingface_hub version: 0.22.2
wandb version: 0.16.6


In [None]:
from datasets import load_dataset, Dataset
from datasets.exceptions import DatasetNotFoundError

from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments
from torch import bfloat16

from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model, PeftModel
from trl import SFTTrainer

from huggingface_hub import notebook_login, logging
import wandb

In [None]:
try:
    from google.colab import drive
    import os
    
    print("Using Google Colab")
    drive.mount('/content/drive')
    os.chdir('/content/drive/MyDrive/SQL-Query-Generator/')
    
    load_dotenv()

    huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
    wandb_api_key = os.environ.get("WANDB_API_KEY")

    print(huggingface_token)

    wandb.login(key=wandb_api_key)
    
except ModuleNotFoundError:
    notebook_login()
    logging.set_verbosity(logging.CRITICAL)
    wandb.login()
    print("Using other environment")

In [None]:
def load_data(use_my_dataset=True):
  try:
    if use_my_dataset:
      print("Using fawern/Text-to-sql-query-generation")
      dataset = load_dataset("fawern/Text-to-sql-query-generation", split='train')
      print(dataset[0])
    else:
      raise DatasetNotFoundError

  except DatasetNotFoundError:
    print("Clinton/Text-to-sql-v1")
    dataset = load_dataset("Clinton/Text-to-sql-v1", split='train')

    print(dataset[0])

    def get_prompt(text):
      input_text = text['instruction']
      output_text = text['response']

      prompt = f""" <s> [INST] You are a SQL query generator (text-to-sql). Your task is to generate a SQL query from the given question.
      Question : {input_text} [/INST] SQL Query : {output_text} </s>"""
      return {'prompt' : prompt}

    dataset = dataset.map(get_prompt, remove_columns=dataset.features)
    dataset.push_to_hub("fawern/Text-to-sql-query-generation")

  train_rate = int(len(dataset) * 0.8)

  train_dataset = Dataset.from_dict(dataset[:train_rate])
  val_dataset = Dataset.from_dict(dataset[train_rate:])

  return train_dataset, val_dataset

train_dataset, val_dataset = load_data()

In [None]:
base_model_name = 'mistralai/Mistral-7B-v0.1'

tokenizer = AutoTokenizer.from_pretrained(base_model_name)
tokenizer.add_eos_token = True
tokenizer.padding_side = 'right'
tokenizer.pad_token = tokenizer.eos_token

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=False,
    bnb_4bit_compute_dtype=torch.bfloat16
)

model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    quantization_config=bnb_config,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

model.gradient_checkpointing_enable()

model = prepare_model_for_kbit_training(model)

tokenizer_config.json:   0%|          | 0.00/967 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

In [None]:
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
        "lm_head",
    ],
    bias="none",
    lora_dropout=0.05,
    task_type="CAUSAL_LM",
)

model = get_peft_model(model, lora_config)

In [None]:
training_arguments = TrainingArguments(
    output_dir="./results",
    num_train_epochs=1,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,
    optim="paged_adamw_32bit",
    save_strategy="steps",
    save_steps=25,
    logging_steps=25,
    learning_rate=2e-4,
    weight_decay=0.001,
    max_steps=50,
    evaluation_strategy="steps",
    eval_steps=25,
    do_eval=True,
    report_to="wandb",
)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_arguments,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    peft_config=lora_config,
    dataset_text_field="prompt",
)

model.config.use_cache = False

Map:   0%|          | 0/209766 [00:00<?, ? examples/s]

Map:   0%|          | 0/52442 [00:00<?, ? examples/s]

In [None]:
trainer.train()

Step,Training Loss,Validation Loss


KeyboardInterrupt: 

In [None]:
trained_model_name = "mistral-7B-SQL-query-generator"
trainer.model.push_to_hub(trained_model_name)
tokenizer.push_to_hub(trained_model_name)

In [None]:
# wandb.finish()
# model.config.use_cache=True

In [1]:
from peft import PeftConfig, PeftModel

from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig
)
from torch import bfloat16
from transformers import pipeline, logging

  warn("The installed version of bitsandbytes was compiled without GPU support. "


function 'cadam32bit_grad_fp32' not found


In [2]:
trained_model_name = "fawern/mistral-7B-SQL-query-generator"

config = PeftConfig.from_pretrained(trained_model_name)

In [3]:
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    load_in_4bit=True,
    device_map='auto'
)

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


ImportError: Using `bitsandbytes` 8-bit quantization requires Accelerate: `pip install accelerate` and the latest version of bitsandbytes: `pip install -i https://pypi.org/simple/ bitsandbytes`

In [None]:
tokenizer = AutoTokenizer.from_pretrained(
    config.base_model_name_or_path,
    padding_size='right',
    add_eos_token=True,
)
tokenizer.pad_token = tokenizer.eos_token

In [None]:
sql_model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    load_in_4bit=True,
    device_map='auto'
)

In [4]:
# Load model directly
from transformers import AutoModel
model = AutoModel.from_pretrained("fawern/mistral-7B-SQL-query-generator")

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

KeyboardInterrupt: 

In [None]:
fine_tuned_model = PeftModel.from_pretrained(model, trained_model_name)

In [None]:
logging.set_verbosity(logging.CRITICAL)

In [None]:
pipe = pipeline(
    'text-generation',
    model=fine_tuned_model,
    tokenizer=tokenizer,
    eos_token_id=tokenizer.eos_token_id,
    max_length=500,
    device_map='auto',
)

In [None]:
prompt = """<s>[INST]
You are a SQL query generator (text-to-sql). Your task is to generate a SQL query from the given question.
Question: bana yaşı 10 dan büyük olan öğrencileri getir[/INST]
"""

result = pipe(prompt)
generated = result[0]['generated_text']
print(generated)