In [1]:
!nvidia-smi

Wed Mar  6 14:20:11 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 545.23.08              Driver Version: 545.23.08    CUDA Version: 12.3     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA A100 80GB PCIe          On  | 00000000:00:06.0 Off |                    0 |
| N/A   33C    P0              43W / 300W |      4MiB / 81920MiB |      0%      Default |
|                                         |                      |             Disabled |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [2]:
%%capture
!pip install transformers datasets accelerate peft huggingface_hub hf_transfer flash-attn trl wandb -qU

In [3]:
import os
from getpass import getpass

# Secrets are read from the environment when already present and prompted for
# otherwise, so that no token ever has to be written into the notebook itself.
for secret_name in ("HF_TOKEN", "WANDB_API_KEY"):
    if not os.environ.get(secret_name):
        os.environ[secret_name] = getpass(f"{secret_name}: ")

# Opt in to the hf_transfer download backend (the package is installed in the pip cell).
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
# Weights & Biases project and run name under which this fine-tuning run is logged.
os.environ["WANDB_PROJECT"] = "7bsqlmaster"
os.environ["WANDB_NAME"] = "Mistral-Finetune"

In [4]:
import torch
from IPython.display import Markdown
from transformers import AutoTokenizer, AutoModelForCausalLM, EarlyStoppingCallback
from peft import LoraConfig, get_peft_model
from datasets import load_dataset 
from transformers import TrainingArguments
from trl import SFTTrainer

In [5]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
cuda_available = torch.cuda.is_available()
device = "cuda" if cuda_available else "cpu"
# Only ask about bf16 when CUDA is present; on a CPU-only machine the CUDA query is
# meaningless (and, on some PyTorch versions, presumably raises) -- report False instead.
bf16_supported = cuda_available and torch.cuda.is_bf16_supported()
print(f"Is Bfloat16 avaiable?: {bf16_supported}")

Is Bfloat16 avaiable?: True


### 1. Load model and tokenizer

In [6]:
# Hub id of the base checkpoint; reused for the model load, the tokenizer load and
# the later reload of the untuned baseline.
model_name = "mistralai/Mistral-7B-v0.1"

#### 1.1 Load model

In [7]:
# Load the base checkpoint in bfloat16 with the FlashAttention-2 attention
# implementation; device placement is delegated to device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    pretrained_model_name_or_path=model_name,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    attn_implementation="flash_attention_2"
)

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

#### 1.2 Load tokenizer

In [8]:
# Tokenizer of the same checkpoint, configured to pad on the left.
tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path=model_name,
    padding_side="left"
)

tokenizer_config.json:   0%|          | 0.00/967 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

In [9]:
# Report the number of entries in the tokenizer vocabulary (thousands-separated).
print(f"Vocabulary size of Mistral7B: {len(tokenizer.get_vocab()):,}")

Vocabulary size of Mistral7B: 32,000


In [10]:
# Inspect which special tokens the tokenizer defines (the output lists no pad token).
tokenizer.special_tokens_map

{'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>'}

In [11]:
# The tokenizer ships without a pad token (see special_tokens_map), so reuse <unk> for padding.
tokenizer.pad_token = tokenizer.unk_token

#### 1.3 Inference test

In [12]:
# Sampling settings shared by every generate() call in this notebook:
# up to 100 new tokens, sampled with temperature 1, top-k 100 and top-p 0.90.
# NOTE(review): pad_token_id is the EOS id here, while the tokenizer itself pads
# with <unk> -- confirm the mismatch is intentional.
generation_config = {
    "max_new_tokens": 100,
    "do_sample": True,
    "temperature": 1,
    "top_k": 100,
    "top_p":0.90,
    "pad_token_id": tokenizer.eos_token_id
}

In [13]:
# Smoke-test the base model on a free-form prompt before any fine-tuning.
input_text = "Write me a poem about Machine Learning."
# Despite its name, `input_ids` holds the whole tokenizer output; it is unpacked
# into generate() as keyword arguments.
input_ids = tokenizer(text=input_text, return_tensors="pt").to(device)
outputs = model.generate(**input_ids, **generation_config)
# Render the first (only) generated sequence, prompt included, without special tokens.
Markdown(tokenizer.decode(token_ids=outputs[0], skip_special_tokens=True))

Write me a poem about Machine Learning.

I think about words as a language.

I think about my body as a language.

I think about time as a language.

I think about music as a language.

I think about dreams as a language.

I think about algorithms as a language.

I think about writing as a language.

I think about poetry as a language.

I think about art as a language.

I think about my mind as a

### 2. Train data

#### 2.1. Load data

In [23]:
# Text-to-SQL dataset from the Hub; rows expose "question", "context" and "answer"
# (the fields read by the prompt templates below).
dataset = load_dataset("b-mc2/sql-create-context", split="train")

#### 2.2 Split into train and val

In [24]:
# Hold out 100 examples for validation. Both the split and the follow-up shuffles
# are seeded: later cells pick rows by position (train_data[...], val_data[i]), so
# an unseeded shuffle would make those rows differ on every run.
SPLIT_SEED = 1399
train_test_split = dataset.train_test_split(test_size=100, seed=SPLIT_SEED, shuffle=True)
train_data = train_test_split["train"].shuffle(seed=SPLIT_SEED)
val_data = train_test_split["test"].shuffle(seed=SPLIT_SEED)
print(len(train_data), len(val_data))

78477 100


In [25]:
# Seed torch's global RNG so the randomly drawn row index is the same on every run,
# then keep that training row as the running example for the cells below.
torch.manual_seed(42)
sample = train_data[torch.randint(low=0, high=len(train_data), size=(1,)).item()]

#### 2.2 Testing baseline inference

In [26]:
# Inference-time prompt: instruction, question and table schema, each fenced in
# triple backticks. No response section is included, so the model has to produce
# it on its own.
template = (
    "You are a powerful text-to-SQL model. Your job is to answer questions about a database. You are given a question and context regarding one or more tables.\n\n"
    "You must output the SQL query that answers the question.\n\n"
    "### Input:\n"
    "```{question}```\n\n"
    "### Context:\n"
    "```{context}```\n\n"
)

In [27]:
# Preview the inference prompt rendered for the sample row.
Markdown(template.format(question=sample["question"], context=sample["context"]))

You are a powerful text-to-SQL model. Your job is to answer questions about a database. You are given a question and context regarding one or more tables.

You must output the SQL query that answers the question.

### Input:
```What is the sum of Total, when Silver is greater than 1, when Nation is Germany (GER), and when Gold is less than 1?```

### Context:
```CREATE TABLE table_name_87 (total INTEGER, gold VARCHAR, silver VARCHAR, nation VARCHAR)```



In [28]:
# Baseline: let the not-yet-fine-tuned model complete the prompt for the sample row.
prompt = template.format(context=sample["context"], question=sample["question"])
input_ids = tokenizer(text=prompt, return_tensors="pt").to(device)
outputs = model.generate(**input_ids, **generation_config)

In [29]:
# Show the model's continuation (the prompt text is removed from the decoded
# output) followed by the reference answer from the dataset.
display(Markdown("#### Completion:"))
display(Markdown(tokenizer.decode(token_ids=outputs[0], skip_special_tokens=True).replace(prompt, "")))
display(Markdown("#### Answer:"))
Markdown(sample["answer"])

#### Completion:

```table_name_87
+----------+------------+-----------+---------+
| total    | gold       | silver    | nation  |
+----------+------------+-----------+---------+
|        2 | No         | Yes       | GER     |
|        5 | Yes        | No        | FIN     |
|        1 | No         | Yes       | USA     |
|        1 | No         | No        | GBR    

#### Answer:

SELECT SUM(total) FROM table_name_87 WHERE silver > 1 AND nation = "germany (ger)" AND gold < 1

#### 2.3 Creating template function

In [30]:
def formatting_func(example):
    """Render one dataset row as a complete training prompt.

    Args:
        example: Mapping providing the "context", "question" and "answer" entries.

    Returns:
        A single string: the instruction, the question and the table schema,
        followed by a "### Response:" section that holds the answer terminated
        with ";". Question, schema and answer are each wrapped in triple backticks.
    """
    schema = example["context"]
    question = example["question"]
    sql = example["answer"]

    sections = (
        "You are a powerful text-to-SQL model. Your job is to answer questions about a database. You are given a question and context regarding one or more tables.\n\n",
        "You must output the SQL query that answers the question.\n\n",
        f"### Input:\n```{question}```\n\n",
        f"### Context:\n```{schema}```\n\n",
        f"### Response:\n```{sql};```",
    )
    return "".join(sections)

In [31]:
# Preview the full training prompt (response section included) for one training row.
Markdown(formatting_func(train_data[1]))

You are a powerful text-to-SQL model. Your job is to answer questions about a database. You are given a question and context regarding one or more tables.

You must output the SQL query that answers the question.

### Input:
```What college team did Derek Fisher play for?```

### Context:
```CREATE TABLE table_name_19 (school_club_team VARCHAR, player VARCHAR)```

### Response:
```SELECT school_club_team FROM table_name_19 WHERE player = "derek fisher";```

### 3. Parameter Efficient Fine-Tuning (PEFT) - LoRA

In [32]:
# Print the module tree; the projection layer names shown here are the ones listed
# as LoRA target modules below.
print(model)

MistralForCausalLM(
  (model): MistralModel(
    (embed_tokens): Embedding(32000, 4096)
    (layers): ModuleList(
      (0-31): 32 x MistralDecoderLayer(
        (self_attn): MistralFlashAttention2(
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): MistralRotaryEmbedding()
        )
        (mlp): MistralMLP(
          (gate_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): MistralRMSNorm()
        (post_attention_layernorm): MistralRMSNorm()
      )
    )
    (norm): MistralRMSNor

#### 3.1 Prepare LoRA Fine-Tuning

In [33]:
# Enable gradient checkpointing on the base model before wrapping it with LoRA.
model.gradient_checkpointing_enable()
# The KV cache must be off while checkpointing is active. The flag lives on the
# model *config*: assigning `use_cache` on the model object itself changes nothing,
# which is why training still warned "`use_cache=True` is incompatible with
# gradient checkpointing".
model.config.use_cache = False

In [34]:
# LoRA setup: rank 16, alpha 32, dropout 0.05, no bias terms trained, applied to
# every attention projection (q/k/v/o) and every MLP projection (gate/up/down)
# of the decoder layers.
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

In [35]:
# Wrap the base model with the LoRA adapters described by peft_config.
peft_model = get_peft_model(model=model, peft_config=peft_config)

#### 3.2 Check trainable parameters

In [36]:
def print_trainable_parameters(model):
    """
    Report how many of the model's parameters are trainable.

    Walks ``model.named_parameters()`` and prints the element count of the
    parameters with ``requires_grad`` set, the element count of all parameters,
    and the former as a percentage of the latter.
    """
    tensors = [tensor for _, tensor in model.named_parameters()]
    all_param = sum(tensor.numel() for tensor in tensors)
    trainable_params = sum(tensor.numel() for tensor in tensors if tensor.requires_grad)
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )

In [37]:
# With the adapters attached, only about 0.58% of the parameters are trainable (see output).
print_trainable_parameters(peft_model)

trainable params: 41943040 || all params: 7283675136 || trainable%: 0.5758499550960753


### 4. Train the model

In [38]:
# Hyper-parameters of the LoRA run, kept in a plain mapping so they can be
# inspected or logged before being turned into TrainingArguments.
args_definition = {
    # Checkpoint location; its last path component is also the repo name that
    # trainer.push_to_hub ends up using.
    "output_dir": "/mistral7bit-lora-sql",
    "overwrite_output_dir": True,
    # Evaluate on a step schedule.
    "evaluation_strategy": "steps",
    # 8 sequences x 4 accumulation steps = 32 sequences per optimizer step per device.
    "per_device_train_batch_size": 8,
    "gradient_accumulation_steps": 4,
    # Optimisation schedule: at most 500 steps, 100 warm-up steps, cosine decay
    # from 3e-4, gradients clipped at a norm of 0.3.
    "learning_rate": 3e-4,
    "max_steps": 500,
    "lr_scheduler_type": "cosine",
    "max_grad_norm": 0.3,
    "warmup_steps": 100,
    # Log and checkpoint every 20 steps (plus a log entry for the first step).
    "logging_steps": 20,
    "save_steps": 20,
    "logging_first_step": True,
    "seed": 1399,
    "bf16": True,
    "report_to": "wandb",
    # The best checkpoint is the one with the lowest eval_loss; it is restored
    # once training ends.
    "metric_for_best_model": "eval_loss",
    "greater_is_better": False,
    "load_best_model_at_end": True,
}
args = TrainingArguments(**args_definition)

In [39]:
# Supervised fine-tuning on prompts rendered by formatting_func, with sequences
# of up to 1024 tokens and packing enabled.
# NOTE(review): `model` is already the PEFT-wrapped model and `peft_config` is
# passed as well -- confirm the trainer expects both rather than only one of them.
# Early stopping uses a patience of 1 evaluation.
trainer = SFTTrainer(
    model=peft_model,
    args=args,
    train_dataset=train_data,
    eval_dataset=val_data,
    tokenizer=tokenizer,
    peft_config=peft_config,
    formatting_func=formatting_func,
    max_seq_length=1024,
    packing=True,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=1)]
)

Generating train split: 0 examples [00:00, ? examples/s]

Generating train split: 0 examples [00:00, ? examples/s]



In [40]:
# Run the fine-tuning. In the recorded run it ended at global_step=240, short of
# max_steps=500 -- presumably stopped by the early-stopping callback.
trainer.train()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mjj-ovalle[0m. Use [1m`wandb login --relogin`[0m to force relogin


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


Step,Training Loss,Validation Loss
20,0.7533,0.516929
40,0.4806,0.43382
60,0.4285,0.405471
80,0.403,0.394427
100,0.3969,0.386949
120,0.3898,0.381256
140,0.3836,0.376557
160,0.3786,0.372552
180,0.3708,0.367515
200,0.3681,0.364338


TrainOutput(global_step=240, training_loss=0.4240016654133797, metrics={'train_runtime': 2596.2428, 'train_samples_per_second': 6.163, 'train_steps_per_second': 0.193, 'total_flos': 3.375021593670451e+17, 'train_loss': 0.4240016654133797, 'epoch': 0.68})

#### 4.1 Compare outputs

In [41]:
# Merge the LoRA adapters into the base weights and keep the resulting model for inference.
# NOTE(review): presumably this also alters `peft_model` / `model` in place -- the
# untuned baseline is reloaded from the Hub further down instead of reusing `model`.
fine_tuned_model = peft_model.merge_and_unload()

In [42]:
# Same sample row and inference prompt as the baseline run, now completed by the merged model.
prompt = template.format(context=sample["context"], question=sample["question"])
input_ids = tokenizer(text=prompt, return_tensors="pt").to(device)
outputs = fine_tuned_model.generate(**input_ids, **generation_config)

In [43]:
# Unlike the baseline cell, the prompt is not stripped here, so the rendered
# completion shows the full prompt followed by the generated response.
display(Markdown("#### Completion:"))
display(Markdown(tokenizer.decode(token_ids=outputs[0], skip_special_tokens=True)))
display(Markdown("#### Answer:"))
Markdown(sample["answer"])

#### Completion:

You are a powerful text-to-SQL model. Your job is to answer questions about a database. You are given a question and context regarding one or more tables.

You must output the SQL query that answers the question.

### Input:
```What is the sum of Total, when Silver is greater than 1, when Nation is Germany (GER), and when Gold is less than 1?```

### Context:
```CREATE TABLE table_name_87 (total INTEGER, gold VARCHAR, silver VARCHAR, nation VARCHAR)```

### Response:
```SELECT SUM(total) FROM table_name_87 WHERE silver > 1 AND nation = "germany (ger)" AND gold < 1;```

#### Answer:

SELECT SUM(total) FROM table_name_87 WHERE silver > 1 AND nation = "germany (ger)" AND gold < 1

#### 4.2 Performance on test set

In [44]:
# Reload an untouched copy of the base checkpoint to act as the baseline in the
# side-by-side comparison below.
not_tuned_model = AutoModelForCausalLM.from_pretrained(
    pretrained_model_name_or_path=model_name,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    attn_implementation="flash_attention_2"
)

# Make sure the KV cache is enabled for inference. The flag is part of the model
# config, so it is set there; assigning `use_cache` on the model object itself
# would have no effect.
fine_tuned_model.config.use_cache = True

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [45]:
def generate_responses(example, ft_model, og_model):
    """Display the completions of the fine-tuned and the original model side by side.

    Builds the inference prompt for ``example`` and samples one completion from
    each model. It then renders the prompt, both completions and the expected
    answer as Markdown. Relies on the notebook-level ``template``, ``tokenizer``,
    ``device`` and ``generation_config``.

    Args:
        example: Dataset row providing "context", "question" and "answer".
        ft_model: Fine-tuned model; queried first.
        og_model: Original (untuned) model; queried second.

    Returns:
        None. Everything is shown through ``display``.
    """
    prompt = template.format(context=example["context"], question=example["question"])
    model_inputs = tokenizer(text=prompt, return_tensors="pt").to(device)
    prompt_length = model_inputs["input_ids"].shape[1]

    def _complete(model):
        # Sample one sequence and decode only the tokens generated after the prompt.
        generated = model.generate(**model_inputs, **generation_config)
        # Slicing off the prompt tokens is more reliable than removing the prompt
        # text from the decoded string, which silently does nothing whenever
        # decoding does not reproduce the prompt character for character.
        return tokenizer.decode(token_ids=generated[0][prompt_length:], skip_special_tokens=True)

    # Keep the original call order (fine-tuned first) so sampling consumes the RNG identically.
    ft_completion = _complete(ft_model)
    og_completion = _complete(og_model)

    sections = (
        ("#### Prompt:", prompt),
        ("#### Original Completion:", og_completion),
        ("#### Fine-tuned Completion:", ft_completion),
        ("#### Expected Answer:", "`{answer}`".format(answer=example["answer"])),
    )
    for heading, body in sections:
        display(Markdown(heading))
        display(Markdown(body))
    display(Markdown("-----------------------------"))

In [46]:
# Compare both models on the first five validation rows.
for i in range(5):
    generate_responses(val_data[i], ft_model=fine_tuned_model, og_model=not_tuned_model)

#### Prompt:

You are a powerful text-to-SQL model. Your job is to answer questions about a database. You are given a question and context regarding one or more tables.

You must output the SQL query that answers the question.

### Input:
```What Team has the Engine of Chevrolet 6.0 V8, the Manufacturer of Opel, and the Driver Chris Jackson?```

### Context:
```CREATE TABLE table_name_51 (team VARCHAR, driver VARCHAR, engine VARCHAR, manufacturer VARCHAR)```



#### Original Completion:

`table_name_51` has the following content:
```
team | driver | engine | manufacturer
---- | ------ | ------ | ------------
Porsche | Chris Jackson | Porsche 6.0 V12 | Porsche
McLaren | Lando Norris | Mercedes-AMG 1.6 V6 | McLaren
Aston Martin | Daniel Ricciardo | Mercedes-AMG 1.6 V

#### Fine-tuned Completion:

### Response:
```SELECT team FROM table_name_51 WHERE engine = "chevrolet 6.0 v8" AND manufacturer = "opel" AND driver = "chris jackson";```

#### Expected Answer:

`SELECT team FROM table_name_51 WHERE engine = "chevrolet 6.0 v8" AND manufacturer = "opel" AND driver = "chris jackson"`

-----------------------------

#### Prompt:

You are a powerful text-to-SQL model. Your job is to answer questions about a database. You are given a question and context regarding one or more tables.

You must output the SQL query that answers the question.

### Input:
```What record was set by walter brennan before 1941?```

### Context:
```CREATE TABLE table_name_30 (record_set VARCHAR, actor VARCHAR, year VARCHAR)```



#### Original Completion:

### Output:
```SELECT record_set, year FROM table_name_30 WHERE actor = "Walter Brennan" AND year < "1941"```

### Additional context (not necessary, but helpful):
```SELECT * FROM table_name_30 WHERE actor = "Walter Brennan"```


```SELECT * FROM table_name_30 WHERE year = "1941"```

## Examples

#### Fine-tuned Completion:

### Response:
```SELECT record_set FROM table_name_30 WHERE actor = "walter brennan" AND year < 1941;```

#### Expected Answer:

`SELECT record_set FROM table_name_30 WHERE actor = "walter brennan" AND year < 1941`

-----------------------------

#### Prompt:

You are a powerful text-to-SQL model. Your job is to answer questions about a database. You are given a question and context regarding one or more tables.

You must output the SQL query that answers the question.

### Input:
```What album came out before 2008 called We're not Made in the USA?```

### Context:
```CREATE TABLE table_name_93 (album VARCHAR, year VARCHAR, title VARCHAR)```



#### Original Completion:

### Answer:
```SELECT * FROM table_name_93 WHERE year < '2008' AND title = 'We're not Made in the USA'```


## Install
```npm install ask-me```

## Usage

```
const askMe = require("ask-me");
```

```
const q = "What album came out before 2008 called We'

#### Fine-tuned Completion:

### Response:
```SELECT album FROM table_name_93 WHERE year < 2008 AND title = "we're not made in the usa";```

#### Expected Answer:

`SELECT album FROM table_name_93 WHERE year < 2008 AND title = "we're not made in the usa"`

-----------------------------

#### Prompt:

You are a powerful text-to-SQL model. Your job is to answer questions about a database. You are given a question and context regarding one or more tables.

You must output the SQL query that answers the question.

### Input:
```What was the location and attendance when the record was 19-17?```

### Context:
```CREATE TABLE table_name_69 (location_attendance VARCHAR, record VARCHAR)```



#### Original Completion:

```INSERT INTO table_name_69 (location_attendance, record) VALUES('19-17', '12-29')```

```INSERT INTO table_name_69 (location_attendance, record) VALUES('19-19', '14-28')```

```INSERT INTO table_name_69 (location_attendance, record) VALUES('

#### Fine-tuned Completion:

### Response:
```SELECT location_attendance FROM table_name_69 WHERE record = "19-17";```

#### Expected Answer:

`SELECT location_attendance FROM table_name_69 WHERE record = "19-17"`

-----------------------------

#### Prompt:

You are a powerful text-to-SQL model. Your job is to answer questions about a database. You are given a question and context regarding one or more tables.

You must output the SQL query that answers the question.

### Input:
```What is the maximum power at rpm for the engine named 2.0 TDI that has a 1968cc displacement?```

### Context:
```CREATE TABLE table_name_90 (max_power_at_rpm VARCHAR, displacement VARCHAR, engine_name VARCHAR)```



#### Original Completion:

### Output:
```SELECT max_power_at_rpm FROM table_name_90 WHERE engine_name = '2.0 TDI' AND displacement = '1968cc'```

### Hint:
You can ask your prompt the following questions:
- What is the max power at rpm?
- What is the displacement?
- What is the engine name?




You are a powerful text

#### Fine-tuned Completion:

### Response:
```SELECT max_power_at_rpm FROM table_name_90 WHERE displacement = "1968cc" AND engine_name = "2.0 tdi";```

#### Expected Answer:

`SELECT max_power_at_rpm FROM table_name_90 WHERE displacement = "1968cc" AND engine_name = "2.0 tdi"`

-----------------------------

### 5. Save model

In [47]:
# Hub repository name used for the merged model and tokenizer pushed below.
model_save_name = "mistral7b-ft-lora-sql-v2"

In [48]:
# Save model & tokenizer
fine_tuned_model.push_to_hub(model_save_name)
tokenizer.push_to_hub(model_save_name)

  0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

README.md:   0%|          | 0.00/5.18k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/jjovalle99/mistral7b-ft-lora-sql-v2/commit/c64bd2a19a22fb886406100e6607fb3a5e2a2144', commit_message='Upload tokenizer', commit_description='', oid='c64bd2a19a22fb886406100e6607fb3a5e2a2144', pr_url=None, pr_revision=None, pr_num=None)

In [49]:
# Save adapters
trainer.push_to_hub(model_save_name + "adapters")

  0%|          | 0/1 [00:00<?, ?it/s]

training_args.bin:   0%|          | 0.00/4.92k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/jjovalle99/mistral7bit-lora-sql/commit/6cbea05a7da64b9476e0610e2ed5d17055cd7a1d', commit_message='mistral7b-ft-lora-sql-v2adapters', commit_description='', oid='6cbea05a7da64b9476e0610e2ed5d17055cd7a1d', pr_url=None, pr_revision=None, pr_num=None)