In [1]:
# Show which GPU the runtime provides (model, memory, driver) before loading anything onto it.
!nvidia-smi

Wed Mar  6 18:26:24 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 545.23.08              Driver Version: 545.23.08    CUDA Version: 12.3     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA A100 80GB PCIe          On  | 00000000:00:0A.0 Off |                    0 |
| N/A   39C    P0              47W / 300W |      4MiB / 81920MiB |      0%      Default |
|                                         |                      |             Disabled |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [2]:
%%capture
!pip install transformers datasets accelerate peft huggingface_hub hf_transfer flash-attn trl wandb -qU

In [3]:
import os

# Credentials and run metadata are handed over through environment variables.
# The two token values are placeholders: supply real ones at run time and never
# commit them with the notebook.
os.environ.update(
    {
        "HF_TOKEN": "<hftoken>",
        "HF_HUB_ENABLE_HF_TRANSFER": "1",
        "WANDB_API_KEY": "<w&bapikey>",
        "WANDB_PROJECT": "7bsqlmaster",
        "WANDB_NAME": "Llama2-Finetune",
    }
)

In [4]:
import torch
from IPython.display import Markdown
from transformers import AutoTokenizer, AutoModelForCausalLM, EarlyStoppingCallback
from peft import LoraConfig, get_peft_model
from datasets import load_dataset 
from transformers import TrainingArguments
from trl import SFTTrainer

In [5]:
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Only query bfloat16 support when CUDA is present: the query inspects the current
# CUDA device and can raise on a CPU-only machine with some PyTorch releases.
bf16_available = torch.cuda.is_available() and torch.cuda.is_bf16_supported()
print(f"Is Bfloat16 avaiable?: {bf16_available}")

Is Bfloat16 avaiable?: True


### 1. Load model and tokenizer

In [6]:
# Hub identifier of the base checkpoint; used for both the model and the tokenizer below.
model_name = "meta-llama/Llama-2-7b-hf"

#### 1.1 Load model

In [7]:
# Base model in bfloat16 with the FlashAttention-2 attention implementation;
# device placement is delegated to the loader via device_map="auto".
model_load_kwargs = dict(
    torch_dtype=torch.bfloat16,
    device_map="auto",
    attn_implementation="flash_attention_2",
)
model = AutoModelForCausalLM.from_pretrained(model_name, **model_load_kwargs)

config.json:   0%|          | 0.00/609 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

#### 1.2 Load tokenizer

In [8]:
# Tokenizer of the same base checkpoint, configured to pad on the left.
tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left")

tokenizer_config.json:   0%|          | 0.00/776 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

In [9]:
# Sanity check: report how many entries the tokenizer vocabulary contains.
print(f"Vocabulary size of Llama27B: {len(tokenizer.get_vocab()):,}")

Vocabulary size of Llama27B: 32,000


In [10]:
# Inspect the special tokens the tokenizer defines (note whether a pad token is present).
tokenizer.special_tokens_map

{'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>'}

In [11]:
# The special-token map shown above has no pad token, so reuse <unk> for padding.
tokenizer.pad_token = tokenizer.unk_token

#### 1.3 Inference test

In [12]:
# Sampling settings reused by every generate() call in this notebook.
# NOTE(review): padding here uses the EOS id, while the tokenizer's pad token was
# set to <unk> earlier — confirm the mismatch is intentional.
generation_config = dict(
    max_new_tokens=100,
    do_sample=True,
    temperature=1,
    top_k=100,
    top_p=0.90,
    pad_token_id=tokenizer.eos_token_id,
)

In [13]:
# Smoke test: generate from the untouched base model before any fine-tuning.
input_text = "Write me a poem about Machine Learning."
input_ids = tokenizer(text=input_text, return_tensors="pt").to(device)
outputs = model.generate(**input_ids, **generation_config)
# The decoded sequence starts with the prompt itself, followed by the generated continuation.
Markdown(tokenizer.decode(token_ids=outputs[0], skip_special_tokens=True))

Write me a poem about Machine Learning.
Write me a poem about Machine Learning. It should be funny, sarcastic and/or mean. No need for rhyme.
I'm doing an article about Machine Learning and have a little trouble to come up with the title. This is my best effort but it is only one way to go. I thought a poem could give it a touch of humor and if you are willing to have a fun and give a few minutes of your time for this, I would appreciate it

### 2. Train data

#### 2.1. Load data

In [14]:
# Text-to-SQL dataset from the Hub; each row is read below via its "question", "context" and "answer" fields.
dataset = load_dataset("b-mc2/sql-create-context", split="train")

Downloading readme:   0%|          | 0.00/4.43k [00:00<?, ?B/s]

Downloading data: 100%|██████████| 21.8M/21.8M [00:00<00:00, 75.0MB/s]


Generating train split: 0 examples [00:00, ? examples/s]

#### 2.2 Split into train and val

In [15]:
# One seed drives the split AND the follow-up shuffles. Without a seed on
# .shuffle() the row order (and therefore the example picked as `sample` later)
# changes on every run, which defeats the seeded split.
SPLIT_SEED = 1399
train_test_split = dataset.train_test_split(test_size=100, seed=SPLIT_SEED, shuffle=True)
train_data = train_test_split["train"].shuffle(seed=SPLIT_SEED)
val_data = train_test_split["test"].shuffle(seed=SPLIT_SEED)
print(len(train_data), len(val_data))

78477 100


In [16]:
# Draw one training example at a seeded random position to use as a running example.
torch.manual_seed(42)
sample_index = torch.randint(low=0, high=len(train_data), size=(1,)).item()
sample = train_data[sample_index]

#### 2.2 Testing baseline inference

In [17]:
# Inference prompt. It stops after the context block on purpose: the response
# section is what the model is expected to write.
template = (
    "You are a powerful text-to-SQL model. Your job is to answer questions about a database. You are given a question and context regarding one or more tables.\n\n"
    "You must output the SQL query that answers the question.\n\n"
    "### Input:\n"
    "```{question}```\n\n"
    "### Context:\n"
    "```{context}```\n\n"
)

In [18]:
# Render the prompt for the sampled example to check the template visually.
Markdown(template.format(question=sample["question"], context=sample["context"]))

You are a powerful text-to-SQL model. Your job is to answer questions about a database. You are given a question and context regarding one or more tables.

You must output the SQL query that answers the question.

### Input:
```What was the home team's score at Victoria Park?```

### Context:
```CREATE TABLE table_name_27 (home_team VARCHAR, venue VARCHAR)```



In [19]:
# Baseline: let the base model (not yet fine-tuned) complete the prompt for the sampled example.
prompt = template.format(context=sample["context"], question=sample["question"])
input_ids = tokenizer(text=prompt, return_tensors="pt").to(device)
outputs = model.generate(**input_ids, **generation_config)

In [20]:
# Show the model's completion next to the reference answer.
display(Markdown("#### Completion:"))
# The decoded text echoes the prompt; remove it so only the generated part is rendered.
display(Markdown(tokenizer.decode(token_ids=outputs[0], skip_special_tokens=True).replace(prompt, "")))
display(Markdown("#### Answer:"))
Markdown(sample["answer"])

#### Completion:

### Output:
```SELECT `home_team`, `venue`, `score` FROM table_name_27 WHERE home_team = 'Vancouver Whitecaps' AND venue = 'Victoria Park';```


#### Answer:

SELECT home_team AS score FROM table_name_27 WHERE venue = "victoria park"

#### 2.3 Creating template function

In [21]:
def formatting_func(example):
    """
    Build the full training text for one dataset row.

    Args:
        example: Mapping with "question", "context" and "answer" entries.

    Returns:
        The instruction prompt followed by a "### Response:" section holding
        the answer terminated with a semicolon.
    """
    sections = (
        "You are a powerful text-to-SQL model. Your job is to answer questions about a database. You are given a question and context regarding one or more tables.",
        "You must output the SQL query that answers the question.",
        "### Input:\n```{question}```",
        "### Context:\n```{context}```",
        "### Response:\n```{answer};```",
    )
    # Sections are separated by exactly one blank line.
    return "\n\n".join(sections).format(
        question=example["question"],
        context=example["context"],
        answer=example["answer"],
    )

In [22]:
# Render one formatted training example to verify the layout, including the response section.
Markdown(formatting_func(train_data[1]))

You are a powerful text-to-SQL model. Your job is to answer questions about a database. You are given a question and context regarding one or more tables.

You must output the SQL query that answers the question.

### Input:
```Silvio Santos is the presenter in what country?```

### Context:
```CREATE TABLE table_20780285_1 (country_region VARCHAR, presenters VARCHAR)```

### Response:
```SELECT country_region FROM table_20780285_1 WHERE presenters = "Silvio Santos";```

### 3. Parameter Efficient Fine-Tuning (PEFT) - LoRA

In [23]:
# Print the module tree to find the names of the linear layers that LoRA will target.
print(model)

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32000, 4096)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaFlashAttention2(
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (v_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=4096, out_features=11008, bias=False)
          (up_proj): Linear(in_features=4096, out_features=11008, bias=False)
          (down_proj): Linear(in_features=11008, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm()
        (post_attention_layernorm): LlamaRMSNorm()
      )
    )
    (norm): LlamaRMSNorm()
  )
  (lm_head

#### 3.1 Prepare LoRA Fine-Tuning

In [24]:
# Trade compute for memory during training.
model.gradient_checkpointing_enable()
# The KV cache is incompatible with gradient checkpointing. The flag lives on the
# model *config*; assigning `model.use_cache` only creates an unused attribute and
# leaves the cache enabled (transformers then warns and overrides it at train time).
if getattr(model.config, "use_cache", False):
    model.config.use_cache = False

In [25]:
# LoRA adapter configuration for causal language modelling.
peft_config = LoraConfig(
    r=16,  # rank of the low-rank update matrices
    lora_alpha=32,  # LoRA scaling factor
    # Every linear projection listed in the module printout above:
    # the four attention projections and the three MLP projections.
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

In [26]:
# Wrap the base model with the LoRA adapters described by peft_config.
peft_model = get_peft_model(model=model, peft_config=peft_config)

#### 3.2 Check trainable parameters

In [27]:
def print_trainable_parameters(model):
    """
    Print how many of the model's parameters are trainable.

    Args:
        model: Any object exposing ``named_parameters()`` that yields
            ``(name, parameter)`` pairs, where each parameter provides
            ``numel()`` and ``requires_grad``.

    Prints one line with the trainable count, the total count and the
    trainable percentage. A model without parameters reports 0 / 0 / 0.0
    instead of raising ``ZeroDivisionError``.
    """
    total_count = 0
    trainable_count = 0
    for _, tensor in model.named_parameters():
        size = tensor.numel()
        total_count += size
        if tensor.requires_grad:
            trainable_count += size

    # Guard the ratio so that a parameterless model does not divide by zero.
    trainable_pct = 100 * trainable_count / total_count if total_count else 0.0
    print(
        f"trainable params: {trainable_count} || all params: {total_count} || trainable%: {trainable_pct}"
    )

In [28]:
# Report what fraction of the wrapped model's parameters will actually be trained.
print_trainable_parameters(peft_model)

trainable params: 39976960 || all params: 6778392576 || trainable%: 0.589770503135875


### 4. Train the model

In [29]:
args_definition = dict(
    # Relative path: checkpoints go under the working directory instead of the
    # filesystem root (the directory name, and thus the derived Hub repo name, is unchanged).
    output_dir="llama7bit-lora-sql",
    overwrite_output_dir=True,
    evaluation_strategy="steps",
    # Spelled out rather than inherited from logging_steps: with
    # load_best_model_at_end=True, save_steps must be a multiple of eval_steps.
    eval_steps=20,
    per_device_train_batch_size=8,
    gradient_accumulation_steps=4,  # 8 x 4 = 32 sequences per optimizer step per device
    learning_rate=3e-4,
    max_steps=500,
    lr_scheduler_type="cosine",
    max_grad_norm=0.3,
    warmup_steps=100,
    logging_steps=20,
    save_steps=20,
    logging_first_step=True,
    seed=1399,
    bf16=True,
    report_to="wandb",
    # Keep the checkpoint with the lowest validation loss.
    metric_for_best_model="eval_loss",
    greater_is_better=False,
    load_best_model_at_end=True,
)
args = TrainingArguments(**args_definition)

In [30]:
# Supervised fine-tuning trainer: rows are turned into text by formatting_func and
# evaluated on val_data at the cadence set in `args`.
trainer = SFTTrainer(
    model=peft_model,
    args=args,
    train_dataset=train_data,
    eval_dataset=val_data,
    tokenizer=tokenizer,
    # NOTE(review): `peft_model` is already wrapped by get_peft_model, so passing
    # peft_config again looks redundant — confirm against the installed TRL version.
    peft_config=peft_config,
    formatting_func=formatting_func,
    max_seq_length=1024,
    packing=True,
    # Stop as soon as one evaluation fails to improve on the best eval_loss.
    callbacks=[EarlyStoppingCallback(early_stopping_patience=1)]
)

Generating train split: 0 examples [00:00, ? examples/s]

Generating train split: 0 examples [00:00, ? examples/s]



In [31]:
# Run fine-tuning; metrics are logged every 20 steps and reported to Weights & Biases.
trainer.train()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mjj-ovalle[0m. Use [1m`wandb login --relogin`[0m to force relogin


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Step,Training Loss,Validation Loss
20,1.2068,0.818085
40,0.6757,0.514753
60,0.5104,0.455208
80,0.4633,0.426902
100,0.442,0.410962
120,0.428,0.399287
140,0.4209,0.398281
160,0.4142,0.393196
180,0.4032,0.388849
200,0.3999,0.384112


TrainOutput(global_step=360, training_loss=0.47063878575960794, metrics={'train_runtime': 3932.4332, 'train_samples_per_second': 4.069, 'train_steps_per_second': 0.127, 'total_flos': 4.7036738195737805e+17, 'train_loss': 0.47063878575960794, 'epoch': 1.05})

#### 4.1 Compare outputs

In [32]:
# Merge the trained LoRA weights into the base weights to get a plain model for inference.
# NOTE(review): presumably this also alters the model held by `peft_model` / `trainer`;
# confirm before relying on the adapters after this point.
fine_tuned_model = peft_model.merge_and_unload()

In [33]:
# Same sampled example and prompt as the baseline run, now answered by the fine-tuned model.
prompt = template.format(context=sample["context"], question=sample["question"])
input_ids = tokenizer(text=prompt, return_tensors="pt").to(device)
outputs = fine_tuned_model.generate(**input_ids, **generation_config)

In [34]:
# Show the fine-tuned completion next to the reference answer.
display(Markdown("#### Completion:"))
# Unlike the baseline cell, the prompt is not stripped here, so the rendered text begins with the prompt.
display(Markdown(tokenizer.decode(token_ids=outputs[0], skip_special_tokens=True)))
display(Markdown("#### Answer:"))
Markdown(sample["answer"])

#### Completion:

You are a powerful text-to-SQL model. Your job is to answer questions about a database. You are given a question and context regarding one or more tables.

You must output the SQL query that answers the question.

### Input:
```What was the home team's score at Victoria Park?```

### Context:
```CREATE TABLE table_name_27 (home_team VARCHAR, venue VARCHAR)```

### Response:
```SELECT home_team AS score FROM table_name_27 WHERE venue = "victoria park";```

#### Answer:

SELECT home_team AS score FROM table_name_27 WHERE venue = "victoria park"

#### 4.2 Performance on test set

In [35]:
# Load a fresh, untouched copy of the base checkpoint as the comparison baseline.
not_tuned_model = AutoModelForCausalLM.from_pretrained(
    pretrained_model_name_or_path=model_name,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    attn_implementation="flash_attention_2",
)

# Make sure the KV cache is enabled for generation. The flag is read from the model
# config; assigning `fine_tuned_model.use_cache` would only create an unused attribute.
fine_tuned_model.config.use_cache = True

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [36]:
def generate_responses(example, ft_model, og_model):
    """
    Display the original and fine-tuned completions for one example.

    Args:
        example: Mapping with "question", "context" and "answer" entries.
        ft_model: Fine-tuned model used for the "Fine-tuned Completion" section.
        og_model: Original model used for the "Original Completion" section.

    Renders, as Markdown, the prompt, both completions (with the echoed prompt
    removed), the expected answer and a separator line. Returns nothing.
    """
    prompt = template.format(question=example["question"], context=example["context"])
    encoded = tokenizer(text=prompt, return_tensors="pt").to(device)

    # Sampling is enabled, so keep the call order: fine-tuned model first, then the original.
    ft_outputs = ft_model.generate(**encoded, **generation_config)
    og_outputs = og_model.generate(**encoded, **generation_config)

    def completion_of(outputs):
        # Decode the first returned sequence and drop the echoed prompt.
        decoded = tokenizer.decode(token_ids=outputs[0], skip_special_tokens=True)
        return decoded.replace(prompt, "")

    sections = [
        ("#### Prompt:", prompt),
        ("#### Original Completion:", completion_of(og_outputs)),
        ("#### Fine-tuned Completion:", completion_of(ft_outputs)),
        ("#### Expected Answer:", "`{answer}`".format(answer=example["answer"])),
    ]
    for heading, body in sections:
        display(Markdown(heading))
        display(Markdown(body))
    display(Markdown("-----------------------------"))

In [37]:
# Side-by-side comparison on the first five validation examples.
for i in range(5):
    generate_responses(val_data[i], ft_model=fine_tuned_model, og_model=not_tuned_model)

#### Prompt:

You are a powerful text-to-SQL model. Your job is to answer questions about a database. You are given a question and context regarding one or more tables.

You must output the SQL query that answers the question.

### Input:
```Which highest number of Seats has votes of 244,867?```

### Context:
```CREATE TABLE table_name_83 (seats INTEGER, votes VARCHAR)```



#### Original Completion:

### Example Output:
```SELECT * FROM table_name_83 WHERE votes = 244867 ORDER BY seats DESC LIMIT 1```

### Guidelines
1. If no conditions are given, you must assume that all are true.

### Sample
```SELECT * FROM table_name_83 WHERE votes >= 100 ORDER BY votes DESC LIMIT 1```


### Solution
```SELECT * FROM

#### Fine-tuned Completion:

### Response:
```SELECT MAX(seats) FROM table_name_83 WHERE votes = 244 OFFSET 867;```

#### Expected Answer:

`SELECT MAX(seats) FROM table_name_83 WHERE votes = 244 OFFSET 867`

-----------------------------

#### Prompt:

You are a powerful text-to-SQL model. Your job is to answer questions about a database. You are given a question and context regarding one or more tables.

You must output the SQL query that answers the question.

### Input:
```What's the average number of silver medals for germany (GER) having more than 3 bronze?```

### Context:
```CREATE TABLE table_name_44 (silver INTEGER, nation VARCHAR, bronze VARCHAR)```



#### Original Completion:

### Output:
```select nation, avg(silver) from table_name_44 where bronze > 3 group by nation```

### Context:
```CREATE TABLE table_name_22 (silver INTEGER, bronze INTEGER, gold INTEGER)```

### Output:
```select nation, avg(silver) from table_name_22 where gold > 0 group by nation```



#### Fine-tuned Completion:

### Response:
```SELECT AVG(silver) FROM table_name_44 WHERE nation = "germany (ger)" AND bronze > 3;```

#### Expected Answer:

`SELECT AVG(silver) FROM table_name_44 WHERE nation = "germany (ger)" AND bronze > 3`

-----------------------------

#### Prompt:

You are a powerful text-to-SQL model. Your job is to answer questions about a database. You are given a question and context regarding one or more tables.

You must output the SQL query that answers the question.

### Input:
```Who was the home team when there was a record of 20–13–3?```

### Context:
```CREATE TABLE table_name_52 (home VARCHAR, record VARCHAR)```



#### Original Completion:

### Output:
```SELECT * FROM table_name_52 WHERE home='San Francisco 49ers' and record=20```

### Example:
```Which team scored 12 touchdowns in the regular season?```
```Create Table table_name_44(home VARCHAR, touchdowns INT);```
```SELECT * FROM table_name_44 WHERE home='Pittsburgh Steelers' and touchdown

#### Fine-tuned Completion:

### Response:
```SELECT home FROM table_name_52 WHERE record = "20–13–3";```

#### Expected Answer:

`SELECT home FROM table_name_52 WHERE record = "20–13–3"`

-----------------------------

#### Prompt:

You are a powerful text-to-SQL model. Your job is to answer questions about a database. You are given a question and context regarding one or more tables.

You must output the SQL query that answers the question.

### Input:
```Who had the high assist in a game number above 77 for Milwaukee?```

### Context:
```CREATE TABLE table_name_24 (high_assists VARCHAR, game VARCHAR, team VARCHAR)```



#### Original Completion:

### Output:
```SELECT * FROM table_name_24 WHERE game > 77 AND high_assists > 4````

# Instructions

## Data Input Format
The data in the form of a question and context. The context is a string describing a table.

## Data Output Format
The output will be a string of SQL code that represents the query for the question.

## Sample Input

```Who had the high

#### Fine-tuned Completion:

### Response:
```SELECT high_assists FROM table_name_24 WHERE game > 77 AND team = "milwaukee";```

#### Expected Answer:

`SELECT high_assists FROM table_name_24 WHERE game > 77 AND team = "milwaukee"`

-----------------------------

#### Prompt:

You are a powerful text-to-SQL model. Your job is to answer questions about a database. You are given a question and context regarding one or more tables.

You must output the SQL query that answers the question.

### Input:
```Name the total number for 3 public```

### Context:
```CREATE TABLE table_26375386_22 (total VARCHAR, public VARCHAR)```



#### Original Completion:

### Output:
```SELECT (SELECT total + public FROM table_26375386_22) FROM table_26375386_26```

# Constraints:
-   SQL commands are case-insensitive.
-   SQL queries can only be generated from the given context, and the context cannot contain more than 100 rows.

# Source:
[HackerRank](

#### Fine-tuned Completion:

### Response:
```SELECT COUNT(total) FROM table_26375386_22 WHERE public = 3;```

#### Expected Answer:

`SELECT COUNT(total) FROM table_26375386_22 WHERE public = 3`

-----------------------------

### 5. Save model

In [38]:
# Name passed to push_to_hub for the merged model and tokenizer below.
model_save_name = "llama7b-ft-lora-sql-v2"

In [39]:
# Upload the merged fine-tuned model and its tokenizer to the Hub under model_save_name.
fine_tuned_model.push_to_hub(model_save_name)
tokenizer.push_to_hub(model_save_name)

  0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/3.59G [00:00<?, ?B/s]

README.md:   0%|          | 0.00/5.18k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/jjovalle99/llama7b-ft-lora-sql-v2/commit/ab7e12d649d0c7eb10667ac9ac6612905d5523ba', commit_message='Upload tokenizer', commit_description='', oid='ab7e12d649d0c7eb10667ac9ac6612905d5523ba', pr_url=None, pr_revision=None, pr_num=None)

In [40]:
# Upload the trainer's output to the Hub.
# NOTE(review): the first parameter of Trainer.push_to_hub is the commit message,
# not a repository name. The recorded result below confirms it: the string ends up
# as commit_message on the "llama7bit-lora-sql" repo (named after output_dir).
# It is passed by keyword here to make that explicit; pushing to a dedicated
# adapters repo would need a different mechanism.
trainer.push_to_hub(commit_message=model_save_name + "adapters")

  0%|          | 0/1 [00:00<?, ?it/s]

training_args.bin:   0%|          | 0.00/4.92k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/jjovalle99/llama7bit-lora-sql/commit/c505945ae0f709d73a3ab4c434e53fc100379769', commit_message='llama7b-ft-lora-sql-v2adapters', commit_description='', oid='c505945ae0f709d73a3ab4c434e53fc100379769', pr_url=None, pr_revision=None, pr_num=None)