In [1]:
# Show the GPU model, driver/CUDA versions and memory usage available to this runtime.
!nvidia-smi

Tue Mar  5 21:09:35 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 545.23.08              Driver Version: 545.23.08    CUDA Version: 12.3     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA A100 80GB PCIe          On  | 00000000:00:0A.0 Off |                    0 |
| N/A   70C    P0              67W / 300W |      4MiB / 81920MiB |      0%      Default |
|                                         |                      |             Disabled |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [2]:
%%capture
!pip install transformers datasets accelerate peft huggingface_hub hf_transfer flash-attn trl wandb -qU

In [3]:
import os

# Credentials: keep any value that is already exported in the environment
# instead of overwriting it with the placeholder below. Never commit a real
# token in this cell.
os.environ.setdefault("HF_TOKEN", "<hftoken>")
os.environ.setdefault("WANDB_API_KEY", "<w&bapikey>")

# Opt in to the `hf_transfer` package (installed above) for Hub transfers.
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

# Weights & Biases project and run name used for experiment tracking.
os.environ["WANDB_PROJECT"] = "7bsqlmaster"
os.environ["WANDB_NAME"] = "Gemma-Finetune"

In [4]:
import torch
from IPython.display import Markdown
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig, EarlyStoppingCallback
from peft import LoraConfig, get_peft_model
from datasets import load_dataset, DatasetDict 
from transformers import TrainingArguments
from trl import SFTTrainer

In [5]:
# Prefer the GPU when one is visible; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Only query bf16 support when CUDA is present, so the CUDA-specific check is
# skipped (and reported as False) on a CPU-only machine.
bf16_supported = device == "cuda" and torch.cuda.is_bf16_supported()
print(f"Is Bfloat16 avaiable?: {bf16_supported}")

Is Bfloat16 avaiable?: True


### 1. Load model and tokenizer

In [6]:
# Hugging Face Hub id of the base checkpoint used for the model and tokenizer loads below.
model_name = "google/gemma-7b"

#### 1.1 Load model

In [7]:
# Load the base checkpoint in bfloat16 with the FlashAttention-2 attention
# implementation; weight placement is left to `device_map="auto"`.
model_load_options = {
    "torch_dtype": torch.bfloat16,
    "device_map": "auto",
    "attn_implementation": "flash_attention_2",
}
model = AutoModelForCausalLM.from_pretrained(model_name, **model_load_options)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

#### 1.2 Load tokenizer

In [8]:
# Tokenizer belonging to the same checkpoint, configured to pad on the left.
tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left")

In [9]:
# Count the entries of the tokenizer vocabulary and print it with thousands separators.
vocab_size = len(tokenizer.get_vocab())
print(f"Vocabulary size of Gemma7B: {vocab_size:,}")

Vocabulary size of Gemma7B: 256,000


In [10]:
# Inspect which special tokens (BOS / EOS / UNK / PAD) the tokenizer defines.
tokenizer.special_tokens_map

{'bos_token': '<bos>',
 'eos_token': '<eos>',
 'unk_token': '<unk>',
 'pad_token': '<pad>'}

#### 1.3 Inference test

In [11]:
# Sampling settings shared by every `generate` call in this notebook:
# up to 100 new tokens, sampled with top-k / top-p filtering at temperature 1.
generation_config = dict(
    max_new_tokens=100,
    do_sample=True,
    temperature=1,
    top_k=100,
    top_p=0.90,
)

In [12]:
# Smoke test: let the base model continue a free-form prompt and render the
# decoded text (special tokens stripped) as Markdown.
input_text = "Write me a poem about Machine Learning."
input_ids = tokenizer(text=input_text, return_tensors="pt")
input_ids = input_ids.to(device)
outputs = model.generate(**input_ids, **generation_config)
decoded_text = tokenizer.decode(token_ids=outputs[0], skip_special_tokens=True)
Markdown(decoded_text)

Write me a poem about Machine Learning. Make it rhyme. You've got an hour.

I'm 39, which I think is still pretty young. I am a writer, and I'm trying to learn how to write computer programs. I'm a software developer for a startup called Tradestream. The company I started 8 years ago, which was in the social media data analytics space, was acquired by Tradestream in August. I still work for Tradestream, and I'm now

### 2. Train data

#### 2.1. Load data

In [13]:
# Load the train split of the text-to-SQL dataset; its records are read below
# through the "question", "context" and "answer" fields.
dataset = load_dataset("b-mc2/sql-create-context", split="train")

#### 2.2 Split into train and val

In [14]:
# Hold out 100 examples for validation; the split itself is seeded.
train_test_split = dataset.train_test_split(test_size=100, seed=1399, shuffle=True)
# Seed the follow-up shuffles too: without a seed the row order changes on
# every run, so any example later picked by index would not be reproducible.
train_data = train_test_split["train"].shuffle(seed=1399)
val_data = train_test_split["test"].shuffle(seed=1399)
print(len(train_data), len(val_data))

78477 100


In [15]:
# Draw one training example at a torch-seeded random index; it is reused for
# the qualitative before/after checks.
torch.manual_seed(42)
sample_index = torch.randint(low=0, high=len(train_data), size=(1,)).item()
sample = train_data[sample_index]

#### 2.2 Testing baseline inference

In [16]:
# Inference-time prompt: the instruction, the question and the table schema.
# It ends after the context section and contains no response section, so the
# answer has to come from the model.
template = "".join(
    [
        "You are a powerful text-to-SQL model. Your job is to answer questions about a database. You are given a question and context regarding one or more tables.\n\n",
        "You must output the SQL query that answers the question.\n\n",
        "### Input:\n",
        "```{question}```\n\n",
        "### Context:\n",
        "```{context}```\n\n",
    ]
)

In [17]:
# Preview the inference prompt rendered for the sampled example.
Markdown(template.format(question=sample["question"], context=sample["context"]))

You are a powerful text-to-SQL model. Your job is to answer questions about a database. You are given a question and context regarding one or more tables.

You must output the SQL query that answers the question.

### Input:
```what is the date when the away team is bolton wanderers?```

### Context:
```CREATE TABLE table_name_66 (date VARCHAR, away_team VARCHAR)```



In [18]:
# Baseline: let the model (before any fine-tuning) complete the SQL prompt
# built from the sampled example.
prompt = template.format(question=sample["question"], context=sample["context"])
input_ids = tokenizer(text=prompt, return_tensors="pt")
input_ids = input_ids.to(device)
outputs = model.generate(**input_ids, **generation_config)

In [19]:
# Decode the generated sequence, cut the prompt text out of it, and show the
# remaining completion next to the reference answer.
baseline_completion = tokenizer.decode(token_ids=outputs[0], skip_special_tokens=True).replace(prompt, "")
display(Markdown("#### Completion:"))
display(Markdown(baseline_completion))
display(Markdown("#### Answer:"))
Markdown(sample["answer"])

#### Completion:

### Output:
```SELECT date FROM table_name_66 WHERE away_team = 'bolton wanderers'```

### Input:
```what is the color of the stone with the id 23?```

### Context:
```CREATE TABLE table_name_30 (id INTEGER, stone VARCHAR)```

### Output:
```SELECT stone FROM table_name_30 WHERE id = 23```

### Input:
```is the name

#### Answer:

SELECT date FROM table_name_66 WHERE away_team = "bolton wanderers"

#### 2.3 Creating template function

In [20]:
def formatting_func(example):
    """Render one dataset record as a complete training prompt.

    Args:
        example: Mapping with "context", "question" and "answer" entries.

    Returns:
        A single string: the instruction, the question and the schema context,
        followed by a "### Response:" section that holds the answer with a
        trailing ";" inside triple backticks.
    """
    sections = (
        "You are a powerful text-to-SQL model. Your job is to answer questions about a database. You are given a question and context regarding one or more tables.\n\n",
        "You must output the SQL query that answers the question.\n\n",
        "### Input:\n",
        "```{question}```\n\n",
        "### Context:\n",
        "```{context}```\n\n",
        "### Response:\n",
        "```{answer};```",
    )
    # Only the joined template is interpreted by `format`; braces inside the
    # record values are inserted verbatim.
    return "".join(sections).format(
        context=example["context"],
        question=example["question"],
        answer=example["answer"],
    )

In [21]:
# Preview the full training prompt (response section included) for one training example.
Markdown(formatting_func(train_data[1]))

You are a powerful text-to-SQL model. Your job is to answer questions about a database. You are given a question and context regarding one or more tables.

You must output the SQL query that answers the question.

### Input:
```Tell me the affiliation for mls team of metrostars and pick number of 26```

### Context:
```CREATE TABLE table_name_28 (affiliation VARCHAR, mls_team VARCHAR, pick__number VARCHAR)```

### Response:
```SELECT affiliation FROM table_name_28 WHERE mls_team = "metrostars" AND pick__number = 26;```

### 3. Parameter Efficient Fine-Tuning (PEFT) - LoRA

In [22]:
# Print the module tree to see the layer names available as LoRA targets.
print(model)

GemmaForCausalLM(
  (model): GemmaModel(
    (embed_tokens): Embedding(256000, 3072, padding_idx=0)
    (layers): ModuleList(
      (0-27): 28 x GemmaDecoderLayer(
        (self_attn): GemmaFlashAttention2(
          (q_proj): Linear(in_features=3072, out_features=4096, bias=False)
          (k_proj): Linear(in_features=3072, out_features=4096, bias=False)
          (v_proj): Linear(in_features=3072, out_features=4096, bias=False)
          (o_proj): Linear(in_features=4096, out_features=3072, bias=False)
          (rotary_emb): GemmaRotaryEmbedding()
        )
        (mlp): GemmaMLP(
          (gate_proj): Linear(in_features=3072, out_features=24576, bias=False)
          (up_proj): Linear(in_features=3072, out_features=24576, bias=False)
          (down_proj): Linear(in_features=24576, out_features=3072, bias=False)
          (act_fn): GELUActivation()
        )
        (input_layernorm): GemmaRMSNorm()
        (post_attention_layernorm): GemmaRMSNorm()
      )
    )
    (norm): Gem

#### 3.1 Prepare LoRA Fine-Tuning

In [23]:
# Trade compute for memory during training.
model.gradient_checkpointing_enable()
# The KV cache is incompatible with gradient checkpointing. The flag is read
# from the model config, so it has to be switched off there; assigning
# `use_cache` on the model object itself leaves the config value untouched.
model.config.use_cache = False

In [24]:
# LoRA is attached to every linear projection of the decoder layers: the
# attention projections and the MLP projections shown in the model printout.
attention_projections = ["q_proj", "k_proj", "v_proj", "o_proj"]
mlp_projections = ["gate_proj", "up_proj", "down_proj"]

peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    target_modules=attention_projections + mlp_projections,
)

In [25]:
# Wrap the base model with LoRA adapters as described by `peft_config`.
peft_model = get_peft_model(model=model, peft_config=peft_config)

#### 3.2 Check trainable parameters

In [26]:
def print_trainable_parameters(model):
    """Print trainable vs. total parameter counts and the trainable percentage.

    Args:
        model: Object exposing `named_parameters()` that yields `(name, param)`
            pairs, where each `param` has `numel()` and `requires_grad`.

    Raises:
        ZeroDivisionError: If the model exposes no parameters at all.
    """
    # One pass over the parameters: remember each size and whether it trains.
    sizes = [(param.numel(), param.requires_grad) for _, param in model.named_parameters()]
    all_param = sum(count for count, _ in sizes)
    trainable_params = sum(count for count, is_trainable in sizes if is_trainable)
    summary = (
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )
    print(summary)

In [27]:
# Report how many of the LoRA-wrapped model's parameters are trainable.
print_trainable_parameters(peft_model)

trainable params: 50003968 || all params: 8587684864 || trainable%: 0.5822753022717346


### 4. Train the model

In [28]:
args_definition = dict(
    # Checkpoints are written relative to the working directory; an absolute
    # path at the filesystem root needs root permissions and is not portable.
    output_dir="./gemma7bit-lora-sql",
    overwrite_output_dir=True,
    # Evaluate on a step schedule; checkpoints are written every 20 steps.
    evaluation_strategy="steps",
    save_steps=20,
    # 8 sequences per device x 4 accumulation steps = 32 sequences per optimizer step.
    per_device_train_batch_size=8,
    gradient_accumulation_steps=4,
    # Optimisation schedule: 100 warm-up steps, cosine decay, 500 steps in total.
    learning_rate=3e-4,
    max_steps=500,
    lr_scheduler_type="cosine",
    warmup_steps=100,
    max_grad_norm=0.3,
    # Logging.
    logging_steps=20,
    logging_first_step=True,
    report_to="wandb",
    seed=1399,
    bf16=True,
    # Keep the checkpoint with the lowest evaluation loss.
    metric_for_best_model="eval_loss",
    greater_is_better=False,
    load_best_model_at_end=True,
)
args = TrainingArguments(**args_definition)

In [29]:
trainer = SFTTrainer(
    # Model and tokenizer.
    # NOTE(review): `peft_model` is already LoRA-wrapped, so passing
    # `peft_config` again looks redundant — confirm how TRL treats it.
    model=peft_model,
    peft_config=peft_config,
    tokenizer=tokenizer,
    # Training hyper-parameters.
    args=args,
    # Data: each record is rendered by `formatting_func`; packing is enabled
    # with a maximum sequence length of 1024.
    train_dataset=train_data,
    eval_dataset=val_data,
    formatting_func=formatting_func,
    max_seq_length=1024,
    packing=True,
    # Early stopping with a patience of 2 evaluations.
    callbacks=[EarlyStoppingCallback(early_stopping_patience=2)],
)

Generating train split: 0 examples [00:00, ? examples/s]

Generating train split: 0 examples [00:00, ? examples/s]



In [30]:
# Run the fine-tuning loop; the returned TrainOutput is displayed as the cell result.
trainer.train()

[34m[1mwandb[0m: Currently logged in as: [33mjj-ovalle[0m. Use [1m`wandb login --relogin`[0m to force relogin


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Step,Training Loss,Validation Loss
20,16.1657,13.648507
40,7.8281,0.780848
60,0.6243,0.526963
80,0.5179,0.485938
100,0.4908,0.475376
120,0.4752,0.459994
140,0.4877,0.458405
160,0.4626,0.45597
180,0.4569,0.442755
200,0.4504,0.435354


TrainOutput(global_step=480, training_loss=1.4095023949941, metrics={'train_runtime': 6056.8142, 'train_samples_per_second': 2.642, 'train_steps_per_second': 0.083, 'total_flos': 7.360747943881605e+17, 'train_loss': 1.4095023949941, 'epoch': 1.53})

#### 4.1 Compare outputs

In [31]:
# Fold the trained LoRA weights into the base layers and get back a plain model for inference.
# NOTE(review): this appears to modify the wrapped base model in place, so adapter
# weights that should be kept separately need to be saved before this call — confirm.
fine_tuned_model = peft_model.merge_and_unload()

In [33]:
# Same sampled example as in the baseline check, now completed by the merged
# fine-tuned model.
prompt = template.format(question=sample["question"], context=sample["context"])
input_ids = tokenizer(text=prompt, return_tensors="pt")
input_ids = input_ids.to(device)
outputs = fine_tuned_model.generate(**input_ids, **generation_config)

In [34]:
# Show the full decoded sequence (prompt included) next to the reference answer.
fine_tuned_text = tokenizer.decode(token_ids=outputs[0], skip_special_tokens=True)
display(Markdown("#### Completion:"))
display(Markdown(fine_tuned_text))
display(Markdown("#### Answer:"))
Markdown(sample["answer"])

#### Completion:

You are a powerful text-to-SQL model. Your job is to answer questions about a database. You are given a question and context regarding one or more tables.

You must output the SQL query that answers the question.

### Input:
```what is the date when the away team is bolton wanderers?```

### Context:
```CREATE TABLE table_name_66 (date VARCHAR, away_team VARCHAR)```

### Response:
```SELECT date FROM table_name_66 WHERE away_team = "bolton wanderers";```

#### Answer:

SELECT date FROM table_name_66 WHERE away_team = "bolton wanderers"

#### 4.2 Performance on test set

In [35]:
# Reload a fresh copy of the base checkpoint to compare against the fine-tuned model.
not_tuned_model = AutoModelForCausalLM.from_pretrained(
    pretrained_model_name_or_path=model_name,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    attn_implementation="flash_attention_2"
)

# Make sure the KV cache is enabled for generation. The flag is read from the
# model config, so it is set there rather than as an attribute on the model object.
fine_tuned_model.config.use_cache = True

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [36]:
def generate_responses(example, ft_model, og_model):
    """Display the original and fine-tuned completions for one example.

    Args:
        example: Record with "context", "question" and "answer" entries.
        ft_model: Fine-tuned model; its completion is generated first.
        og_model: Original (not fine-tuned) model.

    Relies on the notebook-level `template`, `tokenizer`, `device` and
    `generation_config`. Returns nothing; all results are rendered as Markdown.
    """
    prompt = template.format(context=example["context"], question=example["question"])
    input_ids = tokenizer(text=prompt, return_tensors="pt").to(device)

    # Generate with the fine-tuned model first, then the original one, and
    # strip the prompt text from each decoded sequence.
    completions = {}
    for label, candidate in (("ft", ft_model), ("og", og_model)):
        generated = candidate.generate(**input_ids, **generation_config)
        decoded = tokenizer.decode(token_ids=generated[0], skip_special_tokens=True)
        completions[label] = decoded.replace(prompt, "")

    for markdown_text in (
        "#### Prompt:",
        prompt,
        "#### Original Completion:",
        completions["og"],
        "#### Fine-tuned Completion:",
        completions["ft"],
        "#### Expected Answer:",
    ):
        display(Markdown(markdown_text))
    display(Markdown("`{answer}`".format(answer=example["answer"])))
    display(Markdown("-----------------------------"))

In [38]:
# Compare both models on the first five validation examples.
for example_index in range(5):
    validation_example = val_data[example_index]
    generate_responses(validation_example, ft_model=fine_tuned_model, og_model=not_tuned_model)

#### Prompt:

You are a powerful text-to-SQL model. Your job is to answer questions about a database. You are given a question and context regarding one or more tables.

You must output the SQL query that answers the question.

### Input:
```How many Games has a Team of cai zaragoza and a Rank smaller than 1?```

### Context:
```CREATE TABLE table_name_24 (games INTEGER, team VARCHAR, rank VARCHAR)```



#### Original Completion:

```INSERT INTO table_name_24 VALUES (1, 'caisa', '6')```

```INSERT INTO table_name_24 VALUES (2, 'caisa', '5')```

```INSERT INTO table_name_24 VALUES (3, 'caisa', '4')```

```INSERT INTO table_name_24 VALUES (4, 'cai exorbitantly', '2')```

```INSERT INTO table_name_24 VALUES (

#### Fine-tuned Completion:

### Response:
```SELECT SUM(games) FROM table_name_24 WHERE team = "cai zaragoza" AND rank < 1;```

#### Expected Answer:

`SELECT SUM(games) FROM table_name_24 WHERE team = "cai zaragoza" AND rank < 1`

-----------------------------

#### Prompt:

You are a powerful text-to-SQL model. Your job is to answer questions about a database. You are given a question and context regarding one or more tables.

You must output the SQL query that answers the question.

### Input:
```What is the total interviews of Iowa, and with an evening gown smaller than 9.625?```

### Context:
```CREATE TABLE table_name_23 (interview VARCHAR, state VARCHAR, evening_gown VARCHAR)```



#### Original Completion:

```INSERT INTO table_name_23(interview, state, evening_gown) VALUES ('1', 'New York', '10.35')```

```INSERT INTO table_name_23(interview, state, evening_gown) VALUES ('2', 'California', '11.17')```

```INSERT INTO table_name_23(interview, state, evening_gown) VALUES ('3', 'New York', '11.6

#### Fine-tuned Completion:

### Response:
```SELECT COUNT(interview) FROM table_name_23 WHERE state = "iowa" AND evening_gown < 9.625;```

#### Expected Answer:

`SELECT COUNT(interview) FROM table_name_23 WHERE state = "iowa" AND evening_gown < 9.625`

-----------------------------

#### Prompt:

You are a powerful text-to-SQL model. Your job is to answer questions about a database. You are given a question and context regarding one or more tables.

You must output the SQL query that answers the question.

### Input:
```Can you tell me the High assists that has the Date of november 25?```

### Context:
```CREATE TABLE table_name_44 (high_assists VARCHAR, date VARCHAR)```



#### Original Completion:

```INSERT INTO table_name_44 (high_assists, date) VALUES ('Luka Doncic', 'November 25, 2020')```

```INSERT INTO table_name_44 (high_assists, date) VALUES ('Luka Doncic', 'November 25, 2020')```

### Output:
```SELECT high_assists FROM table_name_44 WHERE date = 'November 25,

#### Fine-tuned Completion:

### Response:
```SELECT high_assists FROM table_name_44 WHERE date = "november 25";```

#### Expected Answer:

`SELECT high_assists FROM table_name_44 WHERE date = "november 25"`

-----------------------------

#### Prompt:

You are a powerful text-to-SQL model. Your job is to answer questions about a database. You are given a question and context regarding one or more tables.

You must output the SQL query that answers the question.

### Input:
```What is the current venue for the Miami Masters tournament?```

### Context:
```CREATE TABLE table_14903081_1 (current_venue VARCHAR, tournament VARCHAR)```



#### Original Completion:

### Training examples:
```WHERE tournament = "Miami Masters" AND current_venue IS NOT NULL
SELECT current_venue FROM table_14903081_1 WHERE tournament = "Miami Masters" AND current_venue IS NOT NULL
```

- The SQL query will be given with syntax-highlighting in between ``````.
- Text-to-SQL queries typically require an additional sentence or two to provide context. You can predict this context too, and we

#### Fine-tuned Completion:

### Response:
```SELECT current_venue FROM table_14903081_1 WHERE tournament = "Miami Masters";```

#### Expected Answer:

`SELECT current_venue FROM table_14903081_1 WHERE tournament = "Miami Masters"`

-----------------------------

#### Prompt:

You are a powerful text-to-SQL model. Your job is to answer questions about a database. You are given a question and context regarding one or more tables.

You must output the SQL query that answers the question.

### Input:
```Which highest number of Seats has votes of 244,867?```

### Context:
```CREATE TABLE table_name_83 (seats INTEGER, votes VARCHAR)```



#### Original Completion:

```INSERT INTO table_name_83 (seats, votes)
VALUES
(2300, '224,568'),
(3950, '244,867'),
(3800, '185,679')```

### Output:
```SELECT seats FROM table_name_83
WHERE votes = '244,867'
ORDER BY seats DESC
LIMIT

#### Fine-tuned Completion:

### Response:
```SELECT MAX(seats) FROM table_name_83 WHERE votes = 244 OFFSET 867;```

#### Expected Answer:

`SELECT MAX(seats) FROM table_name_83 WHERE votes = 244 OFFSET 867`

-----------------------------

### 5. Save model

In [39]:
# Hub repository name used when pushing the merged model and the tokenizer.
model_save_name = "gemma7b-ft-lora-sql-v2"

In [40]:
# Upload the merged fine-tuned model and then the tokenizer to the same Hub repository.
# The tokenizer push is the last expression, so its CommitInfo is what the cell displays.
fine_tuned_model.push_to_hub(model_save_name)
tokenizer.push_to_hub(model_save_name)

  0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/2.11G [00:00<?, ?B/s]

README.md:   0%|          | 0.00/5.18k [00:00<?, ?B/s]

  0%|          | 0/1 [00:00<?, ?it/s]

tokenizer.json:   0%|          | 0.00/17.5M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/jjovalle99/gemma7b-ft-lora-sql-v2/commit/8e47d7355bda7b2ede0ce7e5569add2395d31781', commit_message='Upload tokenizer', commit_description='', oid='8e47d7355bda7b2ede0ce7e5569add2395d31781', pr_url=None, pr_revision=None, pr_num=None)

In [41]:
# Push the trainer's output (intended: the LoRA adapters) to the Hub.
# `Trainer.push_to_hub` takes the commit message as its first parameter; the
# target repository comes from the training arguments, not from this string.
# The argument is therefore passed by name to make clear what it controls.
trainer.push_to_hub(commit_message=model_save_name + "adapters")

  0%|          | 0/1 [00:00<?, ?it/s]

training_args.bin:   0%|          | 0.00/4.92k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/jjovalle99/gemma7bit-lora-sql/commit/ede022bae427fbea1269b9c4748e43ce6b7da034', commit_message='gemma7b-ft-lora-sql-v2adapters', commit_description='', oid='ede022bae427fbea1269b9c4748e43ce6b7da034', pr_url=None, pr_revision=None, pr_num=None)