In [None]:
!pip install -q bitsandbytes datasets accelerate loralib
!pip install -q git+https://github.com/huggingface/peft.git git+https://github.com/huggingface/transformers.git

In [1]:
import torch
import torch.nn as nn
import bitsandbytes as bnb
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM
import os


# Load Model

In [19]:
MODEL_ID = 'bigscience/bloom-1b1'
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float32,
    device_map='auto',
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

In [20]:
print(model)

BloomForCausalLM(
  (transformer): BloomModel(
    (word_embeddings): Embedding(250880, 1536)
    (word_embeddings_layernorm): LayerNorm((1536,), eps=1e-05, elementwise_affine=True)
    (h): ModuleList(
      (0-23): 24 x BloomBlock(
        (input_layernorm): LayerNorm((1536,), eps=1e-05, elementwise_affine=True)
        (self_attention): BloomAttention(
          (query_key_value): Linear(in_features=1536, out_features=4608, bias=True)
          (dense): Linear(in_features=1536, out_features=1536, bias=True)
          (attention_dropout): Dropout(p=0.0, inplace=False)
        )
        (post_attention_layernorm): LayerNorm((1536,), eps=1e-05, elementwise_affine=True)
        (mlp): BloomMLP(
          (dense_h_to_4h): Linear(in_features=1536, out_features=6144, bias=True)
          (gelu_impl): BloomGelu()
          (dense_4h_to_h): Linear(in_features=6144, out_features=1536, bias=True)
        )
      )
    )
    (ln_f): LayerNorm((1536,), eps=1e-05, elementwise_affine=True)
  )
  (

# Freeze Parameters

In [21]:
for params in model.parameters():
  params.requires_grad = False # Freeze all parameter
  if params.ndim == 1:
    params.data = params.data.to(torch.float32) # cast to float32 for stability

# Enables the gradients for the input embeddings.
# This is useful for fine-tuning adapter weights while keeping the model weights fixed.
model.enable_input_require_grads()
# reduce number of stored activations
model.gradient_checkpointing_enable()

In [22]:
#Cast the last layer for the ouput of the LM to be float of fp32
class CastToFloat(nn.Sequential):
    def forward(self,x):
        return super().forward(x).to(torch.float32)
model.lm_head = CastToFloat(model.lm_head)

# Lora Configuration

In [23]:
print(model)

BloomForCausalLM(
  (transformer): BloomModel(
    (word_embeddings): Embedding(250880, 1536)
    (word_embeddings_layernorm): LayerNorm((1536,), eps=1e-05, elementwise_affine=True)
    (h): ModuleList(
      (0-23): 24 x BloomBlock(
        (input_layernorm): LayerNorm((1536,), eps=1e-05, elementwise_affine=True)
        (self_attention): BloomAttention(
          (query_key_value): Linear(in_features=1536, out_features=4608, bias=True)
          (dense): Linear(in_features=1536, out_features=1536, bias=True)
          (attention_dropout): Dropout(p=0.0, inplace=False)
        )
        (post_attention_layernorm): LayerNorm((1536,), eps=1e-05, elementwise_affine=True)
        (mlp): BloomMLP(
          (dense_h_to_4h): Linear(in_features=1536, out_features=6144, bias=True)
          (gelu_impl): BloomGelu()
          (dense_4h_to_h): Linear(in_features=6144, out_features=1536, bias=True)
        )
      )
    )
    (ln_f): LayerNorm((1536,), eps=1e-05, elementwise_affine=True)
  )
  (

In [24]:
from peft import LoraConfig, get_peft_model
target_modules_to_apply = ['query_key_value', # as in the paper
                          'dense_h_to_4h',
                          'dense_4h_to_h']
task = 'CAUSAL_LM'
desired_rank = 8
lora_alpha = desired_rank*2
lora_dropout = 0.01
lora_config = LoraConfig(
    task_type = task,
    r = desired_rank,
    lora_alpha = lora_alpha,
    lora_dropout = lora_dropout,
    target_modules=target_modules_to_apply
)

peft_model = get_peft_model(model,lora_config)
peft_model.print_trainable_parameters()

trainable params: 4,128,768 || all params: 1,069,443,072 || trainable%: 0.38606711363127144


# Load Dataset

In [None]:
# pip install -U datasets

In [8]:
from datasets import load_dataset

dataset = load_dataset("b-mc2/sql-create-context")

Downloading readme:   0%|          | 0.00/4.43k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/21.8M [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

In [9]:
dataset

DatasetDict({
    train: Dataset({
        features: ['question', 'answer', 'context'],
        num_rows: 78577
    })
})

In [10]:
from torch.utils.data import random_split

generator1 = torch.Generator().manual_seed(42)
train,val,test = random_split(dataset['train'], [0.6,0.2,0.2], generator=generator1)

In [11]:
print(f'Sizes:\ntrain-> {len(train)}\nval-> {len(val)}\ntest-> {len(test)}\n')

Sizes:
train-> 47147
val-> 15715
test-> 15715



In [12]:
def slice_from_indices(indices):
  return dataset['train'][indices]

train_set = slice_from_indices(train.indices)
val_set   = slice_from_indices(val.indices)
test_set  = slice_from_indices(test.indices)

In [13]:
train_set.keys()

dict_keys(['question', 'answer', 'context'])

In [14]:
train_set['answer'][0]

'SELECT MIN(points) FROM table_name_36 WHERE position > 5'

In [15]:
import pandas as pd
Train = pd.DataFrame(train_set)
Val   = pd.DataFrame(val_set)
Test = pd.DataFrame(test_set)

In [None]:
Test.columns

In [16]:
# tamplate to adapt
def create_prompt(context, question, answer):
  if len(answer) < 1:
    answer = "Cannot Find Answer"

  prompt_template = f"### CONTEXT\n{context}\n\n### QUESTION\n{question}\n\n### ANSWER\n{answer}</s>"
  return prompt_template

mapped_qa_train = dataset.map(lambda sample: tokenizer(create_prompt(sample['context'], sample['question'], sample['answer'])),
                              )

Map:   0%|          | 0/78577 [00:00<?, ? examples/s]

In [None]:
mapped_qa_train

# Train

In [25]:
from transformers import Trainer,TrainingArguments,DataCollatorForLanguageModeling


trainer = Trainer(
    model = peft_model,
    train_dataset= mapped_qa_train["train"],

    args= TrainingArguments(
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    warmup_steps=100,
    max_steps=100,
    learning_rate=1e-3,
    fp16=True,
    logging_steps=1,
    output_dir='outputs',
    ),
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False)
)
model.config.use_cache = False  # silence the warnings. Please re-enable for inference!
trainer.train()

Step,Training Loss
1,3.4868
2,3.4319
3,3.5732
4,3.3464
5,3.4016
6,3.2708
7,3.3984
8,3.0965
9,3.0287
10,3.1111


TrainOutput(global_step=100, training_loss=1.600878701210022, metrics={'train_runtime': 108.7496, 'train_samples_per_second': 14.713, 'train_steps_per_second': 0.92, 'total_flos': 550666206904320.0, 'train_loss': 1.600878701210022, 'epoch': 0.02})

In [26]:
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

# Push To HF

In [29]:
HUGGING_FACE_USER_NAME = "ahmedelsayed"
dataset = "sql-create-context"
model = 'bloom-1b1'
peft_model.push_to_hub(f"{HUGGING_FACE_USER_NAME}/{model}", use_auth_token=True)

adapter_model.safetensors:   0%|          | 0.00/16.5M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/ahmedelsayed/bloom-1b1/commit/f3d6a628665a4e0959228ebb51c7d1f3943abb5e', commit_message='Upload model', commit_description='', oid='f3d6a628665a4e0959228ebb51c7d1f3943abb5e', pr_url=None, pr_revision=None, pr_num=None)

# Load From HF Repo

In [30]:
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer

peft_model_id = f"{HUGGING_FACE_USER_NAME}/{model}"
config = PeftConfig.from_pretrained(peft_model_id)
model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, return_dict=True, load_in_8bit=False, device_map='auto')
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

# Load the Lora model
qa_model = PeftModel.from_pretrained(model, peft_model_id)

adapter_config.json:   0%|          | 0.00/632 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/16.5M [00:00<?, ?B/s]

In [31]:
from IPython.display import display, Markdown

def make_inference(context, question):
  batch = tokenizer(f"### CONTEXT\n{context}\n\n### QUESTION\n{question}\n\n### ANSWER\n", return_tensors='pt')

  with torch.cuda.amp.autocast():
    output_tokens = qa_model.generate(**batch, max_new_tokens=200)

  display(Markdown((tokenizer.decode(output_tokens[0], skip_special_tokens=True))))



In [33]:
Test

Unnamed: 0,question,answer,context
0,"What is Home Team, when Tie No is 4?",SELECT home_team FROM table_name_49 WHERE tie_...,"CREATE TABLE table_name_49 (home_team VARCHAR,..."
1,What stadium was the game held in when the fin...,SELECT stadium FROM table_name_88 WHERE final_...,"CREATE TABLE table_name_88 (stadium VARCHAR, f..."
2,What was the score when she was a runner-up?,SELECT score FROM table_name_38 WHERE outcome ...,"CREATE TABLE table_name_38 (score VARCHAR, out..."
3,What district featured an election between ja...,SELECT district FROM table_1341884_40 WHERE ca...,CREATE TABLE table_1341884_40 (district VARCHA...
4,Show the names of phones and the districts of ...,"SELECT T3.Name, T2.District FROM phone_market ...","CREATE TABLE market (District VARCHAR, Market_..."
...,...,...,...
15710,"What is the highest value for Byes, when Wins ...",SELECT MAX(byes) FROM table_name_64 WHERE wins...,"CREATE TABLE table_name_64 (byes INTEGER, agai..."
15711,error (see notes),SELECT MIN(weeks_at_number_1) FROM table_25760...,CREATE TABLE table_25760427_2 (weeks_at_number...
15712,Which Percentage has a Draw of 6?,SELECT percentage FROM table_name_9 WHERE draw...,"CREATE TABLE table_name_9 (percentage VARCHAR,..."
15713,What is the Result of Week larger than 3 on 19...,SELECT result FROM table_name_94 WHERE week > ...,"CREATE TABLE table_name_94 (result VARCHAR, we..."


In [36]:
Test.iloc[0]['context']

'CREATE TABLE table_name_49 (home_team VARCHAR, tie_no VARCHAR)'

In [37]:
context = Test.iloc[0]['context']
question = Test.iloc[0]['question']

make_inference(context, question)



### CONTEXT
CREATE TABLE table_name_49 (home_team VARCHAR, tie_no VARCHAR)

### QUESTION
What is Home Team, when Tie No is 4?

### ANSWER
SELECT home_team FROM table_name_49 WHERE tie_no = 4

In [38]:
Test.iloc[0]['answer']

'SELECT home_team FROM table_name_49 WHERE tie_no = "4"'