In [1]:
!pip install bitsandbytes
!pip install accelerate
!pip install --upgrade transformers
!pip install --upgrade peft
!pip install --upgrade datasets

Collecting bitsandbytes
  Downloading bitsandbytes-0.42.0-py3-none-any.whl (105.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m105.0/105.0 MB[0m [31m9.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: bitsandbytes
Successfully installed bitsandbytes-0.42.0
Collecting accelerate
  Downloading accelerate-0.27.2-py3-none-any.whl (279 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m280.0/280.0 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: accelerate
Successfully installed accelerate-0.27.2
Collecting transformers
  Downloading transformers-4.38.0-py3-none-any.whl (8.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.5/8.5 MB[0m [31m44.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: transformers
  Attempting uninstall: transformers
    Found existing installation: transformers 4.37.2
    Uninstalling transformers-4.37.2:
      Successfully uninstalled transformers-4

In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch

In [3]:
tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T", padding_side="right",)
tokenizer.pad_token = tokenizer.eos_token
bnb_config = BitsAndBytesConfig(
   load_in_8bit=True,
#    bnb_4bit_quant_type="nf4",
#    bnb_4bit_use_double_quant=True,
   bnb_8bit_compute_dtype=torch.bfloat16
)
model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T", device_map="auto", quantization_config=bnb_config)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/776 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/560 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/4.40G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/129 [00:00<?, ?B/s]

In [4]:
txt = """###SYSTEM: Given the context, generate a SQL query for the following question

###context: CREATE TABLE head (age INTEGER)

###question: How many heads of the departments are older than 56 ?"""
tokens = tokenizer(txt, return_tensors="pt")['input_ids'].to("cuda")
op = model.generate(tokens, max_new_tokens=200)
print(tokenizer.decode(op[0]))

<s> ###SYSTEM: Given the context, generate a SQL query for the following question

###context: CREATE TABLE head (age INTEGER)

###question: How many heads of the departments are older than 56 ?

###answer:

SELECT COUNT(*) FROM head WHERE age > 56

###SYSTEM: Given the context, generate a SQL query for the following question

###context: CREATE TABLE head (age INTEGER)

###question: How many heads of the departments are older than 56 ?

###answer:

SELECT COUNT(*) FROM head WHERE age > 56

###SYSTEM: Given the context, generate a SQL query for the following question

###context: CREATE TABLE head (age INTEGER)

###question: How many heads of the departments are older than 56 ?

###answer:

SELECT COUNT(*) FROM head WHERE age > 56

###SYSTEM: Given the context, generate a SQL query for the following question

###context: CREATE TABLE head (age INTE


In [5]:
from peft import get_peft_model, LoraConfig, TaskType, prepare_model_for_kbit_training

model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

peft_config = LoraConfig(inference_mode=False, r=8, lora_alpha=32, lora_dropout=0.1, peft_type=TaskType.CAUSAL_LM)
model = get_peft_model(model, peft_config)

print(model.print_trainable_parameters())


trainable params: 1,126,400 || all params: 1,101,174,784 || trainable%: 0.10229075496156657
None


In [6]:
from datasets import load_dataset, Dataset
# Define the dataset for fine-tuning
dataset_id = "b-mc2/sql-create-context"

data = load_dataset(dataset_id, split="train")
data = data.select(range(2000))

Downloading readme:   0%|          | 0.00/4.43k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/21.8M [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

In [7]:
print(data[0].keys())

dict_keys(['question', 'context', 'answer'])


In [8]:
def format_dataset(data_point):
    prompt = f"""###SYSTEM: Given the context, generate an SQL query for the following question

###conext: {data_point['context']}

###question: {data_point['question']}
"""
    tokens = tokenizer(prompt,
        truncation=True,
        max_length=256,
        padding="max_length",)
    tokens["labels"] = tokens['input_ids'].copy()
    return tokens


In [9]:
dataset = data.map(format_dataset)
print(dataset[0].keys())

# Convert the dataframe back to a Dataset object.
#formatted_data = Dataset.from_pandas(df)

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

dict_keys(['question', 'context', 'answer', 'input_ids', 'attention_mask', 'labels'])


In [10]:
print(tokenizer.decode(dataset[0]['input_ids']))

<s> ###SYSTEM: Given the context, generate an SQL query for the following question

###conext: CREATE TABLE head (age INTEGER)

###question: How many heads of the departments are older than 56 ?
</s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s></s>


In [11]:
dataset = dataset.remove_columns(['answer', "context", 'question'])
print(dataset)

Dataset({
    features: ['input_ids', 'attention_mask', 'labels'],
    num_rows: 2000
})


In [12]:
tmp = dataset.train_test_split(test_size=0.1)
train_dataset = tmp["train"]
test_dataset = tmp["test"]
print(train_dataset)
print(test_dataset)

Dataset({
    features: ['input_ids', 'attention_mask', 'labels'],
    num_rows: 1800
})
Dataset({
    features: ['input_ids', 'attention_mask', 'labels'],
    num_rows: 200
})


In [13]:
from transformers import Trainer, TrainingArguments, DataCollatorForLanguageModeling
data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)

trainer = Trainer(
                    model = model,
                    train_dataset=train_dataset,
                    eval_dataset = test_dataset,
                    tokenizer = tokenizer,
                    data_collator = data_collator,

                    args = TrainingArguments(
                        output_dir="./training",
                        remove_unused_columns=False,
                        per_device_train_batch_size=2,
                        gradient_checkpointing=True,
                        gradient_accumulation_steps=4,
                        max_steps=200,
                        learning_rate=2.5e-5,
                        logging_steps=5,
                        fp16=True,
                        optim="paged_adamw_8bit",
                        save_strategy="steps",
                        save_steps=50,
                        evaluation_strategy="steps",
                        eval_steps=5,
                        do_eval=True,
                        label_names = ["input_ids", "labels", "attention_mask"],
                        report_to = "none",

                ))

In [14]:
trainer.train()

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Step,Training Loss,Validation Loss
5,3.0819,2.983482
10,2.9619,2.936488
15,2.9262,2.862749
20,2.825,2.796146
25,2.8095,2.73316
30,2.7051,2.656501
35,2.6617,2.579534
40,2.6438,2.529444
45,2.5542,2.450968
50,2.4043,2.37352




TrainOutput(global_step=200, training_loss=1.7736280822753907, metrics={'train_runtime': 1185.4176, 'train_samples_per_second': 1.35, 'train_steps_per_second': 0.169, 'total_flos': 2545185875558400.0, 'train_loss': 1.7736280822753907, 'epoch': 0.89})

In [15]:
txt = """###SYSTEM: Given the context, generate a SQL query for the following question

###context: CREATE TABLE head (age INTEGER)

###question: How many heads of the departments are older than 56 ?"""
tokens = tokenizer(txt, return_tensors="pt")['input_ids'].to("cuda")
op = model.generate(tokens, max_new_tokens=200)
print(tokenizer.decode(op[0]))



<s> ###SYSTEM: Given the context, generate a SQL query for the following question

###context: CREATE TABLE head (age INTEGER)

###question: How many heads of the departments are older than 56 ?

###solution: SELECT head.age FROM head WHERE age > 56

###expected_answer: 1

###note: 

###-

###-

###-

###-

###-

###-

###-

###-

###-

###-

###-

###-

###-

###-

###-

###-

###-

###-

###-

###-

###-

###-

###-

###-

###-

###-

###-

###-

###-

###-

###-

###-

###


In [18]:
print(type(tokenizer.decode(op[0])))

<class 'str'>


In [19]:
model.save_pretrained("tinyllama_200_steps", safe_serialization=False, )

In [27]:
import locale
def getpreferredencoding(do_setlocale = True):
    return "UTF-8"
locale.getpreferredencoding = getpreferredencoding

!zip -r tinyllama_200_steps.zip 'tinyllama_200_steps'

  adding: tinyllama_200_steps/ (stored 0%)
  adding: tinyllama_200_steps/README.md (deflated 66%)
  adding: tinyllama_200_steps/adapter_model.bin (deflated 8%)
  adding: tinyllama_200_steps/adapter_config.json (deflated 50%)


In [28]:
from peft import PeftModel
model = PeftModel.from_pretrained(model, "tinyllama_200_steps")