### Step-1: Basic Installations

#### Step-1.1: GPU Information

In [1]:
%%capture
!pip install GPUtil

In [2]:
import torch
import GPUtil
import os

GPUtil.showUtilization()

if torch.cuda.is_available():
    print("GPU is available!")
else:
    print("GPU not available.")

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"

# Change if needed to accomodate memory requirements!
os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # Set to the GPU ID (0 for T4)

!nvidia-smi # Verify notebook is running on a GPU

| ID | GPU | MEM |
------------------
|  0 |  0% |  0% |
|  1 |  0% |  0% |
GPU is available!
Sun Feb  2 23:49:48 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 560.35.03              Driver Version: 560.35.03      CUDA Version: 12.6     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   39C    P8              9W /   70W |       3MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+---

In [3]:
%%capture

!pip install -q datasets
!pip install -q bitsandbytes
!pip install -q -U bitsandbytes

In [4]:
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [5]:
import transformers
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    GenerationConfig,
    TrainingArguments,
    Trainer,
    BitsAndBytesConfig,
)

from datasets import load_dataset
from peft import LoraConfig, PeftModel, get_peft_model
from peft import prepare_model_for_kbit_training
import bitsandbytes

### Step-2: Loading the Pretrained DeepSeek-R1 Model and Quantization

In [6]:
base_model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

'''
NOTE:
load_in_4bit: Enables loading the model using 4-bit quantization, reducing
memory and computational costs.

bnb_4bit_compute_dtype: Sets the computational data type for the 4-bit quantized
model, controlling precision during inference or training.
'''

model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    quantization_config=bnb_config,
    device_map="auto"
)

tokenizer = AutoTokenizer.from_pretrained(base_model_id)

config.json:   0%|          | 0.00/679 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.55G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/181 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/3.06k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

### Step-3: Parameter-Efficient Fine-Tuning (PEFT)

In [7]:
# gradient checkpointing to reduce memory usage for increased compute time
# model.gradient_checkpointing_enable()

# compressing the base model into a smaller, more efficient model
# model = prepare_model_for_kbit_training(model)

#### Step-3.1: Configure Model with Low-Rank Adaptation (LoRA)

##### Let's Find the Name of the Layers to Target for LoRA Training

In [8]:
for name, module in model.named_modules(): # Check the modules within attention
    if 'attn' in name or 'attention' in name:
        print(name)
        for sub_name, sub_module in module.named_modules(): # Check the sub-modules within attention
            print(f"  - {sub_name}")

model.layers.0.self_attn
  - 
  - q_proj
  - k_proj
  - v_proj
  - o_proj
  - rotary_emb
model.layers.0.self_attn.q_proj
  - 
model.layers.0.self_attn.k_proj
  - 
model.layers.0.self_attn.v_proj
  - 
model.layers.0.self_attn.o_proj
  - 
model.layers.0.self_attn.rotary_emb
  - 
model.layers.0.post_attention_layernorm
  - 
model.layers.1.self_attn
  - 
  - q_proj
  - k_proj
  - v_proj
  - o_proj
  - rotary_emb
model.layers.1.self_attn.q_proj
  - 
model.layers.1.self_attn.k_proj
  - 
model.layers.1.self_attn.v_proj
  - 
model.layers.1.self_attn.o_proj
  - 
model.layers.1.self_attn.rotary_emb
  - 
model.layers.1.post_attention_layernorm
  - 
model.layers.2.self_attn
  - 
  - q_proj
  - k_proj
  - v_proj
  - o_proj
  - rotary_emb
model.layers.2.self_attn.q_proj
  - 
model.layers.2.self_attn.k_proj
  - 
model.layers.2.self_attn.v_proj
  - 
model.layers.2.self_attn.o_proj
  - 
model.layers.2.self_attn.rotary_emb
  - 
model.layers.2.post_attention_layernorm
  - 
model.layers.3.self_attn
  - 
 

In [9]:
def print_parameters(model):
    trainable_params, all_params = 0, 0
    for _, param in model.named_parameters():
        all_params += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()

    print(f"All parameters introduced by Non-LoRA: {all_params:,}")
    print(f"Parameters introduced by LoRA: {trainable_params:,}")
    print(f"Reduction factor: {round(trainable_params / all_params, 5) * 100}")

In [10]:
config = LoraConfig(
    r=8,
    lora_alpha=64,
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        # "gate_proj",
        # "down_proj",
        # "up_proj",
        # "o_proj"
    ],
    bias="none",
    lora_dropout=0.05,
    task_type="CAUSAL_LM",
)


model = get_peft_model(model, config)

### Step-4: Loading Dataset for Training

In [11]:
# Load the Spider dataset
data = load_dataset("xlangai/spider")
print("First Data", data["train"][0])

README.md:   0%|          | 0.00/5.51k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/831k [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/126k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/7000 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/1034 [00:00<?, ? examples/s]

First Data {'db_id': 'department_management', 'query': 'SELECT count(*) FROM head WHERE age  >  56', 'question': 'How many heads of the departments are older than 56 ?', 'query_toks': ['SELECT', 'count', '(', '*', ')', 'FROM', 'head', 'WHERE', 'age', '>', '56'], 'query_toks_no_value': ['select', 'count', '(', '*', ')', 'from', 'head', 'where', 'age', '>', 'value'], 'question_toks': ['How', 'many', 'heads', 'of', 'the', 'departments', 'are', 'older', 'than', '56', '?']}


#### Step-4.1: Lets Define a Function to Tokenize the Input.
Let's tokenize the 'question' and 'query' columns for training.

In [12]:
import sqlparse


# Formatting function to preprocess the data
def formatting_func(samples):
    questions_with_preamble = [
        f"{question} SQL:" for question in samples["question"]
    ]

    sql_queries = []
    for query in samples["query"]:
        sql_query = sqlparse.format(
            query, reindent=True, keyword_case='upper'
        )
        sql_queries.append(sql_query)

    formatted_queries = [
        f"```sql\n{query}\n```" for query in sql_queries
    ]

    return {
        "questions": questions_with_preamble,
        "queries": formatted_queries
    }


# Tokenization function
def tokenize_function(samples):
    max_length = 1024  # Set a reasonable max_length based on your data

    inputs = tokenizer(
        samples["questions"],
        truncation=True,
        padding="max_length",
        max_length=max_length,
        return_tensors="pt"
    )

    outputs = tokenizer(
        samples["queries"],
        truncation=True,
        padding="max_length",
        max_length=max_length,
        return_tensors="pt"
    )

    return {
        "input_ids": inputs["input_ids"],
        "labels": outputs["input_ids"]
    }

In [13]:
# Apply the formatting function to the dataset
data = data.map(formatting_func, batched=True)

# Apply the tokenization function to the formatted data
data = data.map(tokenize_function, batched=True)

Map:   0%|          | 0/7000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1034 [00:00<?, ? examples/s]

Map:   0%|          | 0/7000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1034 [00:00<?, ? examples/s]

### Step-5: Training the Model

In [14]:
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [16]:
train_epochs_val = 5
'''
train_epochs_val is the times the model will iterate over the entire training
dataset. Increasing the value may allow the model to learn more from the data,
but be cautious of overfitting.
'''

learning_rate_val = 2e-5


trainer = Trainer(
    model=model,
    train_dataset=data["train"],
    eval_dataset=data["validation"],
    args=TrainingArguments(
        output_dir="./finetunedModel",       # directory where checkpoints are saved
        per_device_train_batch_size=1,       # number of samples processed in one forward/backward pass per GPU
        per_device_eval_batch_size=1,
        gradient_accumulation_steps=4,       # [default = 1] number of updates steps to accumulate the gradients for
        num_train_epochs=train_epochs_val,   # [IMPORTANT] number of times of complete pass through the entire training dataset
        learning_rate=learning_rate_val,
        weight_decay=0.01,
        fp16=True,
        logging_steps = 10,                   # specify frequency of printing training loss data
        save_steps=500,                       # save checkpoint after number of iterations
        evaluation_strategy="epoch",
        push_to_hub=True,
        hub_model_id="abdussahid26/DeepSeek-R1-Distill-Qwen-1.5B-xlangai-spider-text-2-SQL-5-train-epochs",
        report_to="none",
        optim="paged_adamw_8bit",            # use paging to improve memory management of default adamw optimizer
        logging_dir="./logs",                # directory to save training log outputs
    ),

)



In [17]:
trainer.train()

Epoch,Training Loss,Validation Loss
1,1.235,0.298801
2,1.1652,0.293118
3,1.1291,0.291187
4,0.9951,0.290897
5,0.8963,0.290053


TrainOutput(global_step=8750, training_loss=1.3336515538351876, metrics={'train_runtime': 28522.2713, 'train_samples_per_second': 1.227, 'train_steps_per_second': 0.307, 'total_flos': 3.3228093652992e+17, 'train_loss': 1.3336515538351876, 'epoch': 5.0})

### Step-6: Inference

In [None]:
text = (
    "Display a list of all instructors, showing each instructor's ID and the number of sections taught." 
    "Make sure to show the number of sections as 0 for instructors who have not taught any section."
    "Your query should use an outer join, and should not use subqueries."
)

device = "cuda"
inputs = tokenizer(
    text,
    return_tensors="pt"
).to(device)

outputs = model.generate(
    **inputs,
    max_length=300,
    temperature=0.2,  # Low temperature for deterministic output
    top_k=50,  # Limits the randomness
)

print(tokenizer.decode(outputs[0], skip_special_tokens=True))

### Step-7: Saving and Pushing the fine-tuned Model to Huggingface Hub

In [19]:
trainer.save_model()
trainer.push_to_hub()

No files have been modified since last commit. Skipping to prevent empty commit.


CommitInfo(commit_url='https://huggingface.co/abdussahid26/DeepSeek-R1-Distill-Qwen-1.5B-xlangai-spider-text-2-SQL-5-train-epochs/commit/be918de515ef76f95f242a080c461b284c10ff32', commit_message='End of training', commit_description='', oid='be918de515ef76f95f242a080c461b284c10ff32', pr_url=None, repo_url=RepoUrl('https://huggingface.co/abdussahid26/DeepSeek-R1-Distill-Qwen-1.5B-xlangai-spider-text-2-SQL-5-train-epochs', endpoint='https://huggingface.co', repo_type='model', repo_id='abdussahid26/DeepSeek-R1-Distill-Qwen-1.5B-xlangai-spider-text-2-SQL-5-train-epochs'), pr_revision=None, pr_num=None)