<a href="https://colab.research.google.com/github/Zimal-Alam/Zimal-Alam/blob/main/deepseek_demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install datasets -q
!pip install bitsandbytes -q
!pip install -U bitsandbytes -q

In [2]:
!pip install -U bitsandbytes



In [3]:
!pip install evaluate



In [4]:
import torch
from transformers import (
    AutoTokenizer, AutoModelForCausalLM, GenerationConfig, TrainingArguments, Trainer, BitsAndBytesConfig
)
from datasets import load_dataset, DatasetDict
from peft import LoraConfig, get_peft_model
from torch.utils.data import DataLoader
import evaluate
import os

In [5]:
# Checkpoint used for both the tokenizer and the base model
model_name = "deepseek-ai/deepseek-llm-7b-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# bitsandbytes settings: 4-bit NF4 weights with double quantization,
# computing in bfloat16
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    load_in_4bit=True,
)

# Load the quantized model and let `device_map="auto"` decide placement
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config,
)



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


pytorch_model.bin.index.json:   0%|          | 0.00/22.5k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

pytorch_model-00001-of-00002.bin:   0%|          | 0.00/9.97G [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.6k [00:00<?, ?B/s]

pytorch_model-00002-of-00002.bin:   0%|          | 0.00/3.85G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/121 [00:00<?, ?B/s]

In [6]:
# LoRA configuration for efficient fine-tuning
lora_config = LoraConfig(
    r=16,  # Increased LoRA rank for better adaptation
    lora_alpha=32,
    target_modules=["q_proj", "k_proj", "v_proj"],
    lora_dropout=0.1,  # Slightly increased dropout to prevent overfitting
    bias="none",
    # Declare the task so PEFT builds its causal-LM wrapper and records the
    # task in the saved adapter config.
    task_type="CAUSAL_LM",
)


# Wrap the quantized base model with the LoRA adapters defined above
model = get_peft_model(model, lora_config)

In [7]:
# Load the checkpoint's generation defaults, reuse EOS as the padding token,
# then attach the result to the model
generation_config = GenerationConfig.from_pretrained(model_name)
generation_config.pad_token_id = generation_config.eos_token_id
model.generation_config = generation_config

In [11]:
# Load the Spider dataset by name; the "train" and "validation" splits are
# used further down
spider_dataset = load_dataset(path="spider")

# Preprocessing function (fixed-length padding to `max_length`)
def preprocess_function(examples, max_length=512):
    """Tokenize a batch of Spider examples for causal-LM fine-tuning.

    Each example becomes a single sequence ``prompt + SQL + EOS``. Labels are
    aligned position-by-position with ``input_ids``: prompt and padding
    positions are set to -100 so that only the SQL tokens (and EOS)
    contribute to the loss.

    Args:
        examples: Batch dict holding "question" and "query" lists.
        max_length: Fixed output length. Longer sequences are truncated on
            the right; shorter ones are right-padded.

    Returns:
        dict with "input_ids", "attention_mask" and "labels"; each value is
        a list (one entry per example) of ``max_length`` ints.
    """
    ignore_index = -100
    eos_id = tokenizer.eos_token_id
    # Fall back to EOS (then 0) when the tokenizer defines no padding token.
    pad_id = next(
        (tok for tok in (tokenizer.pad_token_id, eos_id) if tok is not None), 0
    )

    # Same prompt template as the evaluation loop, so training and inference
    # see identically tokenized prompts.
    prompts = [f"Question: {q} SQL: " for q in examples["question"]]
    prompt_ids_batch = tokenizer(prompts)["input_ids"]
    # No special tokens here: the SQL is a continuation of the prompt.
    target_ids_batch = tokenizer(
        list(examples["query"]), add_special_tokens=False
    )["input_ids"]

    model_inputs = {"input_ids": [], "attention_mask": [], "labels": []}
    for prompt_ids, target_ids in zip(prompt_ids_batch, target_ids_batch):
        prompt_ids = list(prompt_ids)
        target_ids = list(target_ids)
        if eos_id is not None:
            # Teach the model where the query ends.
            target_ids.append(eos_id)

        input_ids = (prompt_ids + target_ids)[:max_length]
        labels = ([ignore_index] * len(prompt_ids) + target_ids)[:max_length]
        n_pad = max_length - len(input_ids)

        model_inputs["input_ids"].append(input_ids + [pad_id] * n_pad)
        model_inputs["attention_mask"].append([1] * len(input_ids) + [0] * n_pad)
        model_inputs["labels"].append(labels + [ignore_index] * n_pad)

    return model_inputs

# Run the preprocessing over every split, one batch of examples at a time
tokenized_spider = spider_dataset.map(function=preprocess_function, batched=True)

Map:   0%|          | 0/7000 [00:00<?, ? examples/s]



Map:   0%|          | 0/1034 [00:00<?, ? examples/s]

In [None]:
# Training arguments with higher epochs and gradient accumulation
import inspect

# Newer transformers releases renamed `evaluation_strategy` to `eval_strategy`
# and later dropped the old keyword. Dependencies are installed unpinned, so
# pick whichever name the installed TrainingArguments accepts.
_eval_strategy_kwarg = (
    "eval_strategy"
    if "eval_strategy" in inspect.signature(TrainingArguments.__init__).parameters
    else "evaluation_strategy"
)

training_args = TrainingArguments(
    output_dir="./saved_model",
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=8,  # 2 x 8 = 16 examples per optimizer step per device
    learning_rate=3e-5,
    num_train_epochs=5,  # Increased epochs for better accuracy
    weight_decay=0.01,
    fp16=True,
    logging_steps=50,
    save_steps=1000,
    eval_steps=1000,  # evaluate on the same schedule as checkpointing
    save_total_limit=2,
    load_best_model_at_end=True,
    report_to="none",
    **{_eval_strategy_kwarg: "steps"},
)

# Build the Trainer on the tokenized Spider train/validation splits
trainer = Trainer(
    args=training_args,
    model=model,
    eval_dataset=tokenized_spider["validation"],
    train_dataset=tokenized_spider["train"],
)

# Run fine-tuning
trainer.train()

# Write the trained model to the local output directory
trainer.save_model(output_dir="./saved_model")



Step,Training Loss,Validation Loss


In [None]:
# Evaluation function using exact match accuracy
def compute_exact_match(predictions, references):
    """Return the fraction of predictions that equal their reference.

    The comparison is plain ``==`` per pair, so it works for SQL strings as
    well as for integer labels. (The `accuracy` metric that was loaded here
    before expects integer class labels and cannot score strings.)

    Args:
        predictions: Sequence of predicted values.
        references: Sequence of gold values, same length as ``predictions``.

    Returns:
        dict ``{"accuracy": float}`` with a value in [0, 1]; 0.0 for empty
        input.

    Raises:
        ValueError: If the two sequences differ in length.
    """
    predictions = list(predictions)
    references = list(references)
    if len(predictions) != len(references):
        raise ValueError(
            f"Mismatch in the number of predictions ({len(predictions)}) "
            f"and references ({len(references)})"
        )
    if not predictions:
        return {"accuracy": 0.0}

    matches = sum(1 for pred, ref in zip(predictions, references) if pred == ref)
    return {"accuracy": matches / len(predictions)}

# Testing the model on Spider: exact-match accuracy of generated SQL on the
# validation split
model.eval()
correct, total = 0, 0

for sample in tokenized_spider["validation"]:
    input_text = f"Question: {sample['question']} SQL: "
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[1]

    # Using beam search instead of greedy decoding
    outputs = model.generate(**inputs, max_new_tokens=100, num_beams=5, early_stopping=True)

    # For a decoder-only model the returned sequence starts with the prompt
    # tokens. Drop them so only the generated SQL is compared with the gold
    # query; otherwise the comparison below can never succeed.
    generated_ids = outputs[0][prompt_length:]
    predicted_sql = tokenizer.decode(generated_ids, skip_special_tokens=True)

    if predicted_sql.strip() == sample["query"].strip():
        correct += 1
    total += 1

# Avoid a ZeroDivisionError if the split is empty
accuracy = correct / total * 100 if total else 0.0
print(f"Final Accuracy on Spider: {accuracy:.2f}%")

# Ensure accuracy is at least 80%
assert accuracy >= 80, "Accuracy did not meet the required threshold!"

# Initial code: the cells below hold the original inference demo and an earlier fine-tuning draft




In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig

# Load the base checkpoint in bfloat16 and let `device_map="auto"` place it
model_name = "deepseek-ai/deepseek-llm-7b-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch.bfloat16)

# Use the checkpoint's generation defaults, with EOS doubling as padding
model.generation_config = GenerationConfig.from_pretrained(model_name)
model.generation_config.pad_token_id = model.generation_config.eos_token_id

# Prompt the model to continue a sentence (up to 100 new tokens)
text = "An attention function can be described as mapping a query and a set of key-value pairs to an output, where the query, keys, values, and output are all vectors. The output is"
inputs = tokenizer(text, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=100)

# Decode the full returned sequence and show it
result = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(result)


In [None]:
# Re-run the same completion prompt with the model and tokenizer already in memory
text = "An attention function can be described as mapping a query and a set of key-value pairs to an output, where the query, keys, values, and output are all vectors. The output is"
inputs = tokenizer(text, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=100)

# Decode the returned sequence, dropping special tokens, and print it
result = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(result)


An attention function can be described as mapping a query and a set of key-value pairs to an output, where the query, keys, values, and output are all vectors. The output is a vector of real numbers, and the attention function is a function that maps the query and key-value pairs to the output.

The attention function is a key component of the attention mechanism, which is a type of neural network architecture that allows the network to focus on specific parts of the input data. The attention mechanism is used in various applications, such as machine translation, image captioning, and question answering.

The attention function is typically implemented using a neural network architecture, where the


In [None]:
!pip install datasets -q
!pip install bitsandbytes -q
!pip install -U bitsandbytes -q

In [None]:
!pip install colab-env --quiet

import warnings
warnings.filterwarnings("ignore", message="You seem to be using the pipelines sequentially on GPU")

import colab_env
import os

access_token_write = os.getenv("HUGGINGFACE_ACCESS_TOKEN_WRITE")

from huggingface_hub import login

login(
  token=access_token_write,
  add_to_git_credential=True
)

In [None]:
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    GenerationConfig,
    TrainingArguments,
    Trainer,
    BitsAndBytesConfig,
)
from datasets import load_dataset
import bitsandbytes
from peft import LoraConfig, PeftModel  # Import LoraConfig from peft

# Checkpoint used for both the tokenizer and the 4-bit quantized model
model_name = "deepseek-ai/deepseek-llm-7b-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# bitsandbytes settings: 4-bit NF4 weights with double quantization,
# computing in bfloat16
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    load_in_4bit=True,
)

# Load the quantized model and let `device_map="auto"` decide placement
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config,
)

# Define LoRA configuration
lora_config = LoraConfig(
    r=8,  # Rank of the LoRA update matrices
    lora_alpha=32,  # Scaling factor for the LoRA updates
    target_modules=["q_proj", "k_proj", "v_proj"],  # Attention projections that receive adapters
    lora_dropout=0.05,  # Dropout probability for the LoRA layers
    bias="none",  # No bias for the LoRA layers
    # Declare the task so PEFT builds its causal-LM wrapper and records the
    # task in the saved adapter config.
    task_type="CAUSAL_LM",
)


# Apply LoRA
# The model is already quantized during loading using bnb_config, and compute dtype is set to bfloat16.
from peft import get_peft_model  # Import get_peft_model
model = get_peft_model(model, lora_config)  # Use get_peft_model to add LoRA to the model

# Load the checkpoint's generation defaults, reuse EOS as the padding token,
# then attach the result to the model
generation_config = GenerationConfig.from_pretrained(model_name)
generation_config.pad_token_id = generation_config.eos_token_id
model.generation_config = generation_config


# Load the Spider dataset by name; the "train" and "validation" splits are
# used further down
spider_dataset = load_dataset(path="spider")

# Preprocessing function (fixed-length padding to `max_length`)
def preprocess_function(examples, max_length=512):
    """Tokenize a batch of Spider examples for causal-LM fine-tuning.

    Each example becomes a single sequence ``prompt + SQL + EOS``. Labels are
    aligned position-by-position with ``input_ids``: prompt and padding
    positions are set to -100 so that only the SQL tokens (and EOS)
    contribute to the loss.

    Args:
        examples: Batch dict holding "question" and "query" lists.
        max_length: Fixed output length. Longer sequences are truncated on
            the right; shorter ones are right-padded.

    Returns:
        dict with "input_ids", "attention_mask" and "labels"; each value is
        a list (one entry per example) of ``max_length`` ints.
    """
    ignore_index = -100
    eos_id = tokenizer.eos_token_id
    # Fall back to EOS (then 0) when the tokenizer defines no padding token.
    pad_id = next(
        (tok for tok in (tokenizer.pad_token_id, eos_id) if tok is not None), 0
    )

    prompts = [f"Question: {q} SQL: " for q in examples["question"]]
    prompt_ids_batch = tokenizer(prompts)["input_ids"]
    # No special tokens here: the SQL is a continuation of the prompt.
    target_ids_batch = tokenizer(
        list(examples["query"]), add_special_tokens=False
    )["input_ids"]

    model_inputs = {"input_ids": [], "attention_mask": [], "labels": []}
    for prompt_ids, target_ids in zip(prompt_ids_batch, target_ids_batch):
        prompt_ids = list(prompt_ids)
        target_ids = list(target_ids)
        if eos_id is not None:
            # Teach the model where the query ends.
            target_ids.append(eos_id)

        input_ids = (prompt_ids + target_ids)[:max_length]
        labels = ([ignore_index] * len(prompt_ids) + target_ids)[:max_length]
        n_pad = max_length - len(input_ids)

        model_inputs["input_ids"].append(input_ids + [pad_id] * n_pad)
        model_inputs["attention_mask"].append([1] * len(input_ids) + [0] * n_pad)
        model_inputs["labels"].append(labels + [ignore_index] * n_pad)

    return model_inputs

# Run the preprocessing over every split, one batch of examples at a time
tokenized_spider = spider_dataset.map(function=preprocess_function, batched=True)

# Define training arguments
import inspect

# Newer transformers releases renamed `evaluation_strategy` to `eval_strategy`
# and later dropped the old keyword. Dependencies are installed unpinned, so
# pick whichever name the installed TrainingArguments accepts.
_eval_strategy_kwarg = (
    "eval_strategy"
    if "eval_strategy" in inspect.signature(TrainingArguments.__init__).parameters
    else "evaluation_strategy"
)

training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=4,  # 1 x 4 = 4 examples per optimizer step per device
    learning_rate=2e-5,
    num_train_epochs=1,
    weight_decay=0.01,
    fp16=True,
    logging_steps=10,
    save_steps=500,
    push_to_hub=True,
    # NOTE(review): pushing requires write access to this Hub repository;
    # confirm the namespace belongs to the logged-in account.
    hub_model_id="frankmorales2020/deepseek-llm-7b-base-spider",
    report_to="none",
    **{_eval_strategy_kwarg: "epoch"},
)

# Build the Trainer on the tokenized Spider train/validation splits
trainer = Trainer(
    args=training_args,
    model=model,
    eval_dataset=tokenized_spider["validation"],
    train_dataset=tokenized_spider["train"],
)

# Run fine-tuning
trainer.train()

# Save the result to the configured output directory, then upload it to the Hub
trainer.save_model()
trainer.push_to_hub()