In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
from peft import get_peft_model, LoraConfig, TaskType
from datasets import load_dataset
import transformers

# # Load model and tokenizer
# model_name = "deepseek-ai/deepseek-llm-1.5b"
# model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")
# tokenizer = AutoTokenizer.from_pretrained(model_name)

tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B")
model = AutoModelForCausalLM.from_pretrained("deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B")



In [2]:
# Load dataset
dataset = load_dataset("json", data_files={"train": "data.jsonl"})


def preprocess_function(examples):
    # Combine instruction and response (adjust the formatting as needed)
    texts = [
        f"Instruction: {instr}\nResponse: {resp}"
        for instr, resp in zip(examples["instruction"], examples["response"])
    ]
    # Tokenize the combined text
    tokenized = tokenizer(
        texts,
        truncation=True,
        padding="max_length",  # or use another strategy as needed
        max_length=512       # adjust maximum length as needed
    )
    # For causal LM training, it's common to set labels to be the same as input_ids
    tokenized["labels"] = tokenized["input_ids"].copy()
    return tokenized

# Apply preprocessing to your training split
processed_dataset = dataset["train"].map(preprocess_function, batched=True)
# Optionally remove the original columns
processed_dataset = processed_dataset.remove_columns(["instruction", "response"])


# LoRA configuration
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM, 
    inference_mode=False,
    r=16, 
    lora_alpha=32, 
    lora_dropout=0.1
)

# Apply LoRA
model = get_peft_model(model, lora_config)

training_args = TrainingArguments(
    output_dir="./fine-tuned-deepseek",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    eval_strategy="no",  # note: use 'eval_strategy' as per the warning
    save_strategy="epoch",
    logging_dir="./logs",
    num_train_epochs=3,
    learning_rate=2e-4,
    fp16=True,
    remove_unused_columns=False  # disable removal of columns not in the model's forward signature
)


# Train model
trainer = transformers.Trainer(
    model=model,
    args=training_args,
    train_dataset=processed_dataset,
)
trainer.train()


Step,Training Loss


TrainOutput(global_step=3, training_loss=0.0, metrics={'train_runtime': 2.8757, 'train_samples_per_second': 2.086, 'train_steps_per_second': 1.043, 'total_flos': 28493906706432.0, 'train_loss': 0.0, 'epoch': 3.0})

In [8]:
from peft import PeftConfig

base_model_id = "deepseek-r1-1.5B-qwen-distill-finetuned"  # your base model identifier

if hasattr(model, "peft_config"):
    # Check if the peft_config is a dictionary (typically mapping adapter names to config objects)
    if isinstance(model.peft_config, dict):
        for adapter_name, adapter_config in model.peft_config.items():
            if isinstance(adapter_config, str):
                # If the adapter config is a string (likely a path), load it as a proper PeftConfig object.
                config_obj = PeftConfig.from_pretrained(adapter_config)
                config_obj.base_model_name_or_path = base_model_id
                model.peft_config[adapter_name] = config_obj
            elif isinstance(adapter_config, dict):
                # If it's a dictionary, update it directly.
                model.peft_config[adapter_name]["base_model_name_or_path"] = base_model_id
            else:
                # Otherwise, assume it's already an object and set the attribute.
                adapter_config.base_model_name_or_path = base_model_id
    elif isinstance(model.peft_config, str):
        # If peft_config is directly a string, load it as a proper config object.
        peft_config = PeftConfig.from_pretrained(model.peft_config)
        peft_config.base_model_name_or_path = base_model_id
        model.peft_config = peft_config
    else:
        # Fallback: assume it's already a config object.
        model.peft_config.base_model_name_or_path = base_model_id
else:
    # If there is no peft_config attribute, update the base model name in model.config
    model.config.base_model_name_or_path = base_model_id

# --- Save the model and tokenizer ---
output_dir = "./deepseek-r1-1.5B-qwen-distill-finetuned"
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)

print(f"Model and tokenizer saved to {output_dir}")


Model and tokenizer saved to ./deepseek-r1-1.5B-qwen-distill-finetuned


In [14]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

# Specify your base model identifier (this should be the original model you started with)
base_model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"

# Load the tokenizer and the full base model (this ensures we have a valid config with a "model_type")
tokenizer = AutoTokenizer.from_pretrained(base_model_id)
base_model = AutoModelForCausalLM.from_pretrained(base_model_id, trust_remote_code=True)

# Path to your saved adapter (fine-tuned) weights/configuration
adapter_path = "./deepseek-r1-1.5B-qwen-distill-finetuned"

# Load the adapter (PEFT) weights on top of the base model
model = PeftModel.from_pretrained(base_model, adapter_path)

# Set the model to evaluation mode
model.eval()


PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): Qwen2ForCausalLM(
      (model): Qwen2Model(
        (embed_tokens): Embedding(151936, 1536)
        (layers): ModuleList(
          (0-27): 28 x Qwen2DecoderLayer(
            (self_attn): Qwen2Attention(
              (q_proj): lora.Linear(
                (base_layer): Linear(in_features=1536, out_features=1536, bias=True)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=1536, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=1536, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): Linear(in_fea

In [15]:

# --- Inference Example ---
prompt = "Instruction: How do I reset my password??\nResponse:"
inputs = tokenizer(prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=50)

# Decode and print the generated output
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


Instruction: How do I reset my password??
Response: To reset your password, you should follow these steps:

1. Enter your new password in the password field.
2. Click the "Remember Me" button.
3. Log in to your account.

Please note: If you have an account with a
