In [3]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

model_name = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
max_seq_length = 2048 
dtype = None 
load_in_4bit = True

tokenizer = AutoTokenizer.from_pretrained(model_name)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    load_in_4bit = load_in_4bit
    ).to("cuda")

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.
`low_cpu_mem_usage` was None, now default to True since model is quantized.
Loading checkpoint shards: 100%|██████████| 2/2 [00:33<00:00, 16.91s/it]


In [4]:
!wandb login

wandb: Currently logged in as: awsed-aq (awsed-aq-lut-university) to https://api.wandb.ai. Use `wandb login --relogin` to force relogin


In [5]:
import wandb
wandb.init(
    project="ai-surveyor-deepseek-8B-test",
    config={
        "learning_rate": 5e-5,
        "architecture": "DeepSeek-R1-Distill-Llama-8B",
        "dataset": "dataset.jsonl",
        "epochs": 2,
    }
)

[34m[1mwandb[0m: Currently logged in as: [33mawsed-aq[0m ([33mawsed-aq-lut-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


### Model inference before fine-tuning

In [7]:
prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context. 
Write a response that appropriately completes the request. 
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Instruction:
You are a surveyor working for a LUT University. You have been tasked with asking students about their experiences with the university's fundamental of programming in python course.
You are to ask the students about their experiences with the course, what they liked, what they didn't like, and what they would like to see improved.
You should be empathetic and understanding in your approach, and ensure that the students feel comfortable sharing their thoughts.

### Question:
{}

### Response:
<think>{}"""

In [9]:
question = "Who are you?"

prompt = prompt_style.format(question, "")
inputs = tokenizer(prompt, return_tensors="pt").to("cuda")

outputs = model.generate(
    input_ids=inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=1200,
    use_cache=True,
)

response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(response)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Below is an instruction that describes a task, paired with an input that provides further context. 
Write a response that appropriately completes the request. 
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Instruction:
You are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment planning. 
Please answer the following medical question. 

### Question:
Who are you?

### Response:
<think>
agoletonunks discaydčan CitiesstrcasecmpDED'gchog Ink-Zaesco oversugh dintabelle之ceaeiolaONTAL soundntag금usalelahinxiomehirwilFFE imaginationulpashire658 اخبارpliblassenmine Mercuryستگیuelle Chall.wrap�)section vent Flakeylkoppsandomedis iotaelizeippauptbaugharestkiyeatoi872izzaittelчерlédاسمゴリvere집Exploreranzkersicode Reyes EddieICTUREimmel winters-*-alls Zukunft HydePOOLerratCRY/datps�回'gchog_DYNAMIC-sem catevaeggatairereuURITY_Version��eneniyimeldorfdukuldrofarendつぶ 

In [6]:
from datasets import load_dataset

dataset = load_dataset("json", data_files={"train": "../data/output.jsonl"}, split="train")
print({len(dataset)})

train_test_split = dataset.train_test_split(test_size=0.2)
train_dataset = train_test_split["train"]
eval_dataset = train_test_split["test"]

{100}


In [5]:
def tokenize_function(examples):
    combined_texts = [f"{prompt}\n{completion}" for prompt, completion in zip(examples["prompt"], examples["completion"])]
    tokenized = tokenizer(combined_texts, truncation=True, max_length=512, padding="max_length")
    tokenized["labels"] = tokenized["input_ids"].copy()
    return tokenized

tokenized_train_dataset = train_dataset.map(tokenize_function, batched=True)
tokenized_eval_dataset = eval_dataset.map(tokenize_function, batched=True)

Map: 100%|██████████| 80/80 [00:00<00:00, 1754.23 examples/s]
Map: 100%|██████████| 20/20 [00:00<00:00, 816.99 examples/s]


In [6]:
from transformers import BitsAndBytesConfig

device_map={'':torch.cuda.current_device()}
quantization_config = BitsAndBytesConfig(load_in_8bit=True, llm_int8_enable_fp32_cpu_offload=True)

model = AutoModelForCausalLM.from_pretrained(
    model_name, 
    quantization_config=quantization_config, 
    device_map=device_map
)

In [7]:
from peft import get_peft_model, LoraConfig, TaskType

lora_config = LoraConfig(
    r=8, lora_alpha=16, lora_dropout=0.05, task_type=TaskType.CAUSAL_LM
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

trainable params: 1,089,536 || all params: 1,778,177,536 || trainable%: 0.0613


In [12]:
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(
    output_dir="../deepseek_finetuned",
    num_train_epochs=2,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=16,
    fp16=True,
    logging_steps=10,
    save_steps=100,
    evaluation_strategy="steps",
    eval_steps=10,
    learning_rate=3e-5,
    logging_dir="./logs",
    report_to="wandb",
    run_name="DeepSeek_FineTuning_Experiment",
)

In [13]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_eval_dataset,
)

In [14]:
trainer.train()

Step,Training Loss,Validation Loss


TrainOutput(global_step=4, training_loss=7.2343854904174805, metrics={'train_runtime': 604.2265, 'train_samples_per_second': 0.265, 'train_steps_per_second': 0.007, 'total_flos': 531511388405760.0, 'train_loss': 7.2343854904174805, 'epoch': 1.4})

In [15]:
save_path = "../deepseek_finetuned_v1"
model.save_pretrained(save_path)
tokenizer.save_pretrained(save_path)
print({save_path})

{'../deepseek_finetuned_v1'}


In [16]:
from peft import PeftModel

base_model = AutoModelForCausalLM.from_pretrained(model_name)
model = PeftModel.from_pretrained(base_model, save_path)
model = model.merge_and_unload()

final_save_path = "../deepseek_finetuned_full"
model.save_pretrained(final_save_path)
tokenizer.save_pretrained(final_save_path)
print({final_save_path})

{'../deepseek_finetuned_full'}


In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM
final_save_path = "../deepseek_finetuned_full"
model = AutoModelForCausalLM.from_pretrained(final_save_path)
tokenizer = AutoTokenizer.from_pretrained(final_save_path)

  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00,  7.42it/s]


In [2]:
from transformers import pipeline

In [3]:
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

Device set to use cuda:0


In [11]:
prompt = "Who are you?"

In [12]:
# Generate text using the pipeline
generated_texts = pipe(prompt, max_length=100, num_return_sequences=1)
generated_text = generated_texts[0]['generated_text']

In [13]:
generated_texts

[{'generated_text': "Who are you? What is your role in the mathematics department?\n\nI am a high school math teacher with a background in mathematics education. I have been teaching for 10 years and have a strong passion for helping students understand math.\n\nWhat do you do as a high school math teacher?\n\nI teach high school math, and I've been teaching for 10 years. I have a strong background in teaching high school math, and I believe that math is a key to understanding the world."}]