In [1]:
pip install datasets



In [2]:
from transformers import Trainer, TrainingArguments
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

In [3]:
# load tokenizer and model
model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"

model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [4]:
# load dataset
data = load_dataset("json", data_files="/content/programming_jokes.json")

In [5]:
# ensure the tokenizer has a padding token
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

In [6]:
# tokenize the dataset
def tokenize_function(examples):
    inputs =  tokenizer(examples["text"], padding="max_length", truncation=True, max_length=50)
    inputs["labels"] = inputs["input_ids"].copy()
    return inputs

tokenized_data = data.map(tokenize_function, batched=True)

In [7]:
# prepare tbe model for LoRA tuning
# LoRA configurations
lora_config = LoraConfig(
    task_type = "CAUSAL_LM",
    inference_mode = False,
    r = 8,
    lora_alpha = 16,
    lora_dropout = 0.1
)

In [8]:
# prepare the model for lora
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, lora_config)

In [9]:
# define training arguments
training_args = TrainingArguments(
    output_dir = "./results",
    overwrite_output_dir = "True",
    num_train_epochs = 5,
    per_device_train_batch_size = 4,
    save_steps = 100,
    save_total_limit = 2,
    learning_rate = 5e-5,
    logging_dir = "./logs",
    logging_steps = 10,
    fp16 = True,
    report_to = "none" # Disable wandb reporting explicitly
)

In [10]:
# define the trainer
trainer = Trainer(
    model = model,
    args = training_args,
    train_dataset = tokenized_data["train"],
    tokenizer = tokenizer
)

  trainer = Trainer(


In [11]:
# train the model
trainer.train()

Step,Training Loss
10,12.999
20,10.6567
30,5.7636
40,2.1105
50,1.6225
60,1.5263
70,1.4521
80,1.4193
90,1.39
100,1.3692


TrainOutput(global_step=1250, training_loss=0.5932690200805664, metrics={'train_runtime': 236.5671, 'train_samples_per_second': 21.136, 'train_steps_per_second': 5.284, 'total_flos': 2317205760000000.0, 'train_loss': 0.5932690200805664, 'epoch': 5.0})

In [12]:
model.save_pretrained('./fine_tuned_model')
tokenizer.save_pretrained('./fine_tuned_model')

('./fine_tuned_model/tokenizer_config.json',
 './fine_tuned_model/special_tokens_map.json',
 './fine_tuned_model/tokenizer.json')

In [13]:
!zip -r /content/fine_tuned_model.zip /content/fine_tuned_model

  adding: content/fine_tuned_model/ (stored 0%)
  adding: content/fine_tuned_model/special_tokens_map.json (deflated 73%)
  adding: content/fine_tuned_model/adapter_model.safetensors (deflated 8%)
  adding: content/fine_tuned_model/tokenizer_config.json (deflated 85%)
  adding: content/fine_tuned_model/tokenizer.json (deflated 81%)
  adding: content/fine_tuned_model/adapter_config.json (deflated 53%)
  adding: content/fine_tuned_model/README.md (deflated 66%)


In [14]:
import zipfile
import os

# Path to the .zip file
zip_path = "/content/fine_tuned_model.zip"
extracted_path = "/content/fine_tuned_model"

# Extract the .zip file
try:
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extracted_path)
    print(f"Model extracted to {extracted_path}")
except Exception as e:
    print(f"Error extracting the zip file: {e}")

# Load the model and tokenizer
from transformers import AutoTokenizer, AutoModelForCausalLM

try:
    tokenizer = AutoTokenizer.from_pretrained(extracted_path)
    model = AutoModelForCausalLM.from_pretrained(extracted_path)
    print("Model loaded successfully")
except Exception as e:
    print(f"An error occurred while loading the model: {e}")

Model extracted to /content/fine_tuned_model
Model loaded successfully


In [25]:
# input prompt
try:
  prompt = input('Enter your prompt: ')
  print('Prompt entered successfully')
except Exception as e:
    print(f"An error occurred: {e}")


Enter your prompt: Tell me a joke:
Prompt entered successfully


In [26]:
# tokenize the input
try:
  input_ids = tokenizer.encode(prompt, return_tensors='pt')
  print('Input tokenized successfully')
except Exception as e:
    print(f"An error occurred: {e}")

Input tokenized successfully


In [27]:
outputs = model.generate(
    input_ids,
    max_length=50,
    num_return_sequences=1,
    temperature=0.7,
    top_p=0.9,
    top_k=50,
    do_sample=True
)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


In [28]:
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Tell me a joke: Why did the programmer get lost? Because it was confused with numbers. 🙄

Okay, so I need to tell a joke about why a programmer got lost because it confused with numbers. Hmm, let me think
