In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
from datasets import load_dataset
from peft import LoraConfig, get_peft_model
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
model_name = "stabilityai/stablelm-base-alpha-3b"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Fix padding token issue
tokenizer.pad_token = tokenizer.eos_token

# Load model in 8-bit (memory efficient)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    load_in_8bit=True,
)

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.
Loading checkpoint shards: 100%|██████████| 2/2 [00:06<00:00,  3.05s/it]


In [3]:
from transformers import pipeline

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device_map="auto"
)

def chat1(query, max_new_tokens=100, temperature=0.0):
    result = pipe(
        query,
        max_new_tokens=max_new_tokens,
        do_sample=False,   # Greedy decoding for fast and stable replies
        temperature=temperature
    )[0]["generated_text"]
    return result.replace(query, "").strip()

Device set to use cuda:0


In [4]:
test_questions = [
    "What is overfitting?",
    "What is the role of dropout in neural networks?",
    "What is data augmentation?"
]

for q in test_questions:
    response = chat1(q)
    print(f"> Question: {q}\n> Answer: {response}\n")

The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


> Question: What is overfitting?
> Answer: I'm not sure what you mean by that. I'm not sure what you mean by that.

>I'm not sure what you mean by that.

I'm not sure what you mean by that.

>I'm not sure what you mean by that.

I'm not sure what you mean by that.

>I'm not sure what you mean by that.

I'm not sure what you mean by that.

>

> Question: What is the role of dropout in neural networks?
> Answer: The answer is that the dropout is a very important factor in neural networks. It is a very important factor in the training of neural networks. The dropout is a very important factor in the training of neural networks. The dropout is a very important factor in the training of neural networks. The dropout is a very important factor in the training of neural networks. The dropout is a very important factor in the training of neural networks. The dropout is a very important factor in

> Question: What is data augmentation?
> Answer: I'm not sure what you mean by this.
User2: I mean 

In [5]:
# Apply LoRA for efficient fine-tuning

lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["query_key_value", "dense", "dense_h_to_4h", "dense_4h_to_h"],
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)

In [6]:

# Load your handcrafted ML Q&A dataset
dataset_dict = load_dataset('json', data_files='/home/user2/Downloads/LLMOps/ml_educational_qa_dataset.jsonl')
dataset = dataset_dict['train']  # select the split

In [7]:
# Preprocessing function
def preprocess(batch):
    inputs = [f"Question: {p}\nAnswer:" for p in batch['prompt']]
    targets = batch['response']
    full_texts = [f"{inp} {resp}" for inp, resp in zip(inputs, targets)]

    tokenized = tokenizer(
        full_texts,
        truncation=True,
        padding="max_length",
        max_length=256  # reduce from 512 for memory saving
    )
    tokenized["labels"] = tokenized["input_ids"].copy()
    return tokenized


dataset = dataset.map(preprocess, batched=True)
dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])

In [None]:
# Fine-tuning configuration 

training_args = TrainingArguments(
    output_dir='./fine-tuned-model',  
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    learning_rate=5e-5,
    num_train_epochs=1,
    logging_steps=10,
    save_strategy='steps',
    save_steps=100,
    # save_optimizer=False,  # <- only save model weights
    save_safetensors=False,
    fp16=True,
    eval_strategy='no',
    report_to='none',
)


trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    tokenizer=tokenizer,
)

  trainer = Trainer(


In [9]:

#  Start fine-tuning

trainer.train()

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'pad_token_id': 0}.


Step,Training Loss
10,2.4537
20,0.441


TrainOutput(global_step=25, training_loss=1.231599178314209, metrics={'train_runtime': 18.9492, 'train_samples_per_second': 5.277, 'train_steps_per_second': 1.319, 'total_flos': 528090975436800.0, 'train_loss': 1.231599178314209, 'epoch': 1.0})

In [10]:
# Memory-efficient inference function
def chat2(prompt, max_new_tokens=128):
    input_text = f"Question: {prompt}\nAnswer:"
    inputs = tokenizer(input_text, return_tensors='pt').to(model.device)

    output = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        top_p=0.95,
        temperature=0.7,
        pad_token_id=tokenizer.eos_token_id,
    )

    response = tokenizer.decode(output[0], skip_special_tokens=True)
    return response.split("Answer:")[-1].strip()

In [12]:
# Example usage after fine-tuning
test_questions = [
    "What is overfitting?",
    "What is the role of dropout in neural networks?",
    "What is data augmentation?"
]

for q in test_questions:
    response = chat2(q)
    print(f"> Question: {q}\n> Answer: {response}\n")

> Question: What is overfitting?
> Answer: Overfitting is the problem of overfitting.

> Question: What is the role of dropout in neural networks?
> Answer: The role of dropout is to allow the network to focus on more important tasks/inputs, and ignore irrelevant or less important ones.

> Question: What is data augmentation?
> Answer: Data augmentation is the process of modifying the data so that it better fits a given dataset.

