In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model
import torch

In [2]:
import json
from datasets import Dataset

# Load the JSON file
with open('sales_dataset.json', 'r') as f:
    data = json.load(f)

# Convert to Hugging Face Dataset
dataset = Dataset.from_dict({
    'question': [item['question'] for item in data],
    'answer': [item['answer'] for item in data]
})

In [3]:
# Load model directly
!pip install bitsandbytes


from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="float16"
)

tokenizer = AutoTokenizer.from_pretrained("unsloth/DeepSeek-R1-Distill-Qwen-7B")
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained("unsloth/DeepSeek-R1-Distill-Qwen-7B", quantization_config=bnb_config, device_map="auto")



Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [5]:
# Configure LoRA
lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

# Wrap the model with LoRA
model = get_peft_model(model, lora_config)

In [6]:
def preprocess_function(examples):
    # Tokenize the inputs
    tokenized_inputs = tokenizer(
        examples['question'],
        examples['answer'],
        truncation=True,
        padding="max_length",
        max_length=512,
    )
    
    # Add labels for next-token prediction
    tokenized_inputs['labels'] = tokenized_inputs['input_ids'].copy()
    return tokenized_inputs

tokenized_dataset = dataset.map(preprocess_function, batched=True)

Map:   0%|          | 0/50000 [00:00<?, ? examples/s]

In [7]:
# Create a data collator

from transformers import DataCollatorWithPadding
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

In [8]:
# Set up training
training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=16,
    optim="paged_adamw_32bit",
    learning_rate=2e-4,
    num_train_epochs=3,
    logging_steps=100,
    fp16=True,
    save_strategy="epoch",
)

In [9]:
# Initialize the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    eval_dataset=tokenized_dataset,
    data_collator=data_collator,
)

In [10]:

import torch
torch.cuda.empty_cache()
torch.cuda.reset_peak_memory_stats()


In [11]:
# Set the model to training mode
model.train()

# Train the model
trainer.train()

Step,Training Loss
100,0.9492
200,0.0321
300,0.0123
400,0.0094
500,0.0091
600,0.009
700,0.0089
800,0.0088
900,0.0088
1000,0.0088


TrainOutput(global_step=9375, training_loss=0.018038951660792033, metrics={'train_runtime': 27220.7266, 'train_samples_per_second': 5.511, 'train_steps_per_second': 0.344, 'total_flos': 3.2593039589376e+18, 'train_loss': 0.018038951660792033, 'epoch': 3.0})

In [None]:
eval_results = trainer.evaluate()
print(f"Evaluation Results: {eval_results}")

In [12]:
from transformers import pipeline

# Load the fine-tuned model
fine_tuned_model = AutoModelForCausalLM.from_pretrained('./fine-tuned-kassie')
fine_tuned_tokenizer = AutoTokenizer.from_pretrained('./fine-tuned-kassie')

# Create a text generation pipeline
qa_pipeline = pipeline('text-generation', model=fine_tuned_model, tokenizer=fine_tuned_tokenizer)

# Generate an answer for a new question
new_question = "What is the best strategy to increase sales?"
answer = qa_pipeline(new_question, max_length=512)
print(answer)

OSError: Incorrect path_or_model_id: './fine-tuned-kassie'. Please provide either the path to a local folder or the repo_id of a model on the Hub.