In [1]:
import json
import datasets
from datasets import Dataset
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
from peft import LoraConfig, get_peft_model

# Load dataset from JSON file
def load_dataset(filename='weather_dataset.json'):
    with open(filename, 'r') as f:
        data = json.load(f)
    return Dataset.from_dict(data)

# Load the dataset
data = load_dataset()

ModuleNotFoundError: No module named 'peft'

In [2]:


# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained('unsloth/Phi-3-mini-4k-instruct')
model = AutoModelForCausalLM.from_pretrained('unsloth/Phi-3-mini-4k-instruct', trust_remote_code=True)

# Add adapter configuration
peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=16,
    bias="none",
    task_type="CAUSAL_LM"
)

# Get the PEFT model
model = get_peft_model(model, peft_config)

# Tokenize the dataset
def tokenize_function(examples):
    inputs = [ex['user_input'] for ex in examples]
    responses = [ex['assistant_response'] for ex in examples]
    model_inputs = tokenizer(inputs, padding="max_length", truncation=True)
    model_inputs["labels"] = tokenizer(responses, padding="max_length", truncation=True)["input_ids"]
    return model_inputs

tokenized_data = data.map(tokenize_function, batched=True)

# Prepare data collator
data_collator = transformers.DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False,
)

# Training arguments
training_args = TrainingArguments(
    output_dir="./results",
    overwrite_output_dir=True,
    num_train_epochs=3,
    per_device_train_batch_size=4,
    save_steps=10_000,
    save_total_limit=2,
    prediction_loss_only=True,
    fp16=False,  # Set to False for CPU training
)

# Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=tokenized_data,
    eval_dataset=tokenized_data,  # Use the same dataset for evaluation for simplicity
)

# Train the model
trainer.train()

# Save the model and tokenizer
model.save_pretrained('path_to_save_your_model')
tokenizer.save_pretrained('path_to_save_your_model')


OSError: unsloth/Phi-3-mini-4k-instruct does not appear to have a file named config.json. Checkout 'https://huggingface.co/unsloth/Phi-3-mini-4k-instruct/tree/None' for available files.