In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the Mistral tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-v0.1")

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [2]:
from peft import LoraConfig, get_peft_model

# Define LoRA configuration
lora_config = LoraConfig(
    r=8,  # Rank of the low-rank matrices
    lora_alpha=32,  # Scaling factor
    target_modules=["q_proj", "v_proj"],  # Target layers
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM",
)

# Apply LoRA to the model
model = get_peft_model(model, lora_config)


In [3]:
tokenizer.pad_token = tokenizer.eos_token


In [4]:
from datasets import load_dataset

# Load the dataset
dataset = load_dataset("json", data_files="dataset.jsonl")

In [2]:
def preprocess_function(examples):
    # Format the input as a conversation
    inputs = [f"<user>: {q}\n<bot>:" for q in examples["input"]]
    targets = [r for r in examples["output"]]

    # Tokenize inputs and targets
    model_inputs = tokenizer(inputs, max_length=128, truncation=True, padding="max_length")
    labels = tokenizer(targets, max_length=128, truncation=True, padding="max_length")

    # Add labels to the model inputs
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

tokenized_dataset = dataset.map(preprocess_function, batched=True)


NameError: name 'dataset' is not defined

In [8]:
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments

training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=2,  # Reduce batch size
    num_train_epochs=3,
    save_steps=10_000,
    save_total_limit=2,
    logging_dir="./logs",
    logging_steps=100,
    evaluation_strategy="steps",
    eval_steps=500,
    warmup_steps=500,
    weight_decay=0.01,
    fp16=True,  # Enable mixed precision
)



In [9]:
from datasets import load_metric

# Load a metric (e.g., BLEU)
metric = load_metric("bleu")

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
    return metric.compute(predictions=predictions, references=labels)

# Pass the compute_metrics function to the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
    compute_metrics=compute_metrics,
)

ValueError: You have set `args.eval_strategy` to IntervalStrategy.STEPS but you didn't pass an `eval_dataset` to `Trainer`. Either set `args.eval_strategy` to `no` or pass an `eval_dataset`. 

In [None]:
import torch

# Check GPU memory
print(f"GPU Memory Allocated: {torch.cuda.memory_allocated() / 1e9} GB")
print(f"GPU Memory Reserved: {torch.cuda.memory_reserved() / 1e9} GB")

# Fine-tune the model
trainer.train()

# Save the fine-tuned model
model.save_pretrained("./fine-tuned-mistral")
tokenizer.save_pretrained("./fine-tuned-mistral")