In [2]:
#mistralai/Mistral-7B-Instruct-v0.2

In [None]:
import os

import torch
from datasets import load_dataset, DatasetDict
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training, TaskType
from transformers import AutoModelForSequenceClassification, AutoTokenizer, BitsAndBytesConfig, TrainingArguments, Trainer

In [5]:
from huggingface_hub import login

# SECURITY: never commit an access token to a notebook. The token is read from
# the HF_TOKEN environment variable; when that variable is unset, `token` is
# None and `login()` falls back to prompting for the token interactively.
# NOTE(review): the token that used to be hardcoded in this cell has been
# exposed and must be revoked in the Hugging Face account settings.
login(token=os.environ.get("HF_TOKEN"))

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: fineGrained).
Your token has been saved to C:\Users\thegh\.cache\huggingface\token
Login successful


In [23]:
# Release unused cached GPU memory held by PyTorch's caching allocator.
torch.cuda.empty_cache()  # Clear CUDA cache

# Load the Mistral model and tokenizer

In [None]:
# Checkpoint to fine-tune; bitsandbytes quantizes its weights to 4 bits while loading.
model_name = "mistralai/Mistral-7B-Instruct-v0.2"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    llm_int8_enable_fp32_cpu_offload=True,
)

# num_labels=3 attaches a 3-way classification head on top of the base model.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=3,
    quantization_config=bnb_config,
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Explicitly set and verify the pad_token

In [None]:
# Register a dedicated padding token when the tokenizer has none, and grow the
# embedding matrix so the new token id has a row.
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})
    model.resize_token_embeddings(len(tokenizer))

# The sequence-classification head pools the last non-padding token of each
# row, which it finds through `config.pad_token_id`. Without this assignment
# the model refuses any batch larger than 1, which breaks evaluation because
# the eval batch size is left at its (larger) default.
model.config.pad_token_id = tokenizer.pad_token_id

# Check if the padding token is correctly set

In [None]:
# Sanity check: show the padding token string and its vocabulary id.
print(f"Pad token: {tokenizer.pad_token}, ID: {tokenizer.pad_token_id}")

# Apply PEFT to add trainable LoRA adapters

In [None]:
# Prepare the 4-bit base model for training before attaching adapters. Per the
# PEFT documentation this freezes the base weights, upcasts the remaining
# half-precision parameters to fp32 for numerical stability, and enables
# gradient checkpointing to reduce activation memory.
model = prepare_model_for_kbit_training(model)

# LoRA adapter settings for sequence classification:
#   r            - rank of the low-rank update matrices
#   lora_alpha   - scaling factor applied to the adapter output
#   lora_dropout - dropout applied on the adapter path during training
peft_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    inference_mode=False,
    r=8,
    lora_alpha=32,
    lora_dropout=0.1
)

# NOTE: this rebinds `model`; re-running the cell without reloading the base
# model would wrap an already-wrapped model.
model = get_peft_model(model, peft_config)

# Load dataset and preprocess

In [None]:
# Every example is padded or truncated to exactly this many tokens.
max_length = 128
dataset = load_dataset("tweet_eval", "sentiment")


def preprocess_data(examples):
    """Tokenize a batch of examples.

    Args:
        examples: Batch mapping as passed by ``Dataset.map(batched=True)``;
            only the ``'text'`` column is read.

    Returns:
        The tokenizer output for the batch, padded and truncated to
        ``max_length`` tokens.
    """
    texts = examples['text']
    return tokenizer(
        texts,
        max_length=max_length,
        truncation=True,
        padding='max_length',
    )


encoded_dataset = dataset.map(preprocess_data, batched=True)

# Split dataset

In [None]:
# Fixed seed so the split (and therefore every reported metric) is the same
# on each run; without it `train_test_split` shuffles differently every time.
split_seed = 42

# 80% train, then the held-out 20% is halved into validation and test.
# NOTE(review): only the "train" split of the loaded dataset is used here; if
# the dataset ships its own validation/test splits, confirm that re-splitting
# train is intended.
train_testvalid = encoded_dataset["train"].train_test_split(test_size=0.2, seed=split_seed)
test_valid = train_testvalid["test"].train_test_split(test_size=0.5, seed=split_seed)
dataset = DatasetDict({
    'train': train_testvalid['train'],
    'validation': test_valid['test'],
    'test': test_valid['train']
})

# Define training arguments without mixed precision

In [None]:
training_args = TrainingArguments(
    # Output location for checkpoints and logs.
    output_dir="./results",
    # Schedule: three passes over the data, evaluating after each one.
    num_train_epochs=3,
    evaluation_strategy="epoch",
    # Optimizer settings.
    learning_rate=2e-5,
    weight_decay=0.01,
    # One example per step; gradients are accumulated over 16 steps to
    # emulate a larger batch within limited GPU memory.
    per_device_train_batch_size=1,
    gradient_accumulation_steps=16,
    # Mixed-precision (fp16) training stays off.
    fp16=False,
)

# Define the trainer

In [None]:
def compute_metrics(eval_pred):
    """Compute accuracy and support-weighted F1 from evaluation predictions.

    Implemented with torch only, so it needs no metric library, and it accepts
    the NumPy arrays that ``Trainer`` passes in (``torch.argmax`` cannot be
    called on a NumPy array directly).

    Args:
        eval_pred: A ``(logits, labels)`` pair. ``logits`` is array-like with
            one row of class scores per example; ``labels`` is array-like
            with one integer class id per example.

    Returns:
        dict with two Python floats:
            ``"accuracy"`` - fraction of examples predicted correctly.
            ``"f1"`` - per-class F1 averaged with each class weighted by its
            number of true examples.
        Both are ``0.0`` when there are no examples.
    """
    logits, labels = eval_pred
    # as_tensor accepts NumPy arrays, tensors and nested lists alike.
    labels = torch.as_tensor(labels).reshape(-1)
    predictions = torch.as_tensor(logits).argmax(dim=-1).reshape(-1)

    total = labels.numel()
    if total == 0:
        return {"accuracy": 0.0, "f1": 0.0}

    acc = (predictions == labels).sum().item() / total

    weighted_f1_sum = 0.0
    # Only classes that occur in `labels` carry weight in a weighted average.
    for cls in torch.unique(labels).tolist():
        is_true = labels == cls
        is_pred = predictions == cls
        true_positives = (is_true & is_pred).sum().item()
        support = is_true.sum().item()
        predicted = is_pred.sum().item()
        # F1 = 2*TP / (2*TP + FP + FN) = 2*TP / (support + predicted);
        # support >= 1 here, so the denominator is never zero.
        class_f1 = 2 * true_positives / (support + predicted)
        weighted_f1_sum += support * class_f1
    f1 = weighted_f1_sum / total

    return {"accuracy": acc, "f1": f1}

# Wire the LoRA-wrapped model, the training arguments, the train/validation
# splits and the metric function into a single Trainer.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["validation"],  # scored at the cadence set by evaluation_strategy
    compute_metrics=compute_metrics,
)

# Train the model

In [1]:
# Run fine-tuning with the Trainer configured above.
trainer.train()

  from .autonotebook import tqdm as notebook_tqdm
`low_cpu_mem_usage` was None, now set to True since model is quantized.
Loading checkpoint shards: 100%|██████████| 3/3 [00:23<00:00,  7.69s/it]
Some weights of MistralForSequenceClassification were not initialized from the model checkpoint at mistralai/Mistral-7B-Instruct-v0.2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Pad token: [PAD], ID: 32000


  attn_output = torch.nn.functional.scaled_dot_product_attention(


Epoch,Training Loss,Validation Loss


KeyboardInterrupt: 

# Save the trained model

In [None]:
# Model and tokenizer are written to the same directory.
save_dir = "./quantized_mistral_with_lora"
model.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)

# Evaluate the model on the test set

In [None]:
# Score the held-out test split, which is not passed to the Trainer as its eval_dataset.
test_split = dataset["test"]
results = trainer.evaluate(eval_dataset=test_split)
print(results)